gcc/config/spu/spu.c
1 /* Copyright (C) 2006-2018 Free Software Foundation, Inc.
2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
16
17 #define IN_TARGET_CODE 1
18
19 #include "config.h"
20 #include "system.h"
21 #include "coretypes.h"
22 #include "backend.h"
23 #include "target.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "cfghooks.h"
28 #include "cfgloop.h"
29 #include "df.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "stringpool.h"
33 #include "attribs.h"
34 #include "expmed.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "diagnostic-core.h"
40 #include "insn-attr.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "explow.h"
47 #include "expr.h"
48 #include "output.h"
49 #include "cfgrtl.h"
50 #include "cfgbuild.h"
51 #include "langhooks.h"
52 #include "reload.h"
53 #include "sched-int.h"
54 #include "params.h"
55 #include "gimplify.h"
56 #include "tm-constrs.h"
57 #include "ddg.h"
58 #include "dumpfile.h"
59 #include "builtins.h"
60 #include "rtl-iter.h"
61
62 /* This file should be included last. */
63 #include "target-def.h"
64
65 /* Builtin types, data and prototypes. */
66
67 enum spu_builtin_type_index
68 {
69 SPU_BTI_END_OF_PARAMS,
70
71 /* We create new type nodes for these. */
72 SPU_BTI_V16QI,
73 SPU_BTI_V8HI,
74 SPU_BTI_V4SI,
75 SPU_BTI_V2DI,
76 SPU_BTI_V4SF,
77 SPU_BTI_V2DF,
78 SPU_BTI_UV16QI,
79 SPU_BTI_UV8HI,
80 SPU_BTI_UV4SI,
81 SPU_BTI_UV2DI,
82
83 /* A 16-byte type. (Implemented with V16QI_type_node) */
84 SPU_BTI_QUADWORD,
85
86 /* These all correspond to intSI_type_node */
87 SPU_BTI_7,
88 SPU_BTI_S7,
89 SPU_BTI_U7,
90 SPU_BTI_S10,
91 SPU_BTI_S10_4,
92 SPU_BTI_U14,
93 SPU_BTI_16,
94 SPU_BTI_S16,
95 SPU_BTI_S16_2,
96 SPU_BTI_U16,
97 SPU_BTI_U16_2,
98 SPU_BTI_U18,
99
100 /* These correspond to the standard types */
101 SPU_BTI_INTQI,
102 SPU_BTI_INTHI,
103 SPU_BTI_INTSI,
104 SPU_BTI_INTDI,
105
106 SPU_BTI_UINTQI,
107 SPU_BTI_UINTHI,
108 SPU_BTI_UINTSI,
109 SPU_BTI_UINTDI,
110
111 SPU_BTI_FLOAT,
112 SPU_BTI_DOUBLE,
113
114 SPU_BTI_VOID,
115 SPU_BTI_PTR,
116
117 SPU_BTI_MAX
118 };
119
120 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
121 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
122 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
123 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
124 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
125 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
126 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
127 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
128 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
129 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
130
131 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
132
133 struct spu_builtin_range
134 {
135 int low, high;
136 };
137
138 static struct spu_builtin_range spu_builtin_range[] = {
139 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
140 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
141 {0ll, 0x7fll}, /* SPU_BTI_U7 */
142 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
143 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
144 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
145 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
146 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
147 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
148 {0ll, 0xffffll}, /* SPU_BTI_U16 */
149 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
150 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
151 };
152
153 \f
154 /* Target specific attribute specifications. */
155 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
156
157 /* Prototypes and external defs. */
158 static int get_pipe (rtx_insn *insn);
159 static int spu_naked_function_p (tree func);
160 static int mem_is_padded_component_ref (rtx x);
161 static void fix_range (const char *);
162 static rtx spu_expand_load (rtx, rtx, rtx, int);
163
164 /* Which instruction set architecture to use. */
165 int spu_arch;
166 /* Which CPU we are tuning for. */
167 int spu_tune;
168
169 /* The hardware requires 8 insns between a hint and the branch it
170 affects. This variable describes how many rtl instructions the
171 compiler needs to see before inserting a hint, and then the compiler
172 will insert enough nops to make it at least 8 insns. The default is
173 for the compiler to allow up to 2 nops to be emitted. The nops are
174 inserted in pairs, so we round down. */
175 int spu_hint_dist = (8*4) - (2*4);
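/* Illustrative arithmetic (not part of the original source): with the
   default of 2 nops, spu_hint_dist = 8*4 - 2*4 = 24 bytes, i.e. the
   compiler must see at least 6 of the required 8 instructions before
   the branch and relies on nop padding for the remaining 2.
   spu_option_override below recomputes this as 8*4 - spu_max_nops*4,
   clamped at 0.  */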
176
177 enum spu_immediate {
178 SPU_NONE,
179 SPU_IL,
180 SPU_ILA,
181 SPU_ILH,
182 SPU_ILHU,
183 SPU_ORI,
184 SPU_ORHI,
185 SPU_ORBI,
186 SPU_IOHL
187 };
188 enum immediate_class
189 {
190 IC_POOL, /* constant pool */
191 IC_IL1, /* one il* instruction */
192 IC_IL2, /* both ilhu and iohl instructions */
193 IC_IL1s, /* one il* instruction */
194 IC_IL2s, /* both ilhu and iohl instructions */
195 IC_FSMBI, /* the fsmbi instruction */
196 IC_CPAT, /* one of the c*d instructions */
197 IC_FSMBI2 /* fsmbi plus 1 other instruction */
198 };
199
200 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
201 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
202 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
203 static enum immediate_class classify_immediate (rtx op,
204 machine_mode mode);
205
206 /* Pointer mode for __ea references. */
207 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
208
209 \f
210 /* Define the structure for the machine field in struct function. */
211 struct GTY(()) machine_function
212 {
213 /* Register to use for PIC accesses. */
214 rtx pic_reg;
215 };
216
217 /* How to allocate a 'struct machine_function'. */
218 static struct machine_function *
219 spu_init_machine_status (void)
220 {
221 return ggc_cleared_alloc<machine_function> ();
222 }
223
224 /* Implement TARGET_OPTION_OVERRIDE. */
225 static void
226 spu_option_override (void)
227 {
228 /* Set up function hooks. */
229 init_machine_status = spu_init_machine_status;
230
231 /* Small loops will be completely peeled (unrolled) at -O3. For SPU it
232 is more important to keep code small by default. */
233 if (!flag_unroll_loops && !flag_peel_loops)
234 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
235 global_options.x_param_values,
236 global_options_set.x_param_values);
237
238 flag_omit_frame_pointer = 1;
239
240 /* Functions must be 8-byte aligned so we correctly handle dual issue. */
241 if (align_functions < 8)
242 align_functions = 8;
243
244 spu_hint_dist = 8*4 - spu_max_nops*4;
245 if (spu_hint_dist < 0)
246 spu_hint_dist = 0;
247
248 if (spu_fixed_range_string)
249 fix_range (spu_fixed_range_string);
250
251 /* Determine processor architectural level. */
252 if (spu_arch_string)
253 {
254 if (strcmp (&spu_arch_string[0], "cell") == 0)
255 spu_arch = PROCESSOR_CELL;
256 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
257 spu_arch = PROCESSOR_CELLEDP;
258 else
259 error ("bad value (%s) for -march= switch", spu_arch_string);
260 }
261
262 /* Determine processor to tune for. */
263 if (spu_tune_string)
264 {
265 if (strcmp (&spu_tune_string[0], "cell") == 0)
266 spu_tune = PROCESSOR_CELL;
267 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
268 spu_tune = PROCESSOR_CELLEDP;
269 else
270 error ("bad value (%s) for -mtune= switch", spu_tune_string);
271 }
272
273 /* Change defaults according to the processor architecture. */
274 if (spu_arch == PROCESSOR_CELLEDP)
275 {
276 /* If no command line option has been otherwise specified, change
277 the default to -mno-safe-hints on celledp -- only the original
278 Cell/B.E. processors require this workaround. */
279 if (!(target_flags_explicit & MASK_SAFE_HINTS))
280 target_flags &= ~MASK_SAFE_HINTS;
281 }
282
283 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
284 }
285 \f
286 /* Implement TARGET_HARD_REGNO_NREGS. */
287
288 static unsigned int
289 spu_hard_regno_nregs (unsigned int, machine_mode mode)
290 {
291 return CEIL (GET_MODE_BITSIZE (mode), MAX_FIXED_MODE_SIZE);
292 }
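/* Illustrative note (not from the original source): SPU registers are
   128 bits wide and MAX_FIXED_MODE_SIZE is the TImode size of 128, so
   every scalar and vector mode used here fits in one register, e.g.
   CEIL (32, 128) == 1 for SImode and CEIL (128, 128) == 1 for TImode.  */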
293
294 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
295 struct attribute_spec.handler. */
296
297 /* True if MODE is valid for the target. By "valid", we mean able to
298 be manipulated in non-trivial ways. In particular, this means all
299 the arithmetic is supported. */
300 static bool
301 spu_scalar_mode_supported_p (scalar_mode mode)
302 {
303 switch (mode)
304 {
305 case E_QImode:
306 case E_HImode:
307 case E_SImode:
308 case E_SFmode:
309 case E_DImode:
310 case E_TImode:
311 case E_DFmode:
312 return true;
313
314 default:
315 return false;
316 }
317 }
318
319 /* Similarly for vector modes. "Supported" here is less strict. At
320 least some operations are supported; one needs to check optabs or
321 builtins for further details. */
322 static bool
323 spu_vector_mode_supported_p (machine_mode mode)
324 {
325 switch (mode)
326 {
327 case E_V16QImode:
328 case E_V8HImode:
329 case E_V4SImode:
330 case E_V2DImode:
331 case E_V4SFmode:
332 case E_V2DFmode:
333 return true;
334
335 default:
336 return false;
337 }
338 }
339
340 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
341 least significant bytes of the outer mode. This function returns
342 TRUE for the SUBREG's where this is correct. */
343 int
344 valid_subreg (rtx op)
345 {
346 machine_mode om = GET_MODE (op);
347 machine_mode im = GET_MODE (SUBREG_REG (op));
348 return om != VOIDmode && im != VOIDmode
349 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
350 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
351 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
352 }
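/* Illustrative examples (not part of the original source):
   (subreg:SI (reg:HI) 0) is accepted because both modes fit in 4 bytes,
   (subreg:V4SI (reg:TI) 0) is accepted because both are 16 bytes, but
   (subreg:TI (reg:DI) 0) is rejected because the 8-byte inner value is
   not kept in the least significant bytes of the 16-byte outer mode.  */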
353
354 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
355 and adjust the start offset. */
356 static rtx
357 adjust_operand (rtx op, HOST_WIDE_INT * start)
358 {
359 machine_mode mode;
360 int op_size;
361 /* Strip any paradoxical SUBREG. */
362 if (GET_CODE (op) == SUBREG
363 && (GET_MODE_BITSIZE (GET_MODE (op))
364 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
365 {
366 if (start)
367 *start -=
368 GET_MODE_BITSIZE (GET_MODE (op)) -
369 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
370 op = SUBREG_REG (op);
371 }
372 /* If it is smaller than SI, ensure it gets a SUBREG. */
373 op_size = GET_MODE_BITSIZE (GET_MODE (op));
374 if (op_size < 32)
375 {
376 if (start)
377 *start += 32 - op_size;
378 op_size = 32;
379 }
380 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
381 mode = int_mode_for_size (op_size, 0).require ();
382 if (mode != GET_MODE (op))
383 op = gen_rtx_SUBREG (mode, op, 0);
384 return op;
385 }
386
387 void
388 spu_expand_extv (rtx ops[], int unsignedp)
389 {
390 rtx dst = ops[0], src = ops[1];
391 HOST_WIDE_INT width = INTVAL (ops[2]);
392 HOST_WIDE_INT start = INTVAL (ops[3]);
393 HOST_WIDE_INT align_mask;
394 rtx s0, s1, mask, r0;
395
396 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
397
398 if (MEM_P (src))
399 {
400 /* First, determine if we need 1 TImode load or 2. We need only 1
401 if the bits being extracted do not cross the alignment boundary
402 as determined by the MEM and its address. */
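/* Illustrative example (not part of the original source), with both
   MEM_ALIGN and START expressed in bits: for a 128-bit aligned MEM,
   align_mask is -128, so START = 8, WIDTH = 32 stays inside one
   quadword and needs a single load, while START = 120, WIDTH = 16
   crosses the boundary and needs two.  */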
403
404 align_mask = -MEM_ALIGN (src);
405 if ((start & align_mask) == ((start + width - 1) & align_mask))
406 {
407 /* Alignment is sufficient for 1 load. */
408 s0 = gen_reg_rtx (TImode);
409 r0 = spu_expand_load (s0, 0, src, start / 8);
410 start &= 7;
411 if (r0)
412 emit_insn (gen_rotqby_ti (s0, s0, r0));
413 }
414 else
415 {
416 /* Need 2 loads. */
417 s0 = gen_reg_rtx (TImode);
418 s1 = gen_reg_rtx (TImode);
419 r0 = spu_expand_load (s0, s1, src, start / 8);
420 start &= 7;
421
422 gcc_assert (start + width <= 128);
423 if (r0)
424 {
425 rtx r1 = gen_reg_rtx (SImode);
426 mask = gen_reg_rtx (TImode);
427 emit_move_insn (mask, GEN_INT (-1));
428 emit_insn (gen_rotqby_ti (s0, s0, r0));
429 emit_insn (gen_rotqby_ti (s1, s1, r0));
430 if (GET_CODE (r0) == CONST_INT)
431 r1 = GEN_INT (INTVAL (r0) & 15);
432 else
433 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
434 emit_insn (gen_shlqby_ti (mask, mask, r1));
435 emit_insn (gen_selb (s0, s1, s0, mask));
436 }
437 }
438
439 }
440 else if (GET_CODE (src) == SUBREG)
441 {
442 rtx r = SUBREG_REG (src);
443 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
444 s0 = gen_reg_rtx (TImode);
445 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
446 emit_insn (gen_rtx_SET (s0, gen_rtx_ZERO_EXTEND (TImode, r)));
447 else
448 emit_move_insn (s0, src);
449 }
450 else
451 {
452 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
453 s0 = gen_reg_rtx (TImode);
454 emit_move_insn (s0, src);
455 }
456
457 /* Now s0 is TImode and contains the bits to extract at start. */
458
459 if (start)
460 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
461
462 if (128 - width)
463 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
464
465 emit_move_insn (dst, s0);
466 }
467
468 void
469 spu_expand_insv (rtx ops[])
470 {
471 HOST_WIDE_INT width = INTVAL (ops[1]);
472 HOST_WIDE_INT start = INTVAL (ops[2]);
473 unsigned HOST_WIDE_INT maskbits;
474 machine_mode dst_mode;
475 rtx dst = ops[0], src = ops[3];
476 int dst_size;
477 rtx mask;
478 rtx shift_reg;
479 int shift;
480
481
482 if (GET_CODE (ops[0]) == MEM)
483 dst = gen_reg_rtx (TImode);
484 else
485 dst = adjust_operand (dst, &start);
486 dst_mode = GET_MODE (dst);
487 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
488
489 if (CONSTANT_P (src))
490 {
491 machine_mode m =
492 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
493 src = force_reg (m, convert_to_mode (m, src, 0));
494 }
495 src = adjust_operand (src, 0);
496
497 mask = gen_reg_rtx (dst_mode);
498 shift_reg = gen_reg_rtx (dst_mode);
499 shift = dst_size - start - width;
500
501 /* It's not safe to use subreg here because the compiler assumes
502 that the SUBREG_REG is right justified in the SUBREG. */
503 convert_move (shift_reg, src, 1);
504
505 if (shift > 0)
506 {
507 switch (dst_mode)
508 {
509 case E_SImode:
510 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
511 break;
512 case E_DImode:
513 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
514 break;
515 case E_TImode:
516 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
517 break;
518 default:
519 abort ();
520 }
521 }
522 else if (shift < 0)
523 abort ();
524
525 switch (dst_size)
526 {
527 case 32:
528 maskbits = (~(unsigned HOST_WIDE_INT)0 << (32 - width - start));
529 if (start)
530 maskbits += ((unsigned HOST_WIDE_INT)1 << (32 - start));
531 emit_move_insn (mask, GEN_INT (maskbits));
532 break;
533 case 64:
534 maskbits = (~(unsigned HOST_WIDE_INT)0 << (64 - width - start));
535 if (start)
536 maskbits += ((unsigned HOST_WIDE_INT)1 << (64 - start));
537 emit_move_insn (mask, GEN_INT (maskbits));
538 break;
539 case 128:
540 {
541 unsigned char arr[16];
542 int i = start / 8;
543 memset (arr, 0, sizeof (arr));
544 arr[i] = 0xff >> (start & 7);
545 for (i++; i <= (start + width - 1) / 8; i++)
546 arr[i] = 0xff;
547 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
548 emit_move_insn (mask, array_to_constant (TImode, arr));
549 }
550 break;
551 default:
552 abort ();
553 }
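/* Worked example (illustrative, not from the original source): for a
   32-bit destination with START = 4 and WIDTH = 8, the 32-bit case
   above computes ~0 << 20 and then adds 1 << 28; the carry clears the
   bits above the field, leaving maskbits == 0x0ff00000, i.e. ones in
   exactly the 8 bit positions starting 4 bits below the MSB.  */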
554 if (GET_CODE (ops[0]) == MEM)
555 {
556 rtx low = gen_reg_rtx (SImode);
557 rtx rotl = gen_reg_rtx (SImode);
558 rtx mask0 = gen_reg_rtx (TImode);
559 rtx addr;
560 rtx addr0;
561 rtx addr1;
562 rtx mem;
563
564 addr = force_reg (Pmode, XEXP (ops[0], 0));
565 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
566 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
567 emit_insn (gen_negsi2 (rotl, low));
568 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
569 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
570 mem = change_address (ops[0], TImode, addr0);
571 set_mem_alias_set (mem, 0);
572 emit_move_insn (dst, mem);
573 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
574 if (start + width > MEM_ALIGN (ops[0]))
575 {
576 rtx shl = gen_reg_rtx (SImode);
577 rtx mask1 = gen_reg_rtx (TImode);
578 rtx dst1 = gen_reg_rtx (TImode);
579 rtx mem1;
580 addr1 = plus_constant (Pmode, addr, 16);
581 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
582 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
583 emit_insn (gen_shlqby_ti (mask1, mask, shl));
584 mem1 = change_address (ops[0], TImode, addr1);
585 set_mem_alias_set (mem1, 0);
586 emit_move_insn (dst1, mem1);
587 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
588 emit_move_insn (mem1, dst1);
589 }
590 emit_move_insn (mem, dst);
591 }
592 else
593 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
594 }
595
596
597 int
598 spu_expand_block_move (rtx ops[])
599 {
600 HOST_WIDE_INT bytes, align, offset;
601 rtx src, dst, sreg, dreg, target;
602 int i;
603 if (GET_CODE (ops[2]) != CONST_INT
604 || GET_CODE (ops[3]) != CONST_INT
605 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
606 return 0;
607
608 bytes = INTVAL (ops[2]);
609 align = INTVAL (ops[3]);
610
611 if (bytes <= 0)
612 return 1;
613
614 dst = ops[0];
615 src = ops[1];
616
617 if (align == 16)
618 {
619 for (offset = 0; offset + 16 <= bytes; offset += 16)
620 {
621 dst = adjust_address (ops[0], V16QImode, offset);
622 src = adjust_address (ops[1], V16QImode, offset);
623 emit_move_insn (dst, src);
624 }
625 if (offset < bytes)
626 {
627 rtx mask;
628 unsigned char arr[16] = { 0 };
629 for (i = 0; i < bytes - offset; i++)
630 arr[i] = 0xff;
631 dst = adjust_address (ops[0], V16QImode, offset);
632 src = adjust_address (ops[1], V16QImode, offset);
633 mask = gen_reg_rtx (V16QImode);
634 sreg = gen_reg_rtx (V16QImode);
635 dreg = gen_reg_rtx (V16QImode);
636 target = gen_reg_rtx (V16QImode);
637 emit_move_insn (mask, array_to_constant (V16QImode, arr));
638 emit_move_insn (dreg, dst);
639 emit_move_insn (sreg, src);
640 emit_insn (gen_selb (target, dreg, sreg, mask));
641 emit_move_insn (dst, target);
642 }
643 return 1;
644 }
645 return 0;
646 }
647
648 enum spu_comp_code
649 { SPU_EQ, SPU_GT, SPU_GTU };
650
651 int spu_comp_icode[12][3] = {
652 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
653 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
654 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
655 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
656 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
657 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
658 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
659 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
660 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
661 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
662 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
663 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
664 };
665
666 /* Generate a compare for CODE. Return a brand-new rtx that represents
667 the result of the compare. GCC can figure this out too if we don't
668 provide all variations of compares, but since GCC always wants to use
669 WORD_MODE, we can generate better code in most cases by doing it
670 ourselves. */
671 void
672 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
673 {
674 int reverse_compare = 0;
675 int reverse_test = 0;
676 rtx compare_result, eq_result;
677 rtx comp_rtx, eq_rtx;
678 machine_mode comp_mode;
679 machine_mode op_mode;
680 enum spu_comp_code scode, eq_code;
681 enum insn_code ior_code;
682 enum rtx_code code = GET_CODE (cmp);
683 rtx op0 = XEXP (cmp, 0);
684 rtx op1 = XEXP (cmp, 1);
685 int index;
686 int eq_test = 0;
687
688 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
689 and so on, to keep the constant in operand 1. */
690 if (GET_CODE (op1) == CONST_INT)
691 {
692 HOST_WIDE_INT val = INTVAL (op1) - 1;
693 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
694 switch (code)
695 {
696 case GE:
697 op1 = GEN_INT (val);
698 code = GT;
699 break;
700 case LT:
701 op1 = GEN_INT (val);
702 code = LE;
703 break;
704 case GEU:
705 op1 = GEN_INT (val);
706 code = GTU;
707 break;
708 case LTU:
709 op1 = GEN_INT (val);
710 code = LEU;
711 break;
712 default:
713 break;
714 }
715 }
716
717 /* However, if we generate an integer result, performing a reverse test
718 would require an extra negation, so avoid that where possible. */
719 if (GET_CODE (op1) == CONST_INT && is_set == 1)
720 {
721 HOST_WIDE_INT val = INTVAL (op1) + 1;
722 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
723 switch (code)
724 {
725 case LE:
726 op1 = GEN_INT (val);
727 code = LT;
728 break;
729 case LEU:
730 op1 = GEN_INT (val);
731 code = LTU;
732 break;
733 default:
734 break;
735 }
736 }
737
738 comp_mode = SImode;
739 op_mode = GET_MODE (op0);
740
741 switch (code)
742 {
743 case GE:
744 scode = SPU_GT;
745 if (HONOR_NANS (op_mode))
746 {
747 reverse_compare = 0;
748 reverse_test = 0;
749 eq_test = 1;
750 eq_code = SPU_EQ;
751 }
752 else
753 {
754 reverse_compare = 1;
755 reverse_test = 1;
756 }
757 break;
758 case LE:
759 scode = SPU_GT;
760 if (HONOR_NANS (op_mode))
761 {
762 reverse_compare = 1;
763 reverse_test = 0;
764 eq_test = 1;
765 eq_code = SPU_EQ;
766 }
767 else
768 {
769 reverse_compare = 0;
770 reverse_test = 1;
771 }
772 break;
773 case LT:
774 reverse_compare = 1;
775 reverse_test = 0;
776 scode = SPU_GT;
777 break;
778 case GEU:
779 reverse_compare = 1;
780 reverse_test = 1;
781 scode = SPU_GTU;
782 break;
783 case LEU:
784 reverse_compare = 0;
785 reverse_test = 1;
786 scode = SPU_GTU;
787 break;
788 case LTU:
789 reverse_compare = 1;
790 reverse_test = 0;
791 scode = SPU_GTU;
792 break;
793 case NE:
794 reverse_compare = 0;
795 reverse_test = 1;
796 scode = SPU_EQ;
797 break;
798
799 case EQ:
800 scode = SPU_EQ;
801 break;
802 case GT:
803 scode = SPU_GT;
804 break;
805 case GTU:
806 scode = SPU_GTU;
807 break;
808 default:
809 scode = SPU_EQ;
810 break;
811 }
812
813 switch (op_mode)
814 {
815 case E_QImode:
816 index = 0;
817 comp_mode = QImode;
818 break;
819 case E_HImode:
820 index = 1;
821 comp_mode = HImode;
822 break;
823 case E_SImode:
824 index = 2;
825 break;
826 case E_DImode:
827 index = 3;
828 break;
829 case E_TImode:
830 index = 4;
831 break;
832 case E_SFmode:
833 index = 5;
834 break;
835 case E_DFmode:
836 index = 6;
837 break;
838 case E_V16QImode:
839 index = 7;
840 comp_mode = op_mode;
841 break;
842 case E_V8HImode:
843 index = 8;
844 comp_mode = op_mode;
845 break;
846 case E_V4SImode:
847 index = 9;
848 comp_mode = op_mode;
849 break;
850 case E_V4SFmode:
851 index = 10;
852 comp_mode = V4SImode;
853 break;
854 case E_V2DFmode:
855 index = 11;
856 comp_mode = V2DImode;
857 break;
858 case E_V2DImode:
859 default:
860 abort ();
861 }
862
863 if (GET_MODE (op1) == DFmode
864 && (scode != SPU_GT && scode != SPU_EQ))
865 abort ();
866
867 if (is_set == 0 && op1 == const0_rtx
868 && (GET_MODE (op0) == SImode
869 || GET_MODE (op0) == HImode
870 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
871 {
872 /* Don't need to set a register with the result when we are
873 comparing against zero and branching. */
874 reverse_test = !reverse_test;
875 compare_result = op0;
876 }
877 else
878 {
879 compare_result = gen_reg_rtx (comp_mode);
880
881 if (reverse_compare)
882 {
883 rtx t = op1;
884 op1 = op0;
885 op0 = t;
886 }
887
888 if (spu_comp_icode[index][scode] == 0)
889 abort ();
890
891 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
892 (op0, op_mode))
893 op0 = force_reg (op_mode, op0);
894 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
895 (op1, op_mode))
896 op1 = force_reg (op_mode, op1);
897 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
898 op0, op1);
899 if (comp_rtx == 0)
900 abort ();
901 emit_insn (comp_rtx);
902
903 if (eq_test)
904 {
905 eq_result = gen_reg_rtx (comp_mode);
906 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
907 op0, op1);
908 if (eq_rtx == 0)
909 abort ();
910 emit_insn (eq_rtx);
911 ior_code = optab_handler (ior_optab, comp_mode);
912 gcc_assert (ior_code != CODE_FOR_nothing);
913 emit_insn (GEN_FCN (ior_code)
914 (compare_result, compare_result, eq_result));
915 }
916 }
917
918 if (is_set == 0)
919 {
920 rtx bcomp;
921 rtx loc_ref;
922
923 /* We don't have branch on QI compare insns, so we convert the
924 QI compare result to a HI result. */
925 if (comp_mode == QImode)
926 {
927 rtx old_res = compare_result;
928 compare_result = gen_reg_rtx (HImode);
929 comp_mode = HImode;
930 emit_insn (gen_extendqihi2 (compare_result, old_res));
931 }
932
933 if (reverse_test)
934 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
935 else
936 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
937
938 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
939 emit_jump_insn (gen_rtx_SET (pc_rtx,
940 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
941 loc_ref, pc_rtx)));
942 }
943 else if (is_set == 2)
944 {
945 rtx target = operands[0];
946 int compare_size = GET_MODE_BITSIZE (comp_mode);
947 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
948 machine_mode mode = int_mode_for_size (target_size, 0).require ();
949 rtx select_mask;
950 rtx op_t = operands[2];
951 rtx op_f = operands[3];
952
953 /* The result of the comparison can be SI, HI or QI mode. Create a
954 mask based on that result. */
955 if (target_size > compare_size)
956 {
957 select_mask = gen_reg_rtx (mode);
958 emit_insn (gen_extend_compare (select_mask, compare_result));
959 }
960 else if (target_size < compare_size)
961 select_mask =
962 gen_rtx_SUBREG (mode, compare_result,
963 (compare_size - target_size) / BITS_PER_UNIT);
964 else if (comp_mode != mode)
965 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
966 else
967 select_mask = compare_result;
968
969 if (GET_MODE (target) != GET_MODE (op_t)
970 || GET_MODE (target) != GET_MODE (op_f))
971 abort ();
972
973 if (reverse_test)
974 emit_insn (gen_selb (target, op_t, op_f, select_mask));
975 else
976 emit_insn (gen_selb (target, op_f, op_t, select_mask));
977 }
978 else
979 {
980 rtx target = operands[0];
981 if (reverse_test)
982 emit_insn (gen_rtx_SET (compare_result,
983 gen_rtx_NOT (comp_mode, compare_result)));
984 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
985 emit_insn (gen_extendhisi2 (target, compare_result));
986 else if (GET_MODE (target) == SImode
987 && GET_MODE (compare_result) == QImode)
988 emit_insn (gen_extend_compare (target, compare_result));
989 else
990 emit_move_insn (target, compare_result);
991 }
992 }
993
994 HOST_WIDE_INT
995 const_double_to_hwint (rtx x)
996 {
997 HOST_WIDE_INT val;
998 if (GET_MODE (x) == SFmode)
999 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), val);
1000 else if (GET_MODE (x) == DFmode)
1001 {
1002 long l[2];
1003 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
1004 val = l[0];
1005 val = (val << 32) | (l[1] & 0xffffffff);
1006 }
1007 else
1008 abort ();
1009 return val;
1010 }
1011
1012 rtx
1013 hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
1014 {
1015 long tv[2];
1016 REAL_VALUE_TYPE rv;
1017 gcc_assert (mode == SFmode || mode == DFmode);
1018
1019 if (mode == SFmode)
1020 tv[0] = (v << 32) >> 32;
1021 else if (mode == DFmode)
1022 {
1023 tv[1] = (v << 32) >> 32;
1024 tv[0] = v >> 32;
1025 }
1026 real_from_target (&rv, tv, mode);
1027 return const_double_from_real_value (rv, mode);
1028 }
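/* Illustrative round trip (not from the original source), assuming the
   usual IEEE-style encodings: const_double_to_hwint on the SFmode
   constant 1.0 yields 0x3f800000, and hwint_to_const_double (SFmode,
   0x3f800000) rebuilds the same CONST_DOUBLE; the DFmode value 1.0
   corresponds to 0x3ff0000000000000.  */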
1029
1030 void
1031 print_operand_address (FILE * file, register rtx addr)
1032 {
1033 rtx reg;
1034 rtx offset;
1035
1036 if (GET_CODE (addr) == AND
1037 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1038 && INTVAL (XEXP (addr, 1)) == -16)
1039 addr = XEXP (addr, 0);
1040
1041 switch (GET_CODE (addr))
1042 {
1043 case REG:
1044 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1045 break;
1046
1047 case PLUS:
1048 reg = XEXP (addr, 0);
1049 offset = XEXP (addr, 1);
1050 if (GET_CODE (offset) == REG)
1051 {
1052 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1053 reg_names[REGNO (offset)]);
1054 }
1055 else if (GET_CODE (offset) == CONST_INT)
1056 {
1057 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1058 INTVAL (offset), reg_names[REGNO (reg)]);
1059 }
1060 else
1061 abort ();
1062 break;
1063
1064 case CONST:
1065 case LABEL_REF:
1066 case SYMBOL_REF:
1067 case CONST_INT:
1068 output_addr_const (file, addr);
1069 break;
1070
1071 default:
1072 debug_rtx (addr);
1073 abort ();
1074 }
1075 }
1076
1077 void
1078 print_operand (FILE * file, rtx x, int code)
1079 {
1080 machine_mode mode = GET_MODE (x);
1081 HOST_WIDE_INT val;
1082 unsigned char arr[16];
1083 int xcode = GET_CODE (x);
1084 int i, info;
1085 if (GET_MODE (x) == VOIDmode)
1086 switch (code)
1087 {
1088 case 'L': /* 128 bits, signed */
1089 case 'm': /* 128 bits, signed */
1090 case 'T': /* 128 bits, signed */
1091 case 't': /* 128 bits, signed */
1092 mode = TImode;
1093 break;
1094 case 'K': /* 64 bits, signed */
1095 case 'k': /* 64 bits, signed */
1096 case 'D': /* 64 bits, signed */
1097 case 'd': /* 64 bits, signed */
1098 mode = DImode;
1099 break;
1100 case 'J': /* 32 bits, signed */
1101 case 'j': /* 32 bits, signed */
1102 case 's': /* 32 bits, signed */
1103 case 'S': /* 32 bits, signed */
1104 mode = SImode;
1105 break;
1106 }
1107 switch (code)
1108 {
1109
1110 case 'j': /* 32 bits, signed */
1111 case 'k': /* 64 bits, signed */
1112 case 'm': /* 128 bits, signed */
1113 if (xcode == CONST_INT
1114 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1115 {
1116 gcc_assert (logical_immediate_p (x, mode));
1117 constant_to_array (mode, x, arr);
1118 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1119 val = trunc_int_for_mode (val, SImode);
1120 switch (which_logical_immediate (val))
1121 {
1122 case SPU_ORI:
1123 break;
1124 case SPU_ORHI:
1125 fprintf (file, "h");
1126 break;
1127 case SPU_ORBI:
1128 fprintf (file, "b");
1129 break;
1130 default:
1131 gcc_unreachable();
1132 }
1133 }
1134 else
1135 gcc_unreachable();
1136 return;
1137
1138 case 'J': /* 32 bits, signed */
1139 case 'K': /* 64 bits, signed */
1140 case 'L': /* 128 bits, signed */
1141 if (xcode == CONST_INT
1142 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1143 {
1144 gcc_assert (logical_immediate_p (x, mode)
1145 || iohl_immediate_p (x, mode));
1146 constant_to_array (mode, x, arr);
1147 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1148 val = trunc_int_for_mode (val, SImode);
1149 switch (which_logical_immediate (val))
1150 {
1151 case SPU_ORI:
1152 case SPU_IOHL:
1153 break;
1154 case SPU_ORHI:
1155 val = trunc_int_for_mode (val, HImode);
1156 break;
1157 case SPU_ORBI:
1158 val = trunc_int_for_mode (val, QImode);
1159 break;
1160 default:
1161 gcc_unreachable();
1162 }
1163 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1164 }
1165 else
1166 gcc_unreachable();
1167 return;
1168
1169 case 't': /* 128 bits, signed */
1170 case 'd': /* 64 bits, signed */
1171 case 's': /* 32 bits, signed */
1172 if (CONSTANT_P (x))
1173 {
1174 enum immediate_class c = classify_immediate (x, mode);
1175 switch (c)
1176 {
1177 case IC_IL1:
1178 constant_to_array (mode, x, arr);
1179 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1180 val = trunc_int_for_mode (val, SImode);
1181 switch (which_immediate_load (val))
1182 {
1183 case SPU_IL:
1184 break;
1185 case SPU_ILA:
1186 fprintf (file, "a");
1187 break;
1188 case SPU_ILH:
1189 fprintf (file, "h");
1190 break;
1191 case SPU_ILHU:
1192 fprintf (file, "hu");
1193 break;
1194 default:
1195 gcc_unreachable ();
1196 }
1197 break;
1198 case IC_CPAT:
1199 constant_to_array (mode, x, arr);
1200 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1201 if (info == 1)
1202 fprintf (file, "b");
1203 else if (info == 2)
1204 fprintf (file, "h");
1205 else if (info == 4)
1206 fprintf (file, "w");
1207 else if (info == 8)
1208 fprintf (file, "d");
1209 break;
1210 case IC_IL1s:
1211 if (xcode == CONST_VECTOR)
1212 {
1213 x = CONST_VECTOR_ELT (x, 0);
1214 xcode = GET_CODE (x);
1215 }
1216 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1217 fprintf (file, "a");
1218 else if (xcode == HIGH)
1219 fprintf (file, "hu");
1220 break;
1221 case IC_FSMBI:
1222 case IC_FSMBI2:
1223 case IC_IL2:
1224 case IC_IL2s:
1225 case IC_POOL:
1226 abort ();
1227 }
1228 }
1229 else
1230 gcc_unreachable ();
1231 return;
1232
1233 case 'T': /* 128 bits, signed */
1234 case 'D': /* 64 bits, signed */
1235 case 'S': /* 32 bits, signed */
1236 if (CONSTANT_P (x))
1237 {
1238 enum immediate_class c = classify_immediate (x, mode);
1239 switch (c)
1240 {
1241 case IC_IL1:
1242 constant_to_array (mode, x, arr);
1243 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1244 val = trunc_int_for_mode (val, SImode);
1245 switch (which_immediate_load (val))
1246 {
1247 case SPU_IL:
1248 case SPU_ILA:
1249 break;
1250 case SPU_ILH:
1251 case SPU_ILHU:
1252 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1253 break;
1254 default:
1255 gcc_unreachable ();
1256 }
1257 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1258 break;
1259 case IC_FSMBI:
1260 constant_to_array (mode, x, arr);
1261 val = 0;
1262 for (i = 0; i < 16; i++)
1263 {
1264 val <<= 1;
1265 val |= arr[i] & 1;
1266 }
1267 print_operand (file, GEN_INT (val), 0);
1268 break;
1269 case IC_CPAT:
1270 constant_to_array (mode, x, arr);
1271 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1272 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1273 break;
1274 case IC_IL1s:
1275 if (xcode == HIGH)
1276 x = XEXP (x, 0);
1277 if (GET_CODE (x) == CONST_VECTOR)
1278 x = CONST_VECTOR_ELT (x, 0);
1279 output_addr_const (file, x);
1280 if (xcode == HIGH)
1281 fprintf (file, "@h");
1282 break;
1283 case IC_IL2:
1284 case IC_IL2s:
1285 case IC_FSMBI2:
1286 case IC_POOL:
1287 abort ();
1288 }
1289 }
1290 else
1291 gcc_unreachable ();
1292 return;
1293
1294 case 'C':
1295 if (xcode == CONST_INT)
1296 {
1297 /* Only the 4 least significant bits are relevant for the c*d
1298 (generate controls) instructions. */
1299 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1300 return;
1301 }
1302 break;
1303
1304 case 'M': /* print code for c*d */
1305 if (GET_CODE (x) == CONST_INT)
1306 switch (INTVAL (x))
1307 {
1308 case 1:
1309 fprintf (file, "b");
1310 break;
1311 case 2:
1312 fprintf (file, "h");
1313 break;
1314 case 4:
1315 fprintf (file, "w");
1316 break;
1317 case 8:
1318 fprintf (file, "d");
1319 break;
1320 default:
1321 gcc_unreachable();
1322 }
1323 else
1324 gcc_unreachable();
1325 return;
1326
1327 case 'N': /* Negate the operand */
1328 if (xcode == CONST_INT)
1329 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1330 else if (xcode == CONST_VECTOR)
1331 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1332 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1333 return;
1334
1335 case 'I': /* enable/disable interrupts */
1336 if (xcode == CONST_INT)
1337 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1338 return;
1339
1340 case 'b': /* branch modifiers */
1341 if (xcode == REG)
1342 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1343 else if (COMPARISON_P (x))
1344 fprintf (file, "%s", xcode == NE ? "n" : "");
1345 return;
1346
1347 case 'i': /* indirect call */
1348 if (xcode == MEM)
1349 {
1350 if (GET_CODE (XEXP (x, 0)) == REG)
1351 /* Used in indirect function calls. */
1352 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1353 else
1354 output_address (GET_MODE (x), XEXP (x, 0));
1355 }
1356 return;
1357
1358 case 'p': /* load/store */
1359 if (xcode == MEM)
1360 {
1361 x = XEXP (x, 0);
1362 xcode = GET_CODE (x);
1363 }
1364 if (xcode == AND)
1365 {
1366 x = XEXP (x, 0);
1367 xcode = GET_CODE (x);
1368 }
1369 if (xcode == REG)
1370 fprintf (file, "d");
1371 else if (xcode == CONST_INT)
1372 fprintf (file, "a");
1373 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1374 fprintf (file, "r");
1375 else if (xcode == PLUS || xcode == LO_SUM)
1376 {
1377 if (GET_CODE (XEXP (x, 1)) == REG)
1378 fprintf (file, "x");
1379 else
1380 fprintf (file, "d");
1381 }
1382 return;
1383
1384 case 'e':
1385 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1386 val &= 0x7;
1387 output_addr_const (file, GEN_INT (val));
1388 return;
1389
1390 case 'f':
1391 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1392 val &= 0x1f;
1393 output_addr_const (file, GEN_INT (val));
1394 return;
1395
1396 case 'g':
1397 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1398 val &= 0x3f;
1399 output_addr_const (file, GEN_INT (val));
1400 return;
1401
1402 case 'h':
1403 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1404 val = (val >> 3) & 0x1f;
1405 output_addr_const (file, GEN_INT (val));
1406 return;
1407
1408 case 'E':
1409 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1410 val = -val;
1411 val &= 0x7;
1412 output_addr_const (file, GEN_INT (val));
1413 return;
1414
1415 case 'F':
1416 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1417 val = -val;
1418 val &= 0x1f;
1419 output_addr_const (file, GEN_INT (val));
1420 return;
1421
1422 case 'G':
1423 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1424 val = -val;
1425 val &= 0x3f;
1426 output_addr_const (file, GEN_INT (val));
1427 return;
1428
1429 case 'H':
1430 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1431 val = -(val & -8ll);
1432 val = (val >> 3) & 0x1f;
1433 output_addr_const (file, GEN_INT (val));
1434 return;
1435
1436 case 'v':
1437 case 'w':
1438 constant_to_array (mode, x, arr);
1439 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1440 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1441 return;
1442
1443 case 0:
1444 if (xcode == REG)
1445 fprintf (file, "%s", reg_names[REGNO (x)]);
1446 else if (xcode == MEM)
1447 output_address (GET_MODE (x), XEXP (x, 0));
1448 else if (xcode == CONST_VECTOR)
1449 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1450 else
1451 output_addr_const (file, x);
1452 return;
1453
1454 /* unused letters
1455 o qr u yz
1456 AB OPQR UVWXYZ */
1457 default:
1458 output_operand_lossage ("invalid %%xn code");
1459 }
1460 gcc_unreachable ();
1461 }
1462
1463 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1464 caller saved register. For leaf functions it is more efficient to
1465 use a volatile register because we won't need to save and restore the
1466 pic register. This routine is only valid after register allocation
1467 is completed, so we can pick an unused register. */
1468 static rtx
1469 get_pic_reg (void)
1470 {
1471 if (!reload_completed && !reload_in_progress)
1472 abort ();
1473
1474 /* If we've already made the decision, we need to keep with it. Once we've
1475 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1476 return true since the register is now live; this should not cause us to
1477 "switch back" to using pic_offset_table_rtx. */
1478 if (!cfun->machine->pic_reg)
1479 {
1480 if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1481 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1482 else
1483 cfun->machine->pic_reg = pic_offset_table_rtx;
1484 }
1485
1486 return cfun->machine->pic_reg;
1487 }
1488
1489 /* Split constant addresses to handle cases that are too large.
1490 Add in the pic register when in PIC mode.
1491 Split immediates that require more than 1 instruction. */
1492 int
1493 spu_split_immediate (rtx * ops)
1494 {
1495 machine_mode mode = GET_MODE (ops[0]);
1496 enum immediate_class c = classify_immediate (ops[1], mode);
1497
1498 switch (c)
1499 {
1500 case IC_IL2:
1501 {
1502 unsigned char arrhi[16];
1503 unsigned char arrlo[16];
1504 rtx to, temp, hi, lo;
1505 int i;
1506 /* We need to do reals as ints because the constant used in the
1507 IOR might not be a legitimate real constant. */
1508 scalar_int_mode imode = int_mode_for_mode (mode).require ();
1509 constant_to_array (mode, ops[1], arrhi);
1510 if (imode != mode)
1511 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1512 else
1513 to = ops[0];
1514 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1515 for (i = 0; i < 16; i += 4)
1516 {
1517 arrlo[i + 2] = arrhi[i + 2];
1518 arrlo[i + 3] = arrhi[i + 3];
1519 arrlo[i + 0] = arrlo[i + 1] = 0;
1520 arrhi[i + 2] = arrhi[i + 3] = 0;
1521 }
1522 hi = array_to_constant (imode, arrhi);
1523 lo = array_to_constant (imode, arrlo);
1524 emit_move_insn (temp, hi);
1525 emit_insn (gen_rtx_SET (to, gen_rtx_IOR (imode, temp, lo)));
1526 return 1;
1527 }
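/* Illustrative example (not from the original source): splitting the
   SImode constant 0x12345678 this way gives hi = 0x12340000 and
   lo = 0x00005678, i.e. an ilhu of the high halfwords followed by an
   iohl that ORs in the low halfwords.  */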
1528 case IC_FSMBI2:
1529 {
1530 unsigned char arr_fsmbi[16];
1531 unsigned char arr_andbi[16];
1532 rtx to, reg_fsmbi, reg_and;
1533 int i;
1534 /* We need to do reals as ints because the constant used in the
1535 * AND might not be a legitimate real constant. */
1536 scalar_int_mode imode = int_mode_for_mode (mode).require ();
1537 constant_to_array (mode, ops[1], arr_fsmbi);
1538 if (imode != mode)
1539 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1540 else
1541 to = ops[0];
1542 for (i = 0; i < 16; i++)
1543 if (arr_fsmbi[i] != 0)
1544 {
1545 arr_andbi[0] = arr_fsmbi[i];
1546 arr_fsmbi[i] = 0xff;
1547 }
1548 for (i = 1; i < 16; i++)
1549 arr_andbi[i] = arr_andbi[0];
1550 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1551 reg_and = array_to_constant (imode, arr_andbi);
1552 emit_move_insn (to, reg_fsmbi);
1553 emit_insn (gen_rtx_SET (to, gen_rtx_AND (imode, to, reg_and)));
1554 return 1;
1555 }
1556 case IC_POOL:
1557 if (reload_in_progress || reload_completed)
1558 {
1559 rtx mem = force_const_mem (mode, ops[1]);
1560 if (TARGET_LARGE_MEM)
1561 {
1562 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1563 emit_move_insn (addr, XEXP (mem, 0));
1564 mem = replace_equiv_address (mem, addr);
1565 }
1566 emit_move_insn (ops[0], mem);
1567 return 1;
1568 }
1569 break;
1570 case IC_IL1s:
1571 case IC_IL2s:
1572 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1573 {
1574 if (c == IC_IL2s)
1575 {
1576 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1577 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1578 }
1579 else if (flag_pic)
1580 emit_insn (gen_pic (ops[0], ops[1]));
1581 if (flag_pic)
1582 {
1583 rtx pic_reg = get_pic_reg ();
1584 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1585 }
1586 return flag_pic || c == IC_IL2s;
1587 }
1588 break;
1589 case IC_IL1:
1590 case IC_FSMBI:
1591 case IC_CPAT:
1592 break;
1593 }
1594 return 0;
1595 }
1596
1597 /* SAVING is TRUE when we are generating the actual load and store
1598 instructions for REGNO. When determining the size of the stack
1599 needed for saving registers we must allocate enough space for the
1600 worst case, because we don't always have the information early enough
1601 to not allocate it. But we can at least eliminate the actual loads
1602 and stores during the prologue/epilogue. */
1603 static int
1604 need_to_save_reg (int regno, int saving)
1605 {
1606 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1607 return 1;
1608 if (flag_pic
1609 && regno == PIC_OFFSET_TABLE_REGNUM
1610 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
1611 return 1;
1612 return 0;
1613 }
1614
1615 /* This function is only correct starting with local register
1616 allocation */
1617 int
1618 spu_saved_regs_size (void)
1619 {
1620 int reg_save_size = 0;
1621 int regno;
1622
1623 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1624 if (need_to_save_reg (regno, 0))
1625 reg_save_size += 0x10;
1626 return reg_save_size;
1627 }
1628
1629 static rtx_insn *
1630 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1631 {
1632 rtx reg = gen_rtx_REG (V4SImode, regno);
1633 rtx mem =
1634 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1635 return emit_insn (gen_movv4si (mem, reg));
1636 }
1637
1638 static rtx_insn *
1639 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1640 {
1641 rtx reg = gen_rtx_REG (V4SImode, regno);
1642 rtx mem =
1643 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1644 return emit_insn (gen_movv4si (reg, mem));
1645 }
1646
1647 /* This happens after reload, so we need to expand it. */
1648 static rtx_insn *
1649 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1650 {
1651 rtx_insn *insn;
1652 if (satisfies_constraint_K (GEN_INT (imm)))
1653 {
1654 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1655 }
1656 else
1657 {
1658 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1659 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1660 if (REGNO (src) == REGNO (scratch))
1661 abort ();
1662 }
1663 return insn;
1664 }
1665
1666 /* Return nonzero if this function is known to have a null epilogue. */
1667
1668 int
1669 direct_return (void)
1670 {
1671 if (reload_completed)
1672 {
1673 if (cfun->static_chain_decl == 0
1674 && (spu_saved_regs_size ()
1675 + get_frame_size ()
1676 + crtl->outgoing_args_size
1677 + crtl->args.pretend_args_size == 0)
1678 && crtl->is_leaf)
1679 return 1;
1680 }
1681 return 0;
1682 }
1683
1684 /*
1685 The stack frame looks like this:
1686 +-------------+
1687 | incoming |
1688 | args |
1689 AP -> +-------------+
1690 | $lr save |
1691 +-------------+
1692 prev SP | back chain |
1693 +-------------+
1694 | var args |
1695 | reg save | crtl->args.pretend_args_size bytes
1696 +-------------+
1697 | ... |
1698 | saved regs | spu_saved_regs_size() bytes
1699 FP -> +-------------+
1700 | ... |
1701 | vars | get_frame_size() bytes
1702 HFP -> +-------------+
1703 | ... |
1704 | outgoing |
1705 | args | crtl->outgoing_args_size bytes
1706 +-------------+
1707 | $lr of next |
1708 | frame |
1709 +-------------+
1710 | back chain |
1711 SP -> +-------------+
1712
1713 */
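/* For example (illustrative, not from the original source): a non-leaf
   function with 32 bytes of locals, two saved registers and 48 bytes
   of outgoing args allocates
   32 + 2*16 + 48 + crtl->args.pretend_args_size + STACK_POINTER_OFFSET
   bytes, stores the caller's $sp as the new back chain, and saves $lr
   16 bytes above the caller's stack pointer.  */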
1714 void
1715 spu_expand_prologue (void)
1716 {
1717 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1718 HOST_WIDE_INT total_size;
1719 HOST_WIDE_INT saved_regs_size;
1720 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1721 rtx scratch_reg_0, scratch_reg_1;
1722 rtx_insn *insn;
1723 rtx real;
1724
1725 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1726 cfun->machine->pic_reg = pic_offset_table_rtx;
1727
1728 if (spu_naked_function_p (current_function_decl))
1729 return;
1730
1731 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1732 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1733
1734 saved_regs_size = spu_saved_regs_size ();
1735 total_size = size + saved_regs_size
1736 + crtl->outgoing_args_size
1737 + crtl->args.pretend_args_size;
1738
1739 if (!crtl->is_leaf
1740 || cfun->calls_alloca || total_size > 0)
1741 total_size += STACK_POINTER_OFFSET;
1742
1743 /* Save this first because code after this might use the link
1744 register as a scratch register. */
1745 if (!crtl->is_leaf)
1746 {
1747 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1748 RTX_FRAME_RELATED_P (insn) = 1;
1749 }
1750
1751 if (total_size > 0)
1752 {
1753 offset = -crtl->args.pretend_args_size;
1754 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1755 if (need_to_save_reg (regno, 1))
1756 {
1757 offset -= 16;
1758 insn = frame_emit_store (regno, sp_reg, offset);
1759 RTX_FRAME_RELATED_P (insn) = 1;
1760 }
1761 }
1762
1763 if (flag_pic && cfun->machine->pic_reg)
1764 {
1765 rtx pic_reg = cfun->machine->pic_reg;
1766 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1767 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1768 }
1769
1770 if (total_size > 0)
1771 {
1772 if (flag_stack_check || flag_stack_clash_protection)
1773 {
1774 /* We compare against total_size-1 because
1775 ($sp >= total_size) <=> ($sp > total_size-1) */
1776 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1777 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1778 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1779 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1780 {
1781 emit_move_insn (scratch_v4si, size_v4si);
1782 size_v4si = scratch_v4si;
1783 }
1784 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1785 emit_insn (gen_vec_extractv4sisi
1786 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1787 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1788 }
1789
1790 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1791 the value of the previous $sp because we save it as the back
1792 chain. */
1793 if (total_size <= 2000)
1794 {
1795 /* In this case we save the back chain first. */
1796 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1797 insn =
1798 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1799 }
1800 else
1801 {
1802 insn = emit_move_insn (scratch_reg_0, sp_reg);
1803 insn =
1804 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1805 }
1806 RTX_FRAME_RELATED_P (insn) = 1;
1807 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1808 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1809
1810 if (total_size > 2000)
1811 {
1812 /* Save the back chain ptr */
1813 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1814 }
1815
1816 if (frame_pointer_needed)
1817 {
1818 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1819 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1820 + crtl->outgoing_args_size;
1821 /* Set the new frame_pointer */
1822 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1823 RTX_FRAME_RELATED_P (insn) = 1;
1824 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1825 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1826 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1827 }
1828 }
1829
1830 if (flag_stack_usage_info)
1831 current_function_static_stack_size = total_size;
1832 }
1833
1834 void
1835 spu_expand_epilogue (bool sibcall_p)
1836 {
1837 int size = get_frame_size (), offset, regno;
1838 HOST_WIDE_INT saved_regs_size, total_size;
1839 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1840 rtx scratch_reg_0;
1841
1842 if (spu_naked_function_p (current_function_decl))
1843 return;
1844
1845 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1846
1847 saved_regs_size = spu_saved_regs_size ();
1848 total_size = size + saved_regs_size
1849 + crtl->outgoing_args_size
1850 + crtl->args.pretend_args_size;
1851
1852 if (!crtl->is_leaf
1853 || cfun->calls_alloca || total_size > 0)
1854 total_size += STACK_POINTER_OFFSET;
1855
1856 if (total_size > 0)
1857 {
1858 if (cfun->calls_alloca)
1859 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1860 else
1861 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1862
1863
1864 if (saved_regs_size > 0)
1865 {
1866 offset = -crtl->args.pretend_args_size;
1867 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1868 if (need_to_save_reg (regno, 1))
1869 {
1870 offset -= 0x10;
1871 frame_emit_load (regno, sp_reg, offset);
1872 }
1873 }
1874 }
1875
1876 if (!crtl->is_leaf)
1877 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1878
1879 if (!sibcall_p)
1880 {
1881 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1882 emit_jump_insn (gen__return ());
1883 }
1884 }
1885
1886 rtx
1887 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1888 {
1889 if (count != 0)
1890 return 0;
1891 /* This is inefficient because it ends up copying to a save-register
1892 which then gets saved even though $lr has already been saved. But
1893 it does generate better code for leaf functions and we don't need
1894 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1895 used for __builtin_return_address anyway, so maybe we don't care if
1896 it's inefficient. */
1897 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1898 }
1899 \f
1900
1901 /* Given VAL, generate a constant appropriate for MODE.
1902 If MODE is a vector mode, every element will be VAL.
1903 For TImode, VAL will be zero extended to 128 bits. */
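/* For instance (illustrative, not from the original source),
   spu_const (V4SImode, 1) yields (const_vector:V4SI [1 1 1 1]), and,
   since VAL is the bit pattern for float modes, spu_const (SFmode,
   0x3f800000) yields the SFmode constant 1.0 under the usual
   IEEE-style encoding.  */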
1904 rtx
1905 spu_const (machine_mode mode, HOST_WIDE_INT val)
1906 {
1907 rtx inner;
1908
1909 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1910 || GET_MODE_CLASS (mode) == MODE_FLOAT
1911 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1912 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1913
1914 if (GET_MODE_CLASS (mode) == MODE_INT)
1915 return immed_double_const (val, 0, mode);
1916
1917 /* val is the bit representation of the float */
1918 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1919 return hwint_to_const_double (mode, val);
1920
1921 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1922 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1923 else
1924 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1925
1926 return gen_const_vec_duplicate (mode, inner);
1927 }
1928
1929 /* Create a MODE vector constant from 4 ints. */
1930 rtx
1931 spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
1932 {
1933 unsigned char arr[16];
1934 arr[0] = (a >> 24) & 0xff;
1935 arr[1] = (a >> 16) & 0xff;
1936 arr[2] = (a >> 8) & 0xff;
1937 arr[3] = (a >> 0) & 0xff;
1938 arr[4] = (b >> 24) & 0xff;
1939 arr[5] = (b >> 16) & 0xff;
1940 arr[6] = (b >> 8) & 0xff;
1941 arr[7] = (b >> 0) & 0xff;
1942 arr[8] = (c >> 24) & 0xff;
1943 arr[9] = (c >> 16) & 0xff;
1944 arr[10] = (c >> 8) & 0xff;
1945 arr[11] = (c >> 0) & 0xff;
1946 arr[12] = (d >> 24) & 0xff;
1947 arr[13] = (d >> 16) & 0xff;
1948 arr[14] = (d >> 8) & 0xff;
1949 arr[15] = (d >> 0) & 0xff;
1950 return array_to_constant(mode, arr);
1951 }
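/* Usage sketch (illustrative, not from the original source): the four
   ints are packed big-endian, so
   spu_const_from_ints (V16QImode, 0x00010203, 0x04050607,
                        0x08090a0b, 0x0c0d0e0f)
   yields the byte sequence 0x00 ... 0x0f, e.g. usable as an identity
   pattern for shufb-style byte selection.  */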
1952 \f
1953 /* branch hint stuff */
1954
1955 /* An array of these is used to propagate hints to predecessor blocks. */
1956 struct spu_bb_info
1957 {
1958 rtx_insn *prop_jump; /* propagated from another block */
1959 int bb_index; /* the original block. */
1960 };
1961 static struct spu_bb_info *spu_bb_info;
1962
1963 #define STOP_HINT_P(INSN) \
1964 (CALL_P(INSN) \
1965 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1966 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1967
1968 /* 1 when RTX is a hinted branch or its target. We keep track of
1969 what has been hinted so the safe-hint code can test it easily. */
1970 #define HINTED_P(RTX) \
1971 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
1972
1973 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
1974 #define SCHED_ON_EVEN_P(RTX) \
1975 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
1976
1977 /* Emit a nop for INSN such that the two will dual issue. This assumes
1978 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1979 We check for TImode to handle a MULTI1 insn which has dual issued its
1980 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
1981 static void
1982 emit_nop_for_insn (rtx_insn *insn)
1983 {
1984 int p;
1985 rtx_insn *new_insn;
1986
1987 /* We need to handle JUMP_TABLE_DATA separately. */
1988 if (JUMP_TABLE_DATA_P (insn))
1989 {
1990 new_insn = emit_insn_after (gen_lnop(), insn);
1991 recog_memoized (new_insn);
1992 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
1993 return;
1994 }
1995
1996 p = get_pipe (insn);
1997 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
1998 new_insn = emit_insn_after (gen_lnop (), insn);
1999 else if (p == 1 && GET_MODE (insn) == TImode)
2000 {
2001 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2002 PUT_MODE (new_insn, TImode);
2003 PUT_MODE (insn, VOIDmode);
2004 }
2005 else
2006 new_insn = emit_insn_after (gen_lnop (), insn);
2007 recog_memoized (new_insn);
2008 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
2009 }
2010
2011 /* Insert nops in basic blocks to meet dual issue alignment
2012 requirements. Also make sure hbrp and hint instructions are at least
2013 one cycle apart, possibly inserting a nop. */
2014 static void
2015 pad_bb(void)
2016 {
2017 rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
2018 int length;
2019 int addr;
2020
2021 /* This sets up INSN_ADDRESSES. */
2022 shorten_branches (get_insns ());
2023
2024 /* Keep track of length added by nops. */
2025 length = 0;
2026
2027 prev_insn = 0;
2028 insn = get_insns ();
2029 if (!active_insn_p (insn))
2030 insn = next_active_insn (insn);
2031 for (; insn; insn = next_insn)
2032 {
2033 next_insn = next_active_insn (insn);
2034 if (INSN_P (insn)
2035 && (INSN_CODE (insn) == CODE_FOR_iprefetch
2036 || INSN_CODE (insn) == CODE_FOR_hbr))
2037 {
2038 if (hbr_insn)
2039 {
2040 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2041 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2042 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2043 || (a1 - a0 == 4))
2044 {
2045 prev_insn = emit_insn_before (gen_lnop (), insn);
2046 PUT_MODE (prev_insn, GET_MODE (insn));
2047 PUT_MODE (insn, TImode);
2048 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
2049 length += 4;
2050 }
2051 }
2052 hbr_insn = insn;
2053 }
2054 if (INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
2055 {
2056 if (GET_MODE (insn) == TImode)
2057 PUT_MODE (next_insn, TImode);
2058 insn = next_insn;
2059 next_insn = next_active_insn (insn);
2060 }
2061 addr = INSN_ADDRESSES (INSN_UID (insn));
2062 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2063 {
2064 if (((addr + length) & 7) != 0)
2065 {
2066 emit_nop_for_insn (prev_insn);
2067 length += 4;
2068 }
2069 }
2070 else if (GET_MODE (insn) == TImode
2071 && ((next_insn && GET_MODE (next_insn) != TImode)
2072 || get_attr_type (insn) == TYPE_MULTI0)
2073 && ((addr + length) & 7) != 0)
2074 {
2075 /* prev_insn will always be set because the first insn is
2076 always 8-byte aligned. */
2077 emit_nop_for_insn (prev_insn);
2078 length += 4;
2079 }
2080 prev_insn = insn;
2081 }
2082 }
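/* Worked example for the alignment test above: if a hinted call was
   originally at address 0x124 and 8 bytes of nops have already been
   added earlier in the block, then ((0x124 + 8) & 7) == 4, so one more
   4-byte nop is emitted next to the previous insn and the call ends up
   on an 8-byte boundary.  */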
2083
2084 \f
2085 /* Routines for branch hints. */
2086
2087 static void
2088 spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
2089 int distance, sbitmap blocks)
2090 {
2091 rtx_insn *hint;
2092 rtx_insn *insn;
2093 rtx_jump_table_data *table;
2094
2095 if (before == 0 || branch == 0 || target == 0)
2096 return;
2097
2098 /* While scheduling we require hints to be no further than 600 bytes
2099 away, so we need to enforce that here too. */
2100 if (distance > 600)
2101 return;
2102
2103 /* If we have a basic block note, emit the hint after the note. */
2104 if (NOTE_INSN_BASIC_BLOCK_P (before))
2105 before = NEXT_INSN (before);
2106
2107 rtx_code_label *branch_label = gen_label_rtx ();
2108 LABEL_NUSES (branch_label)++;
2109 LABEL_PRESERVE_P (branch_label) = 1;
2110 insn = emit_label_before (branch_label, branch);
2111 rtx branch_label_ref = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2112 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
2113
2114 hint = emit_insn_before (gen_hbr (branch_label_ref, target), before);
2115 recog_memoized (hint);
2116 INSN_LOCATION (hint) = INSN_LOCATION (branch);
2117 HINTED_P (branch) = 1;
2118
2119 if (GET_CODE (target) == LABEL_REF)
2120 HINTED_P (XEXP (target, 0)) = 1;
2121 else if (tablejump_p (branch, 0, &table))
2122 {
2123 rtvec vec;
2124 int j;
2125 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2126 vec = XVEC (PATTERN (table), 0);
2127 else
2128 vec = XVEC (PATTERN (table), 1);
2129 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2130 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2131 }
2132
2133 if (distance >= 588)
2134 {
2135 /* Make sure the hint isn't scheduled any earlier than this point,
2136 which could make it too far away for the branch offset to fit. */
2137 insn = emit_insn_before (gen_blockage (), hint);
2138 recog_memoized (insn);
2139 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2140 }
2141 else if (distance <= 8 * 4)
2142 {
2143 /* To guarantee at least 8 insns between the hint and branch we
2144 insert nops. */
2145 int d;
2146 for (d = distance; d < 8 * 4; d += 4)
2147 {
2148 insn =
2149 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2150 recog_memoized (insn);
2151 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2152 }
2153
2154 /* Make sure any nops inserted aren't scheduled before the hint. */
2155 insn = emit_insn_after (gen_blockage (), hint);
2156 recog_memoized (insn);
2157 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2158
2159 /* Make sure any nops inserted aren't scheduled after the call. */
2160 if (CALL_P (branch) && distance < 8 * 4)
2161 {
2162 insn = emit_insn_before (gen_blockage (), branch);
2163 recog_memoized (insn);
2164 INSN_LOCATION (insn) = INSN_LOCATION (branch);
2165 }
2166 }
2167 }
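/* Worked example for the distance checks above: with DISTANCE == 20
   bytes the hint would be only 5 insns ahead of the branch, so the
   loop emits (32 - 20) / 4 == 3 nopn insns after the hint, and the
   blockage that follows keeps those nops from being scheduled before
   the hint.  */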
2168
2169 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2170 the rtx for the branch target. */
2171 static rtx
2172 get_branch_target (rtx_insn *branch)
2173 {
2174 if (JUMP_P (branch))
2175 {
2176 rtx set, src;
2177
2178 /* Return statements */
2179 if (GET_CODE (PATTERN (branch)) == RETURN)
2180 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2181
2182 /* ASM GOTOs. */
2183 if (extract_asm_operands (PATTERN (branch)) != NULL)
2184 return NULL;
2185
2186 set = single_set (branch);
2187 src = SET_SRC (set);
2188 if (GET_CODE (SET_DEST (set)) != PC)
2189 abort ();
2190
2191 if (GET_CODE (src) == IF_THEN_ELSE)
2192 {
2193 rtx lab = 0;
2194 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2195 if (note)
2196 {
2197 /* If the more probable case is not a fall through, then
2198 try a branch hint. */
2199 int prob = profile_probability::from_reg_br_prob_note
2200 (XINT (note, 0)).to_reg_br_prob_base ();
2201 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2202 && GET_CODE (XEXP (src, 1)) != PC)
2203 lab = XEXP (src, 1);
2204 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2205 && GET_CODE (XEXP (src, 2)) != PC)
2206 lab = XEXP (src, 2);
2207 }
2208 if (lab)
2209 {
2210 if (GET_CODE (lab) == RETURN)
2211 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2212 return lab;
2213 }
2214 return 0;
2215 }
2216
2217 return src;
2218 }
2219 else if (CALL_P (branch))
2220 {
2221 rtx call;
2222 /* All of our call patterns are in a PARALLEL and the CALL is
2223 the first pattern in the PARALLEL. */
2224 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2225 abort ();
2226 call = XVECEXP (PATTERN (branch), 0, 0);
2227 if (GET_CODE (call) == SET)
2228 call = SET_SRC (call);
2229 if (GET_CODE (call) != CALL)
2230 abort ();
2231 return XEXP (XEXP (call, 0), 0);
2232 }
2233 return 0;
2234 }
2235
2236 /* The special $hbr register is used to prevent the insn scheduler from
2237 moving hbr insns across instructions which invalidate them. It
2238 should only be used in a clobber, and this function searches for
2239 insns which clobber it. */
2240 static bool
2241 insn_clobbers_hbr (rtx_insn *insn)
2242 {
2243 if (INSN_P (insn)
2244 && GET_CODE (PATTERN (insn)) == PARALLEL)
2245 {
2246 rtx parallel = PATTERN (insn);
2247 rtx clobber;
2248 int j;
2249 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2250 {
2251 clobber = XVECEXP (parallel, 0, j);
2252 if (GET_CODE (clobber) == CLOBBER
2253 && GET_CODE (XEXP (clobber, 0)) == REG
2254 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2255 return 1;
2256 }
2257 }
2258 return 0;
2259 }
2260
2261 /* Search up to 32 insns starting at FIRST:
2262 - at any kind of hinted branch, just return
2263 - at any unconditional branch in the first 15 insns, just return
2264 - at a call or indirect branch, after the first 15 insns, force it to
2265 an even address and return
2266 - at any unconditional branch, after the first 15 insns, force it to
2267 an even address.
2268 At the end of the search, insert an hbrp within 4 insns of FIRST,
2269 and an hbrp within 16 instructions of FIRST.
2270 */
2271 static void
2272 insert_hbrp_for_ilb_runout (rtx_insn *first)
2273 {
2274 rtx_insn *insn, *before_4 = 0, *before_16 = 0;
2275 int addr = 0, length, first_addr = -1;
2276 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2277 int insert_lnop_after = 0;
2278 for (insn = first; insn; insn = NEXT_INSN (insn))
2279 if (INSN_P (insn))
2280 {
2281 if (first_addr == -1)
2282 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2283 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2284 length = get_attr_length (insn);
2285
2286 if (before_4 == 0 && addr + length >= 4 * 4)
2287 before_4 = insn;
2288 /* We test for 14 instructions because the first hbrp will add
2289 up to 2 instructions. */
2290 if (before_16 == 0 && addr + length >= 14 * 4)
2291 before_16 = insn;
2292
2293 if (INSN_CODE (insn) == CODE_FOR_hbr)
2294 {
2295 /* Make sure an hbrp is at least 2 cycles away from a hint.
2296 Insert an lnop after the hbrp when necessary. */
2297 if (before_4 == 0 && addr > 0)
2298 {
2299 before_4 = insn;
2300 insert_lnop_after |= 1;
2301 }
2302 else if (before_4 && addr <= 4 * 4)
2303 insert_lnop_after |= 1;
2304 if (before_16 == 0 && addr > 10 * 4)
2305 {
2306 before_16 = insn;
2307 insert_lnop_after |= 2;
2308 }
2309 else if (before_16 && addr <= 14 * 4)
2310 insert_lnop_after |= 2;
2311 }
2312
2313 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2314 {
2315 if (addr < hbrp_addr0)
2316 hbrp_addr0 = addr;
2317 else if (addr < hbrp_addr1)
2318 hbrp_addr1 = addr;
2319 }
2320
2321 if (CALL_P (insn) || JUMP_P (insn))
2322 {
2323 if (HINTED_P (insn))
2324 return;
2325
2326 /* Any branch after the first 15 insns should be on an even
2327 address to avoid a special case branch. There might be
2328 some nops and/or hbrps inserted, so we test after 10
2329 insns. */
2330 if (addr > 10 * 4)
2331 SCHED_ON_EVEN_P (insn) = 1;
2332 }
2333
2334 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2335 return;
2336
2337
2338 if (addr + length >= 32 * 4)
2339 {
2340 gcc_assert (before_4 && before_16);
2341 if (hbrp_addr0 > 4 * 4)
2342 {
2343 insn =
2344 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2345 recog_memoized (insn);
2346 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2347 INSN_ADDRESSES_NEW (insn,
2348 INSN_ADDRESSES (INSN_UID (before_4)));
2349 PUT_MODE (insn, GET_MODE (before_4));
2350 PUT_MODE (before_4, TImode);
2351 if (insert_lnop_after & 1)
2352 {
2353 insn = emit_insn_before (gen_lnop (), before_4);
2354 recog_memoized (insn);
2355 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2356 INSN_ADDRESSES_NEW (insn,
2357 INSN_ADDRESSES (INSN_UID (before_4)));
2358 PUT_MODE (insn, TImode);
2359 }
2360 }
2361 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2362 && hbrp_addr1 > 16 * 4)
2363 {
2364 insn =
2365 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2366 recog_memoized (insn);
2367 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2368 INSN_ADDRESSES_NEW (insn,
2369 INSN_ADDRESSES (INSN_UID (before_16)));
2370 PUT_MODE (insn, GET_MODE (before_16));
2371 PUT_MODE (before_16, TImode);
2372 if (insert_lnop_after & 2)
2373 {
2374 insn = emit_insn_before (gen_lnop (), before_16);
2375 recog_memoized (insn);
2376 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2377 INSN_ADDRESSES_NEW (insn,
2378 INSN_ADDRESSES (INSN_UID
2379 (before_16)));
2380 PUT_MODE (insn, TImode);
2381 }
2382 }
2383 return;
2384 }
2385 }
2386 else if (BARRIER_P (insn))
2387 return;
2388
2389 }
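/* Worked arithmetic for the scan above: the window is 32 insns, i.e.
   128 bytes.  BEFORE_4 is the first insn whose end reaches 4 insns
   (16 bytes) past FIRST and BEFORE_16 the first to reach 14 insns
   (56 bytes); if the existing iprefetch insns recorded in hbrp_addr0/1
   do not already cover those ranges, new hbrps are emitted before
   BEFORE_4 and BEFORE_16.  */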
2390
2391 /* The SPU might hang when it executes 48 inline instructions after a
2392 hinted branch jumps to its hinted target. The beginning of a
2393 function and the return from a call might have been hinted, and
2394 must be handled as well. To prevent a hang we insert 2 hbrps. The
2395 first should be within 6 insns of the branch target. The second
2396 should be within 22 insns of the branch target. When determining
2397 if hbrps are necessary, we look for only 32 inline instructions,
2398 because up to 12 nops and 4 hbrps could be inserted. Similarly,
2399 when inserting new hbrps, we insert them within 4 and 16 insns of
2400 the target. */
2401 static void
2402 insert_hbrp (void)
2403 {
2404 rtx_insn *insn;
2405 if (TARGET_SAFE_HINTS)
2406 {
2407 shorten_branches (get_insns ());
2408 /* Insert an hbrp at the beginning of the function. */
2409 insn = next_active_insn (get_insns ());
2410 if (insn)
2411 insert_hbrp_for_ilb_runout (insn);
2412 /* Insert hbrp after hinted targets. */
2413 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2414 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2415 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2416 }
2417 }
2418
2419 static int in_spu_reorg;
2420
2421 static void
2422 spu_var_tracking (void)
2423 {
2424 if (flag_var_tracking)
2425 {
2426 df_analyze ();
2427 timevar_push (TV_VAR_TRACKING);
2428 variable_tracking_main ();
2429 timevar_pop (TV_VAR_TRACKING);
2430 df_finish_pass (false);
2431 }
2432 }
2433
2434 /* Insert branch hints. There are no branch optimizations after this
2435 pass, so it's safe to set our branch hints now. */
2436 static void
2437 spu_machine_dependent_reorg (void)
2438 {
2439 sbitmap blocks;
2440 basic_block bb;
2441 rtx_insn *branch, *insn;
2442 rtx branch_target = 0;
2443 int branch_addr = 0, insn_addr, required_dist = 0;
2444 int i;
2445 unsigned int j;
2446
2447 if (!TARGET_BRANCH_HINTS || optimize == 0)
2448 {
2449 /* We still do it for unoptimized code because an external
2450 function might have hinted a call or return. */
2451 compute_bb_for_insn ();
2452 insert_hbrp ();
2453 pad_bb ();
2454 spu_var_tracking ();
2455 free_bb_for_insn ();
2456 return;
2457 }
2458
2459 blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
2460 bitmap_clear (blocks);
2461
2462 in_spu_reorg = 1;
2463 compute_bb_for_insn ();
2464
2465 /* (Re-)discover loops so that bb->loop_father can be used
2466 in the analysis below. */
2467 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2468
2469 compact_blocks ();
2470
2471 spu_bb_info =
2472 (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
2473 sizeof (struct spu_bb_info));
2474
2475 /* We need exact insn addresses and lengths. */
2476 shorten_branches (get_insns ());
2477
2478 for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
2479 {
2480 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2481 branch = 0;
2482 if (spu_bb_info[i].prop_jump)
2483 {
2484 branch = spu_bb_info[i].prop_jump;
2485 branch_target = get_branch_target (branch);
2486 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2487 required_dist = spu_hint_dist;
2488 }
2489 /* Search from end of a block to beginning. In this loop, find
2490 jumps which need a branch hint and emit the hint only when:
2491 - it's an indirect branch and we're at the insn which sets
2492 the register
2493 - we're at an insn that will invalidate the hint. e.g., a
2494 call, another hint insn, inline asm that clobbers $hbr, and
2495 some inlined operations (divmodsi4). Don't consider jumps
2496 because they are only at the end of a block and are
2497 considered when we are deciding whether to propagate
2498 - we're getting too far away from the branch. The hbr insns
2499 only have a signed 10-bit offset.
2500 We go back as far as possible so the branch will be considered
2501 for propagation when we get to the beginning of the block. */
2502 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2503 {
2504 if (INSN_P (insn))
2505 {
2506 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2507 if (branch
2508 && ((GET_CODE (branch_target) == REG
2509 && set_of (branch_target, insn) != NULL_RTX)
2510 || insn_clobbers_hbr (insn)
2511 || branch_addr - insn_addr > 600))
2512 {
2513 rtx_insn *next = NEXT_INSN (insn);
2514 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2515 if (insn != BB_END (bb)
2516 && branch_addr - next_addr >= required_dist)
2517 {
2518 if (dump_file)
2519 fprintf (dump_file,
2520 "hint for %i in block %i before %i\n",
2521 INSN_UID (branch), bb->index,
2522 INSN_UID (next));
2523 spu_emit_branch_hint (next, branch, branch_target,
2524 branch_addr - next_addr, blocks);
2525 }
2526 branch = 0;
2527 }
2528
2529 /* JUMP_P will only be true at the end of a block. When
2530 branch is already set it means we've previously decided
2531 to propagate a hint for that branch into this block. */
2532 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2533 {
2534 branch = 0;
2535 if ((branch_target = get_branch_target (insn)))
2536 {
2537 branch = insn;
2538 branch_addr = insn_addr;
2539 required_dist = spu_hint_dist;
2540 }
2541 }
2542 }
2543 if (insn == BB_HEAD (bb))
2544 break;
2545 }
2546
2547 if (branch)
2548 {
2549 /* If we haven't emitted a hint for this branch yet, it might
2550 be profitable to emit it in one of the predecessor blocks,
2551 especially for loops. */
2552 rtx_insn *bbend;
2553 basic_block prev = 0, prop = 0, prev2 = 0;
2554 int loop_exit = 0, simple_loop = 0;
2555 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2556
2557 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2558 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2559 prev = EDGE_PRED (bb, j)->src;
2560 else
2561 prev2 = EDGE_PRED (bb, j)->src;
2562
2563 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2564 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2565 loop_exit = 1;
2566 else if (EDGE_SUCC (bb, j)->dest == bb)
2567 simple_loop = 1;
2568
2569 /* If this branch is a loop exit then propagate to previous
2570 fallthru block. This catches the cases when it is a simple
2571 loop or when there is an initial branch into the loop. */
2572 if (prev && (loop_exit || simple_loop)
2573 && bb_loop_depth (prev) <= bb_loop_depth (bb))
2574 prop = prev;
2575
2576 /* If there is only one adjacent predecessor, don't propagate
2577 outside this loop. */
2578 else if (prev && single_pred_p (bb)
2579 && prev->loop_father == bb->loop_father)
2580 prop = prev;
2581
2582 /* If this is the JOIN block of a simple IF-THEN then
2583 propagate the hint to the HEADER block. */
2584 else if (prev && prev2
2585 && EDGE_COUNT (bb->preds) == 2
2586 && EDGE_COUNT (prev->preds) == 1
2587 && EDGE_PRED (prev, 0)->src == prev2
2588 && prev2->loop_father == bb->loop_father
2589 && GET_CODE (branch_target) != REG)
2590 prop = prev;
2591
2592 /* Don't propagate when:
2593 - this is a simple loop and the hint would be too far
2594 - this is not a simple loop and there are 16 insns in
2595 this block already
2596 - the predecessor block ends in a branch that will be
2597 hinted
2598 - the predecessor block ends in an insn that invalidates
2599 the hint */
2600 if (prop
2601 && prop->index >= 0
2602 && (bbend = BB_END (prop))
2603 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2604 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2605 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2606 {
2607 if (dump_file)
2608 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2609 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2610 bb->index, prop->index, bb_loop_depth (bb),
2611 INSN_UID (branch), loop_exit, simple_loop,
2612 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2613
2614 spu_bb_info[prop->index].prop_jump = branch;
2615 spu_bb_info[prop->index].bb_index = i;
2616 }
2617 else if (branch_addr - next_addr >= required_dist)
2618 {
2619 if (dump_file)
2620 fprintf (dump_file, "hint for %i in block %i before %i\n",
2621 INSN_UID (branch), bb->index,
2622 INSN_UID (NEXT_INSN (insn)));
2623 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2624 branch_addr - next_addr, blocks);
2625 }
2626 branch = 0;
2627 }
2628 }
2629 free (spu_bb_info);
2630
2631 if (!bitmap_empty_p (blocks))
2632 find_many_sub_basic_blocks (blocks);
2633
2634 /* We have to schedule to make sure alignment is ok. */
2635 FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
2636
2637 /* The hints need to be scheduled, so call it again. */
2638 schedule_insns ();
2639 df_finish_pass (true);
2640
2641 insert_hbrp ();
2642
2643 pad_bb ();
2644
2645 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2646 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2647 {
2648 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2649 between its branch label and the branch. We don't move the
2650 label because GCC expects it at the beginning of the block. */
2651 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2652 rtx label_ref = XVECEXP (unspec, 0, 0);
2653 rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
2654 rtx_insn *branch;
2655 int offset = 0;
2656 for (branch = NEXT_INSN (label);
2657 !JUMP_P (branch) && !CALL_P (branch);
2658 branch = NEXT_INSN (branch))
2659 if (NONJUMP_INSN_P (branch))
2660 offset += get_attr_length (branch);
2661 if (offset > 0)
2662 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
2663 }
2664
2665 spu_var_tracking ();
2666
2667 loop_optimizer_finalize ();
2668
2669 free_bb_for_insn ();
2670
2671 in_spu_reorg = 0;
2672 }
2673 \f
2674
2675 /* Insn scheduling routines, primarily for dual issue. */
2676 static int
2677 spu_sched_issue_rate (void)
2678 {
2679 return 2;
2680 }
2681
2682 static int
2683 uses_ls_unit(rtx_insn *insn)
2684 {
2685 rtx set = single_set (insn);
2686 if (set != 0
2687 && (GET_CODE (SET_DEST (set)) == MEM
2688 || GET_CODE (SET_SRC (set)) == MEM))
2689 return 1;
2690 return 0;
2691 }
2692
2693 static int
2694 get_pipe (rtx_insn *insn)
2695 {
2696 enum attr_type t;
2697 /* Handle inline asm */
2698 if (INSN_CODE (insn) == -1)
2699 return -1;
2700 t = get_attr_type (insn);
2701 switch (t)
2702 {
2703 case TYPE_CONVERT:
2704 return -2;
2705 case TYPE_MULTI0:
2706 return -1;
2707
2708 case TYPE_FX2:
2709 case TYPE_FX3:
2710 case TYPE_SPR:
2711 case TYPE_NOP:
2712 case TYPE_FXB:
2713 case TYPE_FPD:
2714 case TYPE_FP6:
2715 case TYPE_FP7:
2716 return 0;
2717
2718 case TYPE_LNOP:
2719 case TYPE_SHUF:
2720 case TYPE_LOAD:
2721 case TYPE_STORE:
2722 case TYPE_BR:
2723 case TYPE_MULTI1:
2724 case TYPE_HBR:
2725 case TYPE_IPREFETCH:
2726 return 1;
2727 default:
2728 abort ();
2729 }
2730 }
2731
2732
2733 /* haifa-sched.c has a static variable that keeps track of the current
2734 cycle. It is passed to spu_sched_reorder, and we record it here for
2735 use by spu_sched_variable_issue. It won't be accurate if the
2736 scheduler updates its clock_var between the two calls. */
2737 static int clock_var;
2738
2739 /* This is used to keep track of insn alignment. Set to 0 at the
2740 beginning of each block and increased by the "length" attr of each
2741 insn scheduled. */
2742 static int spu_sched_length;
2743
2744 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2745 ready list appropriately in spu_sched_reorder(). */
2746 static int pipe0_clock;
2747 static int pipe1_clock;
2748
2749 static int prev_clock_var;
2750
2751 static int prev_priority;
2752
2753 /* The SPU needs to load the next ilb sometime during the execution of
2754 the previous ilb. There is a potential conflict if every cycle has a
2755 load or store. To avoid the conflict we make sure the load/store
2756 unit is free for at least one cycle during the execution of insns in
2757 the previous ilb. */
2758 static int spu_ls_first;
2759 static int prev_ls_clock;
2760
2761 static void
2762 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2763 int max_ready ATTRIBUTE_UNUSED)
2764 {
2765 spu_sched_length = 0;
2766 }
2767
2768 static void
2769 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2770 int max_ready ATTRIBUTE_UNUSED)
2771 {
2772 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2773 {
2774 /* When any block might be at least 8-byte aligned, assume they
2775 will all be at least 8-byte aligned to make sure dual issue
2776 works out correctly. */
2777 spu_sched_length = 0;
2778 }
2779 spu_ls_first = INT_MAX;
2780 clock_var = -1;
2781 prev_ls_clock = -1;
2782 pipe0_clock = -1;
2783 pipe1_clock = -1;
2784 prev_clock_var = -1;
2785 prev_priority = -1;
2786 }
2787
2788 static int
2789 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2790 int verbose ATTRIBUTE_UNUSED,
2791 rtx_insn *insn, int more)
2792 {
2793 int len;
2794 int p;
2795 if (GET_CODE (PATTERN (insn)) == USE
2796 || GET_CODE (PATTERN (insn)) == CLOBBER
2797 || (len = get_attr_length (insn)) == 0)
2798 return more;
2799
2800 spu_sched_length += len;
2801
2802 /* Reset on inline asm */
2803 if (INSN_CODE (insn) == -1)
2804 {
2805 spu_ls_first = INT_MAX;
2806 pipe0_clock = -1;
2807 pipe1_clock = -1;
2808 return 0;
2809 }
2810 p = get_pipe (insn);
2811 if (p == 0)
2812 pipe0_clock = clock_var;
2813 else
2814 pipe1_clock = clock_var;
2815
2816 if (in_spu_reorg)
2817 {
2818 if (clock_var - prev_ls_clock > 1
2819 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2820 spu_ls_first = INT_MAX;
2821 if (uses_ls_unit (insn))
2822 {
2823 if (spu_ls_first == INT_MAX)
2824 spu_ls_first = spu_sched_length;
2825 prev_ls_clock = clock_var;
2826 }
2827
2828 /* The scheduler hasn't inserted the nop, but we will later on.
2829 Include those nops in spu_sched_length. */
2830 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2831 spu_sched_length += 4;
2832 prev_clock_var = clock_var;
2833
2834 /* more is -1 when called from spu_sched_reorder for new insns
2835 that don't have INSN_PRIORITY */
2836 if (more >= 0)
2837 prev_priority = INSN_PRIORITY (insn);
2838 }
2839
2840 /* Always try issuing more insns. spu_sched_reorder will decide
2841 when the cycle should be advanced. */
2842 return 1;
2843 }
2844
2845 /* This function is called for both TARGET_SCHED_REORDER and
2846 TARGET_SCHED_REORDER2. */
2847 static int
2848 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2849 rtx_insn **ready, int *nreadyp, int clock)
2850 {
2851 int i, nready = *nreadyp;
2852 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2853 rtx_insn *insn;
2854
2855 clock_var = clock;
2856
2857 if (nready <= 0 || pipe1_clock >= clock)
2858 return 0;
2859
2860 /* Find any rtl insns that don't generate assembly insns and schedule
2861 them first. */
2862 for (i = nready - 1; i >= 0; i--)
2863 {
2864 insn = ready[i];
2865 if (INSN_CODE (insn) == -1
2866 || INSN_CODE (insn) == CODE_FOR_blockage
2867 || (INSN_P (insn) && get_attr_length (insn) == 0))
2868 {
2869 ready[i] = ready[nready - 1];
2870 ready[nready - 1] = insn;
2871 return 1;
2872 }
2873 }
2874
2875 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2876 for (i = 0; i < nready; i++)
2877 if (INSN_CODE (ready[i]) != -1)
2878 {
2879 insn = ready[i];
2880 switch (get_attr_type (insn))
2881 {
2882 default:
2883 case TYPE_MULTI0:
2884 case TYPE_CONVERT:
2885 case TYPE_FX2:
2886 case TYPE_FX3:
2887 case TYPE_SPR:
2888 case TYPE_NOP:
2889 case TYPE_FXB:
2890 case TYPE_FPD:
2891 case TYPE_FP6:
2892 case TYPE_FP7:
2893 pipe_0 = i;
2894 break;
2895 case TYPE_LOAD:
2896 case TYPE_STORE:
2897 pipe_ls = i;
2898 /* FALLTHRU */
2899 case TYPE_LNOP:
2900 case TYPE_SHUF:
2901 case TYPE_BR:
2902 case TYPE_MULTI1:
2903 case TYPE_HBR:
2904 pipe_1 = i;
2905 break;
2906 case TYPE_IPREFETCH:
2907 pipe_hbrp = i;
2908 break;
2909 }
2910 }
2911
2912 /* In the first scheduling phase, schedule loads and stores together
2913 to increase the chance they will get merged during postreload CSE. */
2914 if (!reload_completed && pipe_ls >= 0)
2915 {
2916 insn = ready[pipe_ls];
2917 ready[pipe_ls] = ready[nready - 1];
2918 ready[nready - 1] = insn;
2919 return 1;
2920 }
2921
2922 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2923 if (pipe_hbrp >= 0)
2924 pipe_1 = pipe_hbrp;
2925
2926 /* When we have loads/stores in every cycle of the last 15 insns and
2927 we are about to schedule another load/store, emit an hbrp insn
2928 instead. */
2929 if (in_spu_reorg
2930 && spu_sched_length - spu_ls_first >= 4 * 15
2931 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2932 {
2933 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2934 recog_memoized (insn);
2935 if (pipe0_clock < clock)
2936 PUT_MODE (insn, TImode);
2937 spu_sched_variable_issue (file, verbose, insn, -1);
2938 return 0;
2939 }
2940
2941 /* In general, we want to emit nops to increase dual issue, but dual
2942 issue isn't faster when one of the insns could be scheduled later
2943 without affecting the critical path. We look at INSN_PRIORITY to
2944 make a good guess, but it isn't perfect, so -mdual-nops=n can be
2945 used to affect it. */
2946 if (in_spu_reorg && spu_dual_nops < 10)
2947 {
2948 /* When we are at an even address and we are not issuing nops to
2949 improve scheduling then we need to advance the cycle. */
2950 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2951 && (spu_dual_nops == 0
2952 || (pipe_1 != -1
2953 && prev_priority >
2954 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2955 return 0;
2956
2957 /* When at an odd address, schedule the highest priority insn
2958 without considering pipeline. */
2959 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2960 && (spu_dual_nops == 0
2961 || (prev_priority >
2962 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2963 return 1;
2964 }
2965
2966
2967 /* We haven't issued a pipe0 insn yet this cycle, if there is a
2968 pipe0 insn in the ready list, schedule it. */
2969 if (pipe0_clock < clock && pipe_0 >= 0)
2970 schedule_i = pipe_0;
2971
2972 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2973 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2974 else
2975 schedule_i = pipe_1;
2976
2977 if (schedule_i > -1)
2978 {
2979 insn = ready[schedule_i];
2980 ready[schedule_i] = ready[nready - 1];
2981 ready[nready - 1] = insn;
2982 return 1;
2983 }
2984 return 0;
2985 }
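/* Illustrative reading of the heuristics above: (spu_sched_length & 7)
   is 0 when the next insn would fill the even (pipe 0) slot of an
   8-byte fetch pair and 4 when it would fill the odd (pipe 1) slot.
   With -mdual-nops=2, for example, the even-slot case advances the
   cycle unless the best pipe-1 candidate's INSN_PRIORITY is within 2
   of the previously issued insn's priority.  */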
2986
2987 /* INSN is dependent on DEP_INSN. */
2988 static int
2989 spu_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
2990 int cost, unsigned int)
2991 {
2992 rtx set;
2993
2994 /* The blockage pattern is used to prevent instructions from being
2995 moved across it and has no cost. */
2996 if (INSN_CODE (insn) == CODE_FOR_blockage
2997 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
2998 return 0;
2999
3000 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3001 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3002 return 0;
3003
3004 /* Make sure hbrps are spread out. */
3005 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3006 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3007 return 8;
3008
3009 /* Make sure hints and hbrps are 2 cycles apart. */
3010 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3011 || INSN_CODE (insn) == CODE_FOR_hbr)
3012 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3013 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3014 return 2;
3015
3016 /* An hbrp has no real dependency on other insns. */
3017 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3018 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3019 return 0;
3020
3021 /* Assuming that it is unlikely an argument register will be used in
3022 the first cycle of the called function, we reduce the cost for
3023 slightly better scheduling of dep_insn. When not hinted, the
3024 mispredicted branch would hide the cost as well. */
3025 if (CALL_P (insn))
3026 {
3027 rtx target = get_branch_target (insn);
3028 if (GET_CODE (target) != REG || !set_of (target, insn))
3029 return cost - 2;
3030 return cost;
3031 }
3032
3033 /* And when returning from a function, let's assume the return values
3034 are completed sooner too. */
3035 if (CALL_P (dep_insn))
3036 return cost - 2;
3037
3038 /* Make sure an instruction that loads from the back chain is scheduled
3039 away from the return instruction so a hint is more likely to get
3040 issued. */
3041 if (INSN_CODE (insn) == CODE_FOR__return
3042 && (set = single_set (dep_insn))
3043 && GET_CODE (SET_DEST (set)) == REG
3044 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3045 return 20;
3046
3047 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3048 scheduler makes every insn in a block anti-dependent on the final
3049 jump_insn. We adjust here so higher cost insns will get scheduled
3050 earlier. */
3051 if (JUMP_P (insn) && dep_type == REG_DEP_ANTI)
3052 return insn_sched_cost (dep_insn) - 3;
3053
3054 return cost;
3055 }
3056 \f
3057 /* Create a CONST_DOUBLE from a string. */
3058 rtx
3059 spu_float_const (const char *string, machine_mode mode)
3060 {
3061 REAL_VALUE_TYPE value;
3062 value = REAL_VALUE_ATOF (string, mode);
3063 return const_double_from_real_value (value, mode);
3064 }
3065
3066 int
3067 spu_constant_address_p (rtx x)
3068 {
3069 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3070 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3071 || GET_CODE (x) == HIGH);
3072 }
3073
3074 static enum spu_immediate
3075 which_immediate_load (HOST_WIDE_INT val)
3076 {
3077 gcc_assert (val == trunc_int_for_mode (val, SImode));
3078
3079 if (val >= -0x8000 && val <= 0x7fff)
3080 return SPU_IL;
3081 if (val >= 0 && val <= 0x3ffff)
3082 return SPU_ILA;
3083 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3084 return SPU_ILH;
3085 if ((val & 0xffff) == 0)
3086 return SPU_ILHU;
3087
3088 return SPU_NONE;
3089 }
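/* Illustrative examples, assuming SImode values:
     0x00001234  -> SPU_IL    (fits a signed 16-bit immediate)
     0x00020000  -> SPU_ILA   (fits an unsigned 18-bit immediate)
     0x12341234  -> SPU_ILH   (both halfwords identical)
     0x12340000  -> SPU_ILHU  (low halfword zero)
     0x12345678  -> SPU_NONE  (needs ilhu followed by iohl)  */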
3090
3091 /* Return true when OP can be loaded by one of the il instructions, or
3092 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3093 int
3094 immediate_load_p (rtx op, machine_mode mode)
3095 {
3096 if (CONSTANT_P (op))
3097 {
3098 enum immediate_class c = classify_immediate (op, mode);
3099 return c == IC_IL1 || c == IC_IL1s
3100 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3101 }
3102 return 0;
3103 }
3104
3105 /* Return true if the first SIZE bytes of ARR form a constant that can be
3106 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3107 are set to the run length and start offset the instruction should use. */
3108 static int
3109 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3110 {
3111 int cpat, run, i, start;
3112 cpat = 1;
3113 run = 0;
3114 start = -1;
3115 for (i = 0; i < size && cpat; i++)
3116 if (arr[i] != i+16)
3117 {
3118 if (!run)
3119 {
3120 start = i;
3121 if (arr[i] == 3)
3122 run = 1;
3123 else if (arr[i] == 2 && arr[i+1] == 3)
3124 run = 2;
3125 else if (arr[i] == 0)
3126 {
3127 while (arr[i+run] == run && i+run < 16)
3128 run++;
3129 if (run != 4 && run != 8)
3130 cpat = 0;
3131 }
3132 else
3133 cpat = 0;
3134 if ((i & (run-1)) != 0)
3135 cpat = 0;
3136 i += run;
3137 }
3138 else
3139 cpat = 0;
3140 }
3141 if (cpat && (run || size < 16))
3142 {
3143 if (run == 0)
3144 run = 1;
3145 if (prun)
3146 *prun = run;
3147 if (pstart)
3148 *pstart = start == -1 ? 16-run : start;
3149 return 1;
3150 }
3151 return 0;
3152 }
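/* Worked example: a constant matching the word-insertion (cwd) shuffle
   pattern at offset 8 looks like

     arr[] = { 0x10, 0x11, 0x12, 0x13,  0x14, 0x15, 0x16, 0x17,
               0x00, 0x01, 0x02, 0x03,  0x1c, 0x1d, 0x1e, 0x1f };

   every byte except the insertion slot equals its index + 16, so the
   scan finds a run of 4 starting at offset 8 and cpat_info returns 1
   with *prun == 4 and *pstart == 8.  */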
3153
3154 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3155 it into a register. MODE is only valid when OP is a CONST_INT. */
3156 static enum immediate_class
3157 classify_immediate (rtx op, machine_mode mode)
3158 {
3159 HOST_WIDE_INT val;
3160 unsigned char arr[16];
3161 int i, j, repeated, fsmbi, repeat;
3162
3163 gcc_assert (CONSTANT_P (op));
3164
3165 if (GET_MODE (op) != VOIDmode)
3166 mode = GET_MODE (op);
3167
3168 /* A V4SI const_vector with all identical symbols is ok. */
3169 if (!flag_pic
3170 && mode == V4SImode
3171 && GET_CODE (op) == CONST_VECTOR
3172 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3173 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE)
3174 op = unwrap_const_vec_duplicate (op);
3175
3176 switch (GET_CODE (op))
3177 {
3178 case SYMBOL_REF:
3179 case LABEL_REF:
3180 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3181
3182 case CONST:
3183 /* We can never know if the resulting address fits in 18 bits and can be
3184 loaded with ila. For now, assume the address will not overflow if
3185 the displacement is "small" (fits 'K' constraint). */
3186 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3187 {
3188 rtx sym = XEXP (XEXP (op, 0), 0);
3189 rtx cst = XEXP (XEXP (op, 0), 1);
3190
3191 if (GET_CODE (sym) == SYMBOL_REF
3192 && GET_CODE (cst) == CONST_INT
3193 && satisfies_constraint_K (cst))
3194 return IC_IL1s;
3195 }
3196 return IC_IL2s;
3197
3198 case HIGH:
3199 return IC_IL1s;
3200
3201 case CONST_VECTOR:
3202 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3203 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3204 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3205 return IC_POOL;
3206 /* Fall through. */
3207
3208 case CONST_INT:
3209 case CONST_DOUBLE:
3210 constant_to_array (mode, op, arr);
3211
3212 /* Check that each 4-byte slot is identical. */
3213 repeated = 1;
3214 for (i = 4; i < 16; i += 4)
3215 for (j = 0; j < 4; j++)
3216 if (arr[j] != arr[i + j])
3217 repeated = 0;
3218
3219 if (repeated)
3220 {
3221 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3222 val = trunc_int_for_mode (val, SImode);
3223
3224 if (which_immediate_load (val) != SPU_NONE)
3225 return IC_IL1;
3226 }
3227
3228 /* Any mode of 2 bytes or smaller can be loaded with an il
3229 instruction. */
3230 gcc_assert (GET_MODE_SIZE (mode) > 2);
3231
3232 fsmbi = 1;
3233 repeat = 0;
3234 for (i = 0; i < 16 && fsmbi; i++)
3235 if (arr[i] != 0 && repeat == 0)
3236 repeat = arr[i];
3237 else if (arr[i] != 0 && arr[i] != repeat)
3238 fsmbi = 0;
3239 if (fsmbi)
3240 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3241
3242 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3243 return IC_CPAT;
3244
3245 if (repeated)
3246 return IC_IL2;
3247
3248 return IC_POOL;
3249 default:
3250 break;
3251 }
3252 gcc_unreachable ();
3253 }
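/* Illustrative examples of the classification above (scalar constants
   are splatted across the 16 bytes by constant_to_array):
     (const_int 0x12341234)  -> 4-byte slots repeat and
                                which_immediate_load gives SPU_ILH,
                                so IC_IL1;
     (const_int 0x00ffff00)  -> not il-loadable, but every nonzero
                                byte is 0xff, so IC_FSMBI;
     (const_int 0x12345678)  -> repeats but needs ilhu + iohl,
                                so IC_IL2.  */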
3254
3255 static enum spu_immediate
3256 which_logical_immediate (HOST_WIDE_INT val)
3257 {
3258 gcc_assert (val == trunc_int_for_mode (val, SImode));
3259
3260 if (val >= -0x200 && val <= 0x1ff)
3261 return SPU_ORI;
3262 if (val >= 0 && val <= 0xffff)
3263 return SPU_IOHL;
3264 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3265 {
3266 val = trunc_int_for_mode (val, HImode);
3267 if (val >= -0x200 && val <= 0x1ff)
3268 return SPU_ORHI;
3269 if ((val & 0xff) == ((val >> 8) & 0xff))
3270 {
3271 val = trunc_int_for_mode (val, QImode);
3272 if (val >= -0x200 && val <= 0x1ff)
3273 return SPU_ORBI;
3274 }
3275 }
3276 return SPU_NONE;
3277 }
3278
3279 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3280 CONST_DOUBLEs. */
3281 static int
3282 const_vector_immediate_p (rtx x)
3283 {
3284 int i;
3285 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3286 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3287 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3288 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3289 return 0;
3290 return 1;
3291 }
3292
3293 int
3294 logical_immediate_p (rtx op, machine_mode mode)
3295 {
3296 HOST_WIDE_INT val;
3297 unsigned char arr[16];
3298 int i, j;
3299
3300 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3301 || GET_CODE (op) == CONST_VECTOR);
3302
3303 if (GET_CODE (op) == CONST_VECTOR
3304 && !const_vector_immediate_p (op))
3305 return 0;
3306
3307 if (GET_MODE (op) != VOIDmode)
3308 mode = GET_MODE (op);
3309
3310 constant_to_array (mode, op, arr);
3311
3312 /* Check that bytes are repeated. */
3313 for (i = 4; i < 16; i += 4)
3314 for (j = 0; j < 4; j++)
3315 if (arr[j] != arr[i + j])
3316 return 0;
3317
3318 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3319 val = trunc_int_for_mode (val, SImode);
3320
3321 i = which_logical_immediate (val);
3322 return i != SPU_NONE && i != SPU_IOHL;
3323 }
3324
3325 int
3326 iohl_immediate_p (rtx op, machine_mode mode)
3327 {
3328 HOST_WIDE_INT val;
3329 unsigned char arr[16];
3330 int i, j;
3331
3332 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3333 || GET_CODE (op) == CONST_VECTOR);
3334
3335 if (GET_CODE (op) == CONST_VECTOR
3336 && !const_vector_immediate_p (op))
3337 return 0;
3338
3339 if (GET_MODE (op) != VOIDmode)
3340 mode = GET_MODE (op);
3341
3342 constant_to_array (mode, op, arr);
3343
3344 /* Check that bytes are repeated. */
3345 for (i = 4; i < 16; i += 4)
3346 for (j = 0; j < 4; j++)
3347 if (arr[j] != arr[i + j])
3348 return 0;
3349
3350 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3351 val = trunc_int_for_mode (val, SImode);
3352
3353 return val >= 0 && val <= 0xffff;
3354 }
3355
3356 int
3357 arith_immediate_p (rtx op, machine_mode mode,
3358 HOST_WIDE_INT low, HOST_WIDE_INT high)
3359 {
3360 HOST_WIDE_INT val;
3361 unsigned char arr[16];
3362 int bytes, i, j;
3363
3364 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3365 || GET_CODE (op) == CONST_VECTOR);
3366
3367 if (GET_CODE (op) == CONST_VECTOR
3368 && !const_vector_immediate_p (op))
3369 return 0;
3370
3371 if (GET_MODE (op) != VOIDmode)
3372 mode = GET_MODE (op);
3373
3374 constant_to_array (mode, op, arr);
3375
3376 bytes = GET_MODE_UNIT_SIZE (mode);
3377 mode = int_mode_for_mode (GET_MODE_INNER (mode)).require ();
3378
3379 /* Check that bytes are repeated. */
3380 for (i = bytes; i < 16; i += bytes)
3381 for (j = 0; j < bytes; j++)
3382 if (arr[j] != arr[i + j])
3383 return 0;
3384
3385 val = arr[0];
3386 for (j = 1; j < bytes; j++)
3387 val = (val << 8) | arr[j];
3388
3389 val = trunc_int_for_mode (val, mode);
3390
3391 return val >= low && val <= high;
3392 }
3393
3394 /* TRUE when op is an immediate and an exact power of 2, and given that
3395 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3396 all entries must be the same. */
3397 bool
3398 exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
3399 {
3400 machine_mode int_mode;
3401 HOST_WIDE_INT val;
3402 unsigned char arr[16];
3403 int bytes, i, j;
3404
3405 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3406 || GET_CODE (op) == CONST_VECTOR);
3407
3408 if (GET_CODE (op) == CONST_VECTOR
3409 && !const_vector_immediate_p (op))
3410 return 0;
3411
3412 if (GET_MODE (op) != VOIDmode)
3413 mode = GET_MODE (op);
3414
3415 constant_to_array (mode, op, arr);
3416
3417 mode = GET_MODE_INNER (mode);
3418
3419 bytes = GET_MODE_SIZE (mode);
3420 int_mode = int_mode_for_mode (mode).require ();
3421
3422 /* Check that bytes are repeated. */
3423 for (i = bytes; i < 16; i += bytes)
3424 for (j = 0; j < bytes; j++)
3425 if (arr[j] != arr[i + j])
3426 return 0;
3427
3428 val = arr[0];
3429 for (j = 1; j < bytes; j++)
3430 val = (val << 8) | arr[j];
3431
3432 val = trunc_int_for_mode (val, int_mode);
3433
3434 /* Currently, we only handle SFmode */
3435 gcc_assert (mode == SFmode);
3436 if (mode == SFmode)
3437 {
3438 int exp = (val >> 23) - 127;
3439 return val > 0 && (val & 0x007fffff) == 0
3440 && exp >= low && exp <= high;
3441 }
3442 return FALSE;
3443 }
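/* Worked example: 8.0f has the SFmode bit pattern 0x41000000; the
   mantissa field is zero and exp = (0x41000000 >> 23) - 127 = 3, so
   the constant is accepted whenever LOW <= 3 <= HIGH.  */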
3444
3445 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3446
3447 static bool
3448 ea_symbol_ref_p (const_rtx x)
3449 {
3450 tree decl;
3451
3452 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3453 {
3454 rtx plus = XEXP (x, 0);
3455 rtx op0 = XEXP (plus, 0);
3456 rtx op1 = XEXP (plus, 1);
3457 if (GET_CODE (op1) == CONST_INT)
3458 x = op0;
3459 }
3460
3461 return (GET_CODE (x) == SYMBOL_REF
3462 && (decl = SYMBOL_REF_DECL (x)) != 0
3463 && TREE_CODE (decl) == VAR_DECL
3464 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3465 }
3466
3467 /* We accept:
3468 - any 32-bit constant (SImode, SFmode)
3469 - any constant that can be generated with fsmbi (any mode)
3470 - a 64-bit constant where the high and low bits are identical
3471 (DImode, DFmode)
3472 - a 128-bit constant where the four 32-bit words match. */
3473 bool
3474 spu_legitimate_constant_p (machine_mode mode, rtx x)
3475 {
3476 subrtx_iterator::array_type array;
3477 if (GET_CODE (x) == HIGH)
3478 x = XEXP (x, 0);
3479
3480 /* Reject any __ea qualified reference. These can't appear in
3481 instructions but must be forced to the constant pool. */
3482 FOR_EACH_SUBRTX (iter, array, x, ALL)
3483 if (ea_symbol_ref_p (*iter))
3484 return 0;
3485
3486 /* V4SI with all identical symbols is valid. */
3487 if (!flag_pic
3488 && mode == V4SImode
3489 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3490 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3491 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3492 return const_vec_duplicate_p (x);
3493
3494 if (GET_CODE (x) == CONST_VECTOR
3495 && !const_vector_immediate_p (x))
3496 return 0;
3497 return 1;
3498 }
3499
3500 /* Valid addresses are:
3501 - symbol_ref, label_ref, const
3502 - reg
3503 - reg + const_int, where const_int is 16 byte aligned
3504 - reg + reg, alignment doesn't matter
3505 The alignment matters in the reg+const case because lqd and stqd
3506 ignore the 4 least significant bits of the const. We only care about
3507 16 byte modes because the expand phase will change all smaller MEM
3508 references to TImode. */
3509 static bool
3510 spu_legitimate_address_p (machine_mode mode,
3511 rtx x, bool reg_ok_strict)
3512 {
3513 int aligned = GET_MODE_SIZE (mode) >= 16;
3514 if (aligned
3515 && GET_CODE (x) == AND
3516 && GET_CODE (XEXP (x, 1)) == CONST_INT
3517 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3518 x = XEXP (x, 0);
3519 switch (GET_CODE (x))
3520 {
3521 case LABEL_REF:
3522 return !TARGET_LARGE_MEM;
3523
3524 case SYMBOL_REF:
3525 case CONST:
3526 /* Keep __ea references until reload so that spu_expand_mov can see them
3527 in MEMs. */
3528 if (ea_symbol_ref_p (x))
3529 return !reload_in_progress && !reload_completed;
3530 return !TARGET_LARGE_MEM;
3531
3532 case CONST_INT:
3533 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3534
3535 case SUBREG:
3536 x = XEXP (x, 0);
3537 if (!REG_P (x))
3538 return 0;
3539 /* FALLTHRU */
3540
3541 case REG:
3542 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3543
3544 case PLUS:
3545 case LO_SUM:
3546 {
3547 rtx op0 = XEXP (x, 0);
3548 rtx op1 = XEXP (x, 1);
3549 if (GET_CODE (op0) == SUBREG)
3550 op0 = XEXP (op0, 0);
3551 if (GET_CODE (op1) == SUBREG)
3552 op1 = XEXP (op1, 0);
3553 if (GET_CODE (op0) == REG
3554 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3555 && GET_CODE (op1) == CONST_INT
3556 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3557 /* If virtual registers are involved, the displacement will
3558 change later on anyway, so checking would be premature.
3559 Reload will make sure the final displacement after
3560 register elimination is OK. */
3561 || op0 == arg_pointer_rtx
3562 || op0 == frame_pointer_rtx
3563 || op0 == virtual_stack_vars_rtx)
3564 && (!aligned || (INTVAL (op1) & 15) == 0))
3565 return TRUE;
3566 if (GET_CODE (op0) == REG
3567 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3568 && GET_CODE (op1) == REG
3569 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3570 return TRUE;
3571 }
3572 break;
3573
3574 default:
3575 break;
3576 }
3577 return FALSE;
3578 }
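/* Illustrative examples for a 16-byte access (aligned == 1):
     (plus (reg) (const_int 32))  -> valid; 32 is a multiple of 16
     (plus (reg) (const_int 20))  -> rejected, since lqd/stqd would
                                     silently drop the low 4 bits
                                     (unless the base is a virtual
                                     register still to be eliminated)
     (plus (reg) (reg))           -> valid regardless of alignment
     (const_int 0x3fff0)          -> valid absolute local-store address  */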
3579
3580 /* Like spu_legitimate_address_p, except with named addresses. */
3581 static bool
3582 spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
3583 bool reg_ok_strict, addr_space_t as)
3584 {
3585 if (as == ADDR_SPACE_EA)
3586 return (REG_P (x) && (GET_MODE (x) == EAmode));
3587
3588 else if (as != ADDR_SPACE_GENERIC)
3589 gcc_unreachable ();
3590
3591 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3592 }
3593
3594 /* When the address is reg + const_int, force the const_int into a
3595 register. */
3596 static rtx
3597 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3598 machine_mode mode ATTRIBUTE_UNUSED)
3599 {
3600 rtx op0, op1;
3601 /* Make sure both operands are registers. */
3602 if (GET_CODE (x) == PLUS)
3603 {
3604 op0 = XEXP (x, 0);
3605 op1 = XEXP (x, 1);
3606 if (ALIGNED_SYMBOL_REF_P (op0))
3607 {
3608 op0 = force_reg (Pmode, op0);
3609 mark_reg_pointer (op0, 128);
3610 }
3611 else if (GET_CODE (op0) != REG)
3612 op0 = force_reg (Pmode, op0);
3613 if (ALIGNED_SYMBOL_REF_P (op1))
3614 {
3615 op1 = force_reg (Pmode, op1);
3616 mark_reg_pointer (op1, 128);
3617 }
3618 else if (GET_CODE (op1) != REG)
3619 op1 = force_reg (Pmode, op1);
3620 x = gen_rtx_PLUS (Pmode, op0, op1);
3621 }
3622 return x;
3623 }
3624
3625 /* Like spu_legitimize_address, except with named address support. */
3626 static rtx
3627 spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
3628 addr_space_t as)
3629 {
3630 if (as != ADDR_SPACE_GENERIC)
3631 return x;
3632
3633 return spu_legitimize_address (x, oldx, mode);
3634 }
3635
3636 /* Reload reg + const_int for out-of-range displacements. */
3637 rtx
3638 spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
3639 int opnum, int type)
3640 {
3641 bool removed_and = false;
3642
3643 if (GET_CODE (ad) == AND
3644 && CONST_INT_P (XEXP (ad, 1))
3645 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3646 {
3647 ad = XEXP (ad, 0);
3648 removed_and = true;
3649 }
3650
3651 if (GET_CODE (ad) == PLUS
3652 && REG_P (XEXP (ad, 0))
3653 && CONST_INT_P (XEXP (ad, 1))
3654 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3655 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3656 {
3657 /* Unshare the sum. */
3658 ad = copy_rtx (ad);
3659
3660 /* Reload the displacement. */
3661 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3662 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3663 opnum, (enum reload_type) type);
3664
3665 /* Add back AND for alignment if we stripped it. */
3666 if (removed_and)
3667 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3668
3669 return ad;
3670 }
3671
3672 return NULL_RTX;
3673 }
3674
3675 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3676 struct attribute_spec.handler. */
3677 static tree
3678 spu_handle_fndecl_attribute (tree * node,
3679 tree name,
3680 tree args ATTRIBUTE_UNUSED,
3681 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3682 {
3683 if (TREE_CODE (*node) != FUNCTION_DECL)
3684 {
3685 warning (0, "%qE attribute only applies to functions",
3686 name);
3687 *no_add_attrs = true;
3688 }
3689
3690 return NULL_TREE;
3691 }
3692
3693 /* Handle the "vector" attribute. */
3694 static tree
3695 spu_handle_vector_attribute (tree * node, tree name,
3696 tree args ATTRIBUTE_UNUSED,
3697 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3698 {
3699 tree type = *node, result = NULL_TREE;
3700 machine_mode mode;
3701 int unsigned_p;
3702
3703 while (POINTER_TYPE_P (type)
3704 || TREE_CODE (type) == FUNCTION_TYPE
3705 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3706 type = TREE_TYPE (type);
3707
3708 mode = TYPE_MODE (type);
3709
3710 unsigned_p = TYPE_UNSIGNED (type);
3711 switch (mode)
3712 {
3713 case E_DImode:
3714 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3715 break;
3716 case E_SImode:
3717 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3718 break;
3719 case E_HImode:
3720 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3721 break;
3722 case E_QImode:
3723 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3724 break;
3725 case E_SFmode:
3726 result = V4SF_type_node;
3727 break;
3728 case E_DFmode:
3729 result = V2DF_type_node;
3730 break;
3731 default:
3732 break;
3733 }
3734
3735 /* Propagate qualifiers attached to the element type
3736 onto the vector type. */
3737 if (result && result != type && TYPE_QUALS (type))
3738 result = build_qualified_type (result, TYPE_QUALS (type));
3739
3740 *no_add_attrs = true; /* No need to hang on to the attribute. */
3741
3742 if (!result)
3743 warning (0, "%qE attribute ignored", name);
3744 else
3745 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3746
3747 return NULL_TREE;
3748 }
3749
3750 /* Return nonzero if FUNC is a naked function. */
3751 static int
3752 spu_naked_function_p (tree func)
3753 {
3754 tree a;
3755
3756 if (TREE_CODE (func) != FUNCTION_DECL)
3757 abort ();
3758
3759 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3760 return a != NULL_TREE;
3761 }
3762
3763 int
3764 spu_initial_elimination_offset (int from, int to)
3765 {
3766 int saved_regs_size = spu_saved_regs_size ();
3767 int sp_offset = 0;
3768 if (!crtl->is_leaf || crtl->outgoing_args_size
3769 || get_frame_size () || saved_regs_size)
3770 sp_offset = STACK_POINTER_OFFSET;
3771 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3772 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3773 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3774 return get_frame_size ();
3775 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3776 return sp_offset + crtl->outgoing_args_size
3777 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3778 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3779 return get_frame_size () + saved_regs_size + sp_offset;
3780 else
3781 gcc_unreachable ();
3782 }
3783
3784 rtx
3785 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3786 {
3787 machine_mode mode = TYPE_MODE (type);
3788 int byte_size = ((mode == BLKmode)
3789 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3790
3791 /* Make sure small structs are left justified in a register. */
3792 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3793 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3794 {
3795 machine_mode smode;
3796 rtvec v;
3797 int i;
3798 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3799 int n = byte_size / UNITS_PER_WORD;
3800 v = rtvec_alloc (nregs);
3801 for (i = 0; i < n; i++)
3802 {
3803 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3804 gen_rtx_REG (TImode,
3805 FIRST_RETURN_REGNUM
3806 + i),
3807 GEN_INT (UNITS_PER_WORD * i));
3808 byte_size -= UNITS_PER_WORD;
3809 }
3810
3811 if (n < nregs)
3812 {
3813 if (byte_size < 4)
3814 byte_size = 4;
3815 smode = smallest_int_mode_for_size (byte_size * BITS_PER_UNIT);
3816 RTVEC_ELT (v, n) =
3817 gen_rtx_EXPR_LIST (VOIDmode,
3818 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3819 GEN_INT (UNITS_PER_WORD * n));
3820 }
3821 return gen_rtx_PARALLEL (mode, v);
3822 }
3823 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3824 }
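/* Worked example (UNITS_PER_WORD is 16 on SPU, so each return register
   holds a quadword): assuming MAX_REGISTER_RETURN permits it, a
   20-byte struct gives nregs == 2; the first 16 bytes are returned in
   (reg:TI FIRST_RETURN_REGNUM) at offset 0 and the remaining 4 bytes
   in (reg:SI FIRST_RETURN_REGNUM + 1) at offset 16, wrapped in a
   PARALLEL.  */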
3825
3826 static rtx
3827 spu_function_arg (cumulative_args_t cum_v,
3828 machine_mode mode,
3829 const_tree type, bool named ATTRIBUTE_UNUSED)
3830 {
3831 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3832 int byte_size;
3833
3834 if (*cum >= MAX_REGISTER_ARGS)
3835 return 0;
3836
3837 byte_size = ((mode == BLKmode)
3838 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3839
3840 /* The ABI does not allow parameters to be passed partially in a
3841 register and partially on the stack. */
3842 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3843 return 0;
3844
3845 /* Make sure small structs are left justified in a register. */
3846 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3847 && byte_size < UNITS_PER_WORD && byte_size > 0)
3848 {
3849 machine_mode smode;
3850 rtx gr_reg;
3851 if (byte_size < 4)
3852 byte_size = 4;
3853 smode = smallest_int_mode_for_size (byte_size * BITS_PER_UNIT);
3854 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3855 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
3856 const0_rtx);
3857 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3858 }
3859 else
3860 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
3861 }
3862
3863 static void
3864 spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
3865 const_tree type, bool named ATTRIBUTE_UNUSED)
3866 {
3867 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3868
3869 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3870 ? 1
3871 : mode == BLKmode
3872 ? ((int_size_in_bytes (type) + 15) / 16)
3873 : mode == VOIDmode
3874 ? 1
3875 : spu_hard_regno_nregs (FIRST_ARG_REGNUM, mode));
3876 }
3877
3878 /* Implement TARGET_FUNCTION_ARG_OFFSET. The SPU ABI wants 32/64-bit
3879 types at offset 0 in the quad-word on the stack. 8/16-bit types
3880 should be at offsets 3/2 respectively. */
3881
3882 static HOST_WIDE_INT
3883 spu_function_arg_offset (machine_mode mode, const_tree type)
3884 {
3885 if (type && INTEGRAL_TYPE_P (type) && GET_MODE_SIZE (mode) < 4)
3886 return 4 - GET_MODE_SIZE (mode);
3887 return 0;
3888 }
3889
3890 /* Implement TARGET_FUNCTION_ARG_PADDING. */
3891
3892 static pad_direction
3893 spu_function_arg_padding (machine_mode, const_tree)
3894 {
3895 return PAD_UPWARD;
3896 }
3897
3898 /* Variable sized types are passed by reference. */
3899 static bool
3900 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3901 machine_mode mode ATTRIBUTE_UNUSED,
3902 const_tree type, bool named ATTRIBUTE_UNUSED)
3903 {
3904 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3905 }
3906 \f
3907
3908 /* Var args. */
3909
3910 /* Create and return the va_list datatype.
3911
3912 On SPU, va_list is an array type equivalent to
3913
3914 typedef struct __va_list_tag
3915 {
3916 void *__args __attribute__((__aligned(16)));
3917 void *__skip __attribute__((__aligned(16)));
3918
3919 } va_list[1];
3920
3921 where __args points to the arg that will be returned by the next
3922 va_arg(), and __skip points to the previous stack frame such that
3923 when __args == __skip we should advance __args by 32 bytes. */
3924 static tree
3925 spu_build_builtin_va_list (void)
3926 {
3927 tree f_args, f_skip, record, type_decl;
3928 bool owp;
3929
3930 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3931
3932 type_decl =
3933 build_decl (BUILTINS_LOCATION,
3934 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3935
3936 f_args = build_decl (BUILTINS_LOCATION,
3937 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3938 f_skip = build_decl (BUILTINS_LOCATION,
3939 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3940
3941 DECL_FIELD_CONTEXT (f_args) = record;
3942 SET_DECL_ALIGN (f_args, 128);
3943 DECL_USER_ALIGN (f_args) = 1;
3944
3945 DECL_FIELD_CONTEXT (f_skip) = record;
3946 SET_DECL_ALIGN (f_skip, 128);
3947 DECL_USER_ALIGN (f_skip) = 1;
3948
3949 TYPE_STUB_DECL (record) = type_decl;
3950 TYPE_NAME (record) = type_decl;
3951 TYPE_FIELDS (record) = f_args;
3952 DECL_CHAIN (f_args) = f_skip;
3953
3954 /* We know this is being padded and we want it to be. It is an internal
3955 type, so hide the warnings from the user. */
3956 owp = warn_padded;
3957 warn_padded = false;
3958
3959 layout_type (record);
3960
3961 warn_padded = owp;
3962
3963 /* The correct type is an array type of one element. */
3964 return build_array_type (record, build_index_type (size_zero_node));
3965 }
3966
3967 /* Implement va_start by filling the va_list structure VALIST.
3968 NEXTARG points to the first anonymous stack argument.
3969
3970 The following global variables are used to initialize
3971 the va_list structure:
3972
3973 crtl->args.info;
3974 the CUMULATIVE_ARGS for this function
3975
3976 crtl->args.arg_offset_rtx:
3977 holds the offset of the first anonymous stack argument
3978 (relative to the virtual arg pointer). */
3979
3980 static void
3981 spu_va_start (tree valist, rtx nextarg)
3982 {
3983 tree f_args, f_skip;
3984 tree args, skip, t;
3985
3986 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3987 f_skip = DECL_CHAIN (f_args);
3988
3989 valist = build_simple_mem_ref (valist);
3990 args =
3991 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3992 skip =
3993 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3994
3995 /* Find the __args area. */
3996 t = make_tree (TREE_TYPE (args), nextarg);
3997 if (crtl->args.pretend_args_size > 0)
3998 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
3999 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4000 TREE_SIDE_EFFECTS (t) = 1;
4001 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4002
4003 /* Find the __skip area. */
4004 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4005 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
4006 - STACK_POINTER_OFFSET));
4007 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4008 TREE_SIDE_EFFECTS (t) = 1;
4009 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4010 }
4011
4012 /* Gimplify va_arg by updating the va_list structure
4013 VALIST as required to retrieve an argument of type
4014 TYPE, and returning that argument.
4015
4016 ret = va_arg(VALIST, TYPE);
4017
4018 generates code equivalent to:
4019
4020 paddedsize = (sizeof(TYPE) + 15) & -16;
4021 if (VALIST.__args + paddedsize > VALIST.__skip
4022 && VALIST.__args <= VALIST.__skip)
4023 addr = VALIST.__skip + 32;
4024 else
4025 addr = VALIST.__args;
4026 VALIST.__args = addr + paddedsize;
4027 ret = *(TYPE *)addr;
4028 */
4029 static tree
4030 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4031 gimple_seq * post_p ATTRIBUTE_UNUSED)
4032 {
4033 tree f_args, f_skip;
4034 tree args, skip;
4035 HOST_WIDE_INT size, rsize;
4036 tree addr, tmp;
4037 bool pass_by_reference_p;
4038
4039 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4040 f_skip = DECL_CHAIN (f_args);
4041
4042 args =
4043 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4044 skip =
4045 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4046
4047 addr = create_tmp_var (ptr_type_node, "va_arg");
4048
4049 /* If an object is dynamically sized, a pointer to it is passed
4050 instead of the object itself. */
4051 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4052 false);
4053 if (pass_by_reference_p)
4054 type = build_pointer_type (type);
4055 size = int_size_in_bytes (type);
4056 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4057
4058 /* build conditional expression to calculate addr. The expression
4059 will be gimplified later. */
4060 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
4061 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4062 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4063 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4064 unshare_expr (skip)));
4065
4066 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4067 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4068 unshare_expr (args));
4069
4070 gimplify_assign (addr, tmp, pre_p);
4071
4072 /* update VALIST.__args */
4073 tmp = fold_build_pointer_plus_hwi (addr, rsize);
4074 gimplify_assign (unshare_expr (args), tmp, pre_p);
4075
4076 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4077 addr);
4078
4079 if (pass_by_reference_p)
4080 addr = build_va_arg_indirect_ref (addr);
4081
4082 return build_va_arg_indirect_ref (addr);
4083 }
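/* Added worked example (sketch, not part of the original source): for
   va_arg (ap, int), size is 4 and rsize rounds up to one 16-byte slot.
   If __args + 16 would step past __skip while __args is still <= __skip,
   addr becomes __skip + 32 as in the pseudo-code above; otherwise addr is
   simply __args.  Either way __args then advances by 16 and the int is
   read from *addr.  */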
4084
4085 /* Save parameter registers starting with the register that corresponds
4086 to the first unnamed parameters. If the first unnamed parameter is
4087 in the stack then save no registers. Set pretend_args_size to the
4088 amount of space needed to save the registers. */
4089 static void
4090 spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4091 tree type, int *pretend_size, int no_rtl)
4092 {
4093 if (!no_rtl)
4094 {
4095 rtx tmp;
4096 int regno;
4097 int offset;
4098 int ncum = *get_cumulative_args (cum);
4099
4100 /* cum currently points to the last named argument; we want to
4101 start at the next argument. */
4102 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
4103
4104 offset = -STACK_POINTER_OFFSET;
4105 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4106 {
4107 tmp = gen_frame_mem (V4SImode,
4108 plus_constant (Pmode, virtual_incoming_args_rtx,
4109 offset));
4110 emit_move_insn (tmp,
4111 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4112 offset += 16;
4113 }
4114 *pretend_size = offset + STACK_POINTER_OFFSET;
4115 }
4116 }
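/* Added example (sketch): if the named arguments occupy the first two
   register slots, ncum is 2 after the advance above, registers
   FIRST_ARG_REGNUM + 2 through FIRST_ARG_REGNUM + MAX_REGISTER_ARGS - 1
   are dumped as V4SImode quadwords starting at offset -STACK_POINTER_OFFSET
   from the incoming argument pointer, and *pretend_size ends up as
   16 * (MAX_REGISTER_ARGS - 2) bytes.  */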
4117 \f
4118 static void
4119 spu_conditional_register_usage (void)
4120 {
4121 if (flag_pic)
4122 {
4123 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4124 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4125 }
4126 }
4127
4128 /* This is called any time we inspect the alignment of a register for
4129 addresses. */
4130 static int
4131 reg_aligned_for_addr (rtx x)
4132 {
4133 int regno =
4134 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4135 return REGNO_POINTER_ALIGN (regno) >= 128;
4136 }
4137
4138 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4139 into its SYMBOL_REF_FLAGS. */
4140 static void
4141 spu_encode_section_info (tree decl, rtx rtl, int first)
4142 {
4143 default_encode_section_info (decl, rtl, first);
4144
4145 /* If a variable has a forced alignment of less than 16 bytes, mark it
4146 with SYMBOL_FLAG_ALIGN1. */
4147 if (TREE_CODE (decl) == VAR_DECL
4148 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4149 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4150 }
4151
4152 /* Return TRUE if we are certain the mem refers to a complete object
4153 which is both 16-byte aligned and padded to a 16-byte boundary. This
4154 would make it safe to store with a single instruction.
4155 We guarantee the alignment and padding for static objects by aligning
4156 all of them to 16-bytes. (DATA_ALIGNMENT and TARGET_CONSTANT_ALIGNMENT.)
4157 FIXME: We currently cannot guarantee this for objects on the stack
4158 because assign_parm_setup_stack calls assign_stack_local with the
4159 alignment of the parameter mode and in that case the alignment never
4160 gets adjusted by LOCAL_ALIGNMENT. */
4161 static int
4162 store_with_one_insn_p (rtx mem)
4163 {
4164 machine_mode mode = GET_MODE (mem);
4165 rtx addr = XEXP (mem, 0);
4166 if (mode == BLKmode)
4167 return 0;
4168 if (GET_MODE_SIZE (mode) >= 16)
4169 return 1;
4170 /* Only static objects. */
4171 if (GET_CODE (addr) == SYMBOL_REF)
4172 {
4173 /* We use the associated declaration to make sure the access refers
4174 to the whole object.
4175 We check both MEM_EXPR and SYMBOL_REF_DECL. It is not clear
4176 whether both checks are necessary: there may be cases where only
4177 one of them exists, or where both exist but refer to objects of
4178 different types, so we conservatively accept either. */
4179 tree decl = MEM_EXPR (mem);
4180 if (decl
4181 && TREE_CODE (decl) == VAR_DECL
4182 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4183 return 1;
4184 decl = SYMBOL_REF_DECL (addr);
4185 if (decl
4186 && TREE_CODE (decl) == VAR_DECL
4187 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4188 return 1;
4189 }
4190 return 0;
4191 }
4192
4193 /* Return 1 when the address is not valid for a simple load and store as
4194 required by the '_mov*' patterns. We could make this less strict
4195 for loads, but we prefer MEMs to look the same so they are more
4196 likely to be merged. */
4197 static int
4198 address_needs_split (rtx mem)
4199 {
4200 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4201 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4202 || !(store_with_one_insn_p (mem)
4203 || mem_is_padded_component_ref (mem))))
4204 return 1;
4205
4206 return 0;
4207 }
4208
4209 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4210 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4211 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4212
4213 /* MEM is known to be an __ea qualified memory access. Emit a call to
4214 fetch the PPU memory into local store, and return its address in local
4215 store. */
4216
4217 static void
4218 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4219 {
4220 if (is_store)
4221 {
4222 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4223 if (!cache_fetch_dirty)
4224 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4225 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4226 ea_addr, EAmode, ndirty, SImode);
4227 }
4228 else
4229 {
4230 if (!cache_fetch)
4231 cache_fetch = init_one_libfunc ("__cache_fetch");
4232 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4233 ea_addr, EAmode);
4234 }
4235 }
4236
4237 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4238 dirty bit marking, inline.
4239
4240 The cache control data structure is an array of
4241
4242 struct __cache_tag_array
4243 {
4244 unsigned int tag_lo[4];
4245 unsigned int tag_hi[4];
4246 void *data_pointer[4];
4247 int reserved[4];
4248 vector unsigned short dirty_bits[4];
4249 } */
4250
4251 static void
4252 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4253 {
4254 rtx ea_addr_si;
4255 HOST_WIDE_INT v;
4256 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4257 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4258 rtx index_mask = gen_reg_rtx (SImode);
4259 rtx tag_arr = gen_reg_rtx (Pmode);
4260 rtx splat_mask = gen_reg_rtx (TImode);
4261 rtx splat = gen_reg_rtx (V4SImode);
4262 rtx splat_hi = NULL_RTX;
4263 rtx tag_index = gen_reg_rtx (Pmode);
4264 rtx block_off = gen_reg_rtx (SImode);
4265 rtx tag_addr = gen_reg_rtx (Pmode);
4266 rtx tag = gen_reg_rtx (V4SImode);
4267 rtx cache_tag = gen_reg_rtx (V4SImode);
4268 rtx cache_tag_hi = NULL_RTX;
4269 rtx cache_ptrs = gen_reg_rtx (TImode);
4270 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4271 rtx tag_equal = gen_reg_rtx (V4SImode);
4272 rtx tag_equal_hi = NULL_RTX;
4273 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4274 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4275 rtx eq_index = gen_reg_rtx (SImode);
4276 rtx bcomp, hit_label, hit_ref, cont_label;
4277 rtx_insn *insn;
4278
4279 if (spu_ea_model != 32)
4280 {
4281 splat_hi = gen_reg_rtx (V4SImode);
4282 cache_tag_hi = gen_reg_rtx (V4SImode);
4283 tag_equal_hi = gen_reg_rtx (V4SImode);
4284 }
4285
4286 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
4287 emit_move_insn (tag_arr, tag_arr_sym);
4288 v = 0x0001020300010203LL;
4289 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4290 ea_addr_si = ea_addr;
4291 if (spu_ea_model != 32)
4292 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4293
4294 /* tag_index = ea_addr & (tag_array_size - 128) */
4295 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4296
4297 /* splat ea_addr to all 4 slots. */
4298 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4299 /* Similarly for high 32 bits of ea_addr. */
4300 if (spu_ea_model != 32)
4301 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4302
4303 /* block_off = ea_addr & 127 */
4304 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4305
4306 /* tag_addr = tag_arr + tag_index */
4307 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4308
4309 /* Read cache tags. */
4310 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4311 if (spu_ea_model != 32)
4312 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4313 plus_constant (Pmode,
4314 tag_addr, 16)));
4315
4316 /* tag = ea_addr & -128 */
4317 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4318
4319 /* Read all four cache data pointers. */
4320 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4321 plus_constant (Pmode,
4322 tag_addr, 32)));
4323
4324 /* Compare tags. */
4325 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4326 if (spu_ea_model != 32)
4327 {
4328 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4329 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4330 }
4331
4332 /* At most one of the tags compares equal, so tag_equal has one
4333 32-bit slot set to all 1's, with the other slots all zero.
4334 gbb picks off the low bit of each byte in the 128-bit register,
4335 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0 or 0x000f, assuming
4336 we have a hit. */
4337 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4338 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4339
4340 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4341 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4342
4343 /* This allows us to rotate the corresponding cache data pointer into
4344 slot 0 (rotating by eq_index mod 16 bytes). */
4345 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4346 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4347
4348 /* Add block offset to form final data address. */
4349 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4350
4351 /* Check that we did hit. */
4352 hit_label = gen_label_rtx ();
4353 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4354 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4355 insn = emit_jump_insn (gen_rtx_SET (pc_rtx,
4356 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4357 hit_ref, pc_rtx)));
4358 /* Say that this branch is very likely to happen. */
4359 add_reg_br_prob_note (insn, profile_probability::very_likely ());
4360
4361 ea_load_store (mem, is_store, ea_addr, data_addr);
4362 cont_label = gen_label_rtx ();
4363 emit_jump_insn (gen_jump (cont_label));
4364 emit_barrier ();
4365
4366 emit_label (hit_label);
4367
4368 if (is_store)
4369 {
4370 HOST_WIDE_INT v_hi;
4371 rtx dirty_bits = gen_reg_rtx (TImode);
4372 rtx dirty_off = gen_reg_rtx (SImode);
4373 rtx dirty_128 = gen_reg_rtx (TImode);
4374 rtx neg_block_off = gen_reg_rtx (SImode);
4375
4376 /* Set up mask with one dirty bit per byte of the mem we are
4377 writing, starting from top bit. */
4378 v_hi = v = -1;
4379 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4380 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4381 {
4382 v_hi = v;
4383 v = 0;
4384 }
4385 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4386
4387 /* Form index into cache dirty_bits. eq_index is one of
4388 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4389 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4390 offset to each of the four dirty_bits elements. */
4391 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4392
4393 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4394
4395 /* Rotate bit mask to proper bit. */
4396 emit_insn (gen_negsi2 (neg_block_off, block_off));
4397 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4398 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4399
4400 /* Or in the new dirty bits. */
4401 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4402
4403 /* Store. */
4404 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4405 }
4406
4407 emit_label (cont_label);
4408 }
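/* Added worked example (sketch, derived from the code above): suppose the
   tag in slot 2 matches.  tag_equal is { 0, 0, -1, 0 }, gbb packs that to
   0x00f0, and clz of the 32-bit result gives eq_index = 24.  Rotating
   cache_ptrs left by 24 mod 16 = 8 bytes moves data_pointer[2] into the
   preferred slot, and for a store dirty_off = 24 << 2 = 0x60, which is the
   offset of dirty_bits[2] in struct __cache_tag_array (dirty_bits starts
   at byte offset 64).  */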
4409
4410 static rtx
4411 expand_ea_mem (rtx mem, bool is_store)
4412 {
4413 rtx ea_addr;
4414 rtx data_addr = gen_reg_rtx (Pmode);
4415 rtx new_mem;
4416
4417 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4418 if (optimize_size || optimize == 0)
4419 ea_load_store (mem, is_store, ea_addr, data_addr);
4420 else
4421 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4422
4423 if (ea_alias_set == -1)
4424 ea_alias_set = new_alias_set ();
4425
4426 /* We generate a new MEM RTX to refer to the copy of the data
4427 in the cache. We do not copy memory attributes (except the
4428 alignment) from the original MEM, as they may no longer apply
4429 to the cache copy. */
4430 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4431 set_mem_alias_set (new_mem, ea_alias_set);
4432 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4433
4434 return new_mem;
4435 }
4436
4437 int
4438 spu_expand_mov (rtx * ops, machine_mode mode)
4439 {
4440 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4441 {
4442 /* Perform the move in the destination SUBREG's inner mode. */
4443 ops[0] = SUBREG_REG (ops[0]);
4444 mode = GET_MODE (ops[0]);
4445 ops[1] = gen_lowpart_common (mode, ops[1]);
4446 gcc_assert (ops[1]);
4447 }
4448
4449 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4450 {
4451 rtx from = SUBREG_REG (ops[1]);
4452 scalar_int_mode imode = int_mode_for_mode (GET_MODE (from)).require ();
4453
4454 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4455 && GET_MODE_CLASS (imode) == MODE_INT
4456 && subreg_lowpart_p (ops[1]));
4457
4458 if (GET_MODE_SIZE (imode) < 4)
4459 imode = SImode;
4460 if (imode != GET_MODE (from))
4461 from = gen_rtx_SUBREG (imode, from, 0);
4462
4463 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4464 {
4465 enum insn_code icode = convert_optab_handler (trunc_optab,
4466 mode, imode);
4467 emit_insn (GEN_FCN (icode) (ops[0], from));
4468 }
4469 else
4470 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4471 return 1;
4472 }
4473
4474 /* At least one of the operands needs to be a register. */
4475 if ((reload_in_progress | reload_completed) == 0
4476 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4477 {
4478 rtx temp = force_reg (mode, ops[1]);
4479 emit_move_insn (ops[0], temp);
4480 return 1;
4481 }
4482 if (reload_in_progress || reload_completed)
4483 {
4484 if (CONSTANT_P (ops[1]))
4485 return spu_split_immediate (ops);
4486 return 0;
4487 }
4488
4489 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4490 extend them. */
4491 if (GET_CODE (ops[1]) == CONST_INT)
4492 {
4493 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4494 if (val != INTVAL (ops[1]))
4495 {
4496 emit_move_insn (ops[0], GEN_INT (val));
4497 return 1;
4498 }
4499 }
4500 if (MEM_P (ops[0]))
4501 {
4502 if (MEM_ADDR_SPACE (ops[0]))
4503 ops[0] = expand_ea_mem (ops[0], true);
4504 return spu_split_store (ops);
4505 }
4506 if (MEM_P (ops[1]))
4507 {
4508 if (MEM_ADDR_SPACE (ops[1]))
4509 ops[1] = expand_ea_mem (ops[1], false);
4510 return spu_split_load (ops);
4511 }
4512
4513 return 0;
4514 }
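/* Added example (sketch): an SImode move of the constant 0x80000000 can
   arrive here as a CONST_INT whose INTVAL is the positive value
   0x80000000; trunc_int_for_mode returns the sign-extended -0x80000000,
   which differs, so the move is re-emitted with the canonical
   sign-extended constant as described in the comment above.  */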
4515
4516 static void
4517 spu_convert_move (rtx dst, rtx src)
4518 {
4519 machine_mode mode = GET_MODE (dst);
4520 machine_mode int_mode = int_mode_for_mode (mode).require ();
4521 rtx reg;
4522 gcc_assert (GET_MODE (src) == TImode);
4523 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4524 emit_insn (gen_rtx_SET (reg,
4525 gen_rtx_TRUNCATE (int_mode,
4526 gen_rtx_LSHIFTRT (TImode, src,
4527 GEN_INT (int_mode == DImode ? 64 : 96)))));
4528 if (int_mode != mode)
4529 {
4530 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4531 emit_move_insn (dst, reg);
4532 }
4533 }
4534
4535 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4536 the address from SRC and SRC+16. Return a REG or CONST_INT that
4537 specifies how many bytes to rotate the loaded registers, plus any
4538 extra from EXTRA_ROTBY. The address and rotate amounts are
4539 normalized to improve merging of loads and rotate computations. */
4540 static rtx
4541 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4542 {
4543 rtx addr = XEXP (src, 0);
4544 rtx p0, p1, rot, addr0, addr1;
4545 int rot_amt;
4546
4547 rot = 0;
4548 rot_amt = 0;
4549
4550 if (MEM_ALIGN (src) >= 128)
4551 /* Address is already aligned; simply perform a TImode load. */ ;
4552 else if (GET_CODE (addr) == PLUS)
4553 {
4554 /* 8 cases:
4555 aligned reg + aligned reg => lqx
4556 aligned reg + unaligned reg => lqx, rotqby
4557 aligned reg + aligned const => lqd
4558 aligned reg + unaligned const => lqd, rotqbyi
4559 unaligned reg + aligned reg => lqx, rotqby
4560 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4561 unaligned reg + aligned const => lqd, rotqby
4562 unaligned reg + unaligned const -> not allowed by legitimate address
4563 */
4564 p0 = XEXP (addr, 0);
4565 p1 = XEXP (addr, 1);
4566 if (!reg_aligned_for_addr (p0))
4567 {
4568 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4569 {
4570 rot = gen_reg_rtx (SImode);
4571 emit_insn (gen_addsi3 (rot, p0, p1));
4572 }
4573 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4574 {
4575 if (INTVAL (p1) > 0
4576 && REG_POINTER (p0)
4577 && INTVAL (p1) * BITS_PER_UNIT
4578 < REGNO_POINTER_ALIGN (REGNO (p0)))
4579 {
4580 rot = gen_reg_rtx (SImode);
4581 emit_insn (gen_addsi3 (rot, p0, p1));
4582 addr = p0;
4583 }
4584 else
4585 {
4586 rtx x = gen_reg_rtx (SImode);
4587 emit_move_insn (x, p1);
4588 if (!spu_arith_operand (p1, SImode))
4589 p1 = x;
4590 rot = gen_reg_rtx (SImode);
4591 emit_insn (gen_addsi3 (rot, p0, p1));
4592 addr = gen_rtx_PLUS (Pmode, p0, x);
4593 }
4594 }
4595 else
4596 rot = p0;
4597 }
4598 else
4599 {
4600 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4601 {
4602 rot_amt = INTVAL (p1) & 15;
4603 if (INTVAL (p1) & -16)
4604 {
4605 p1 = GEN_INT (INTVAL (p1) & -16);
4606 addr = gen_rtx_PLUS (SImode, p0, p1);
4607 }
4608 else
4609 addr = p0;
4610 }
4611 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4612 rot = p1;
4613 }
4614 }
4615 else if (REG_P (addr))
4616 {
4617 if (!reg_aligned_for_addr (addr))
4618 rot = addr;
4619 }
4620 else if (GET_CODE (addr) == CONST)
4621 {
4622 if (GET_CODE (XEXP (addr, 0)) == PLUS
4623 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4624 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4625 {
4626 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4627 if (rot_amt & -16)
4628 addr = gen_rtx_CONST (Pmode,
4629 gen_rtx_PLUS (Pmode,
4630 XEXP (XEXP (addr, 0), 0),
4631 GEN_INT (rot_amt & -16)));
4632 else
4633 addr = XEXP (XEXP (addr, 0), 0);
4634 }
4635 else
4636 {
4637 rot = gen_reg_rtx (Pmode);
4638 emit_move_insn (rot, addr);
4639 }
4640 }
4641 else if (GET_CODE (addr) == CONST_INT)
4642 {
4643 rot_amt = INTVAL (addr);
4644 addr = GEN_INT (rot_amt & -16);
4645 }
4646 else if (!ALIGNED_SYMBOL_REF_P (addr))
4647 {
4648 rot = gen_reg_rtx (Pmode);
4649 emit_move_insn (rot, addr);
4650 }
4651
4652 rot_amt += extra_rotby;
4653
4654 rot_amt &= 15;
4655
4656 if (rot && rot_amt)
4657 {
4658 rtx x = gen_reg_rtx (SImode);
4659 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4660 rot = x;
4661 rot_amt = 0;
4662 }
4663 if (!rot && rot_amt)
4664 rot = GEN_INT (rot_amt);
4665
4666 addr0 = copy_rtx (addr);
4667 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4668 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4669
4670 if (dst1)
4671 {
4672 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
4673 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4674 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4675 }
4676
4677 return rot;
4678 }
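/* Added worked example (sketch): take the "aligned reg + unaligned const"
   case with address (plus p0 21) and p0 known to be 16-byte aligned.
   rot_amt becomes 21 & 15 = 5, the address is rewritten to (plus p0 16),
   the enclosing quadword is loaded from that (now 16-byte aligned)
   address, and the caller rotates the result left by the returned amount
   (5 here, plus any extra_rotby adjustment) so the requested bytes land
   at the start of the register.  */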
4679
4680 int
4681 spu_split_load (rtx * ops)
4682 {
4683 machine_mode mode = GET_MODE (ops[0]);
4684 rtx addr, load, rot;
4685 int rot_amt;
4686
4687 if (GET_MODE_SIZE (mode) >= 16)
4688 return 0;
4689
4690 addr = XEXP (ops[1], 0);
4691 gcc_assert (GET_CODE (addr) != AND);
4692
4693 if (!address_needs_split (ops[1]))
4694 {
4695 ops[1] = change_address (ops[1], TImode, addr);
4696 load = gen_reg_rtx (TImode);
4697 emit_insn (gen__movti (load, ops[1]));
4698 spu_convert_move (ops[0], load);
4699 return 1;
4700 }
4701
4702 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4703
4704 load = gen_reg_rtx (TImode);
4705 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4706
4707 if (rot)
4708 emit_insn (gen_rotqby_ti (load, load, rot));
4709
4710 spu_convert_move (ops[0], load);
4711 return 1;
4712 }
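/* Added example (sketch): loading a QImode value whose address is 7 mod 16
   uses extra rotate 1 - 4 = -3, so the total rotate is (7 - 3) & 15 = 4
   bytes; the addressed byte moves from position 7 to position 3, the low
   byte of the preferred SImode slot, which is exactly the byte
   spu_convert_move extracts for QImode.  */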
4713
4714 int
4715 spu_split_store (rtx * ops)
4716 {
4717 machine_mode mode = GET_MODE (ops[0]);
4718 rtx reg;
4719 rtx addr, p0, p1, p1_lo, smem;
4720 int aform;
4721 int scalar;
4722
4723 if (GET_MODE_SIZE (mode) >= 16)
4724 return 0;
4725
4726 addr = XEXP (ops[0], 0);
4727 gcc_assert (GET_CODE (addr) != AND);
4728
4729 if (!address_needs_split (ops[0]))
4730 {
4731 reg = gen_reg_rtx (TImode);
4732 emit_insn (gen_spu_convert (reg, ops[1]));
4733 ops[0] = change_address (ops[0], TImode, addr);
4734 emit_move_insn (ops[0], reg);
4735 return 1;
4736 }
4737
4738 if (GET_CODE (addr) == PLUS)
4739 {
4740 /* 8 cases:
4741 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4742 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4743 aligned reg + aligned const => lqd, c?d, shuf, stqx
4744 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4745 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4746 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4747 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4748 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4749 */
4750 aform = 0;
4751 p0 = XEXP (addr, 0);
4752 p1 = p1_lo = XEXP (addr, 1);
4753 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4754 {
4755 p1_lo = GEN_INT (INTVAL (p1) & 15);
4756 if (reg_aligned_for_addr (p0))
4757 {
4758 p1 = GEN_INT (INTVAL (p1) & -16);
4759 if (p1 == const0_rtx)
4760 addr = p0;
4761 else
4762 addr = gen_rtx_PLUS (SImode, p0, p1);
4763 }
4764 else
4765 {
4766 rtx x = gen_reg_rtx (SImode);
4767 emit_move_insn (x, p1);
4768 addr = gen_rtx_PLUS (SImode, p0, x);
4769 }
4770 }
4771 }
4772 else if (REG_P (addr))
4773 {
4774 aform = 0;
4775 p0 = addr;
4776 p1 = p1_lo = const0_rtx;
4777 }
4778 else
4779 {
4780 aform = 1;
4781 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4782 p1 = 0; /* aform doesn't use p1 */
4783 p1_lo = addr;
4784 if (ALIGNED_SYMBOL_REF_P (addr))
4785 p1_lo = const0_rtx;
4786 else if (GET_CODE (addr) == CONST
4787 && GET_CODE (XEXP (addr, 0)) == PLUS
4788 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4789 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4790 {
4791 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4792 if ((v & -16) != 0)
4793 addr = gen_rtx_CONST (Pmode,
4794 gen_rtx_PLUS (Pmode,
4795 XEXP (XEXP (addr, 0), 0),
4796 GEN_INT (v & -16)));
4797 else
4798 addr = XEXP (XEXP (addr, 0), 0);
4799 p1_lo = GEN_INT (v & 15);
4800 }
4801 else if (GET_CODE (addr) == CONST_INT)
4802 {
4803 p1_lo = GEN_INT (INTVAL (addr) & 15);
4804 addr = GEN_INT (INTVAL (addr) & -16);
4805 }
4806 else
4807 {
4808 p1_lo = gen_reg_rtx (SImode);
4809 emit_move_insn (p1_lo, addr);
4810 }
4811 }
4812
4813 gcc_assert (aform == 0 || aform == 1);
4814 reg = gen_reg_rtx (TImode);
4815
4816 scalar = store_with_one_insn_p (ops[0]);
4817 if (!scalar)
4818 {
4819 /* We could copy the flags from the ops[0] MEM to the new MEM here,
4820 but we don't because we want this load to be optimized away if
4821 possible, and copying the flags would prevent that in certain
4822 cases, e.g. consider the volatile flag. */
4823
4824 rtx pat = gen_reg_rtx (TImode);
4825 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4826 set_mem_alias_set (lmem, 0);
4827 emit_insn (gen_movti (reg, lmem));
4828
4829 if (!p0 || reg_aligned_for_addr (p0))
4830 p0 = stack_pointer_rtx;
4831 if (!p1_lo)
4832 p1_lo = const0_rtx;
4833
4834 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4835 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4836 }
4837 else
4838 {
4839 if (GET_CODE (ops[1]) == REG)
4840 emit_insn (gen_spu_convert (reg, ops[1]));
4841 else if (GET_CODE (ops[1]) == SUBREG)
4842 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4843 else
4844 abort ();
4845 }
4846
4847 if (GET_MODE_SIZE (mode) < 4 && scalar)
4848 emit_insn (gen_ashlti3
4849 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4850
4851 smem = change_address (ops[0], TImode, copy_rtx (addr));
4852 /* We can't use the previous alias set because the memory has changed
4853 size and can potentially overlap objects of other types. */
4854 set_mem_alias_set (smem, 0);
4855
4856 emit_insn (gen_movti (smem, reg));
4857 return 1;
4858 }
4859
4860 /* Return TRUE if X is MEM which is a struct member reference
4861 and the member can safely be loaded and stored with a single
4862 instruction because it is padded. */
4863 static int
4864 mem_is_padded_component_ref (rtx x)
4865 {
4866 tree t = MEM_EXPR (x);
4867 tree r;
4868 if (!t || TREE_CODE (t) != COMPONENT_REF)
4869 return 0;
4870 t = TREE_OPERAND (t, 1);
4871 if (!t || TREE_CODE (t) != FIELD_DECL
4872 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4873 return 0;
4874 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4875 r = DECL_FIELD_CONTEXT (t);
4876 if (!r || TREE_CODE (r) != RECORD_TYPE)
4877 return 0;
4878 /* Make sure they are the same mode. */
4879 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4880 return 0;
4881 /* If there are no following fields then the field alignment assures
4882 the structure is padded to the alignment which means this field is
4883 padded too. */
4884 if (TREE_CHAIN (t) == 0)
4885 return 1;
4886 /* If the following field is also aligned then this field will be
4887 padded. */
4888 t = TREE_CHAIN (t);
4889 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4890 return 1;
4891 return 0;
4892 }
4893
4894 /* Parse the -mfixed-range= option string. */
4895 static void
4896 fix_range (const char *const_str)
4897 {
4898 int i, first, last;
4899 char *str, *dash, *comma;
4900
4901 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4902 REG2 are either register names or register numbers. The effect
4903 of this option is to mark the registers in the range from REG1 to
4904 REG2 as ``fixed'' so they won't be used by the compiler. */
4905
4906 i = strlen (const_str);
4907 str = (char *) alloca (i + 1);
4908 memcpy (str, const_str, i + 1);
4909
4910 while (1)
4911 {
4912 dash = strchr (str, '-');
4913 if (!dash)
4914 {
4915 warning (0, "value of -mfixed-range must have form REG1-REG2");
4916 return;
4917 }
4918 *dash = '\0';
4919 comma = strchr (dash + 1, ',');
4920 if (comma)
4921 *comma = '\0';
4922
4923 first = decode_reg_name (str);
4924 if (first < 0)
4925 {
4926 warning (0, "unknown register name: %s", str);
4927 return;
4928 }
4929
4930 last = decode_reg_name (dash + 1);
4931 if (last < 0)
4932 {
4933 warning (0, "unknown register name: %s", dash + 1);
4934 return;
4935 }
4936
4937 *dash = '-';
4938
4939 if (first > last)
4940 {
4941 warning (0, "%s-%s is an empty range", str, dash + 1);
4942 return;
4943 }
4944
4945 for (i = first; i <= last; ++i)
4946 fixed_regs[i] = call_used_regs[i] = 1;
4947
4948 if (!comma)
4949 break;
4950
4951 *comma = ',';
4952 str = comma + 1;
4953 }
4954 }
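/* Added usage example (sketch): -mfixed-range=80-127 marks registers 80
   through 127 as fixed and call-used so the register allocator will not
   touch them; multiple ranges may be given, e.g.
   -mfixed-range=80-89,100-127.  */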
4955
4956 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4957 can be generated using the fsmbi instruction. */
4958 int
4959 fsmbi_const_p (rtx x)
4960 {
4961 if (CONSTANT_P (x))
4962 {
4963 /* We can always choose TImode for CONST_INT because the high bits
4964 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4965 enum immediate_class c = classify_immediate (x, TImode);
4966 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4967 }
4968 return 0;
4969 }
4970
4971 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4972 can be generated using the cbd, chd, cwd or cdd instruction. */
4973 int
4974 cpat_const_p (rtx x, machine_mode mode)
4975 {
4976 if (CONSTANT_P (x))
4977 {
4978 enum immediate_class c = classify_immediate (x, mode);
4979 return c == IC_CPAT;
4980 }
4981 return 0;
4982 }
4983
4984 rtx
4985 gen_cpat_const (rtx * ops)
4986 {
4987 unsigned char dst[16];
4988 int i, offset, shift, isize;
4989 if (GET_CODE (ops[3]) != CONST_INT
4990 || GET_CODE (ops[2]) != CONST_INT
4991 || (GET_CODE (ops[1]) != CONST_INT
4992 && GET_CODE (ops[1]) != REG))
4993 return 0;
4994 if (GET_CODE (ops[1]) == REG
4995 && (!REG_POINTER (ops[1])
4996 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4997 return 0;
4998
4999 for (i = 0; i < 16; i++)
5000 dst[i] = i + 16;
5001 isize = INTVAL (ops[3]);
5002 if (isize == 1)
5003 shift = 3;
5004 else if (isize == 2)
5005 shift = 2;
5006 else
5007 shift = 0;
5008 offset = (INTVAL (ops[2]) +
5009 (GET_CODE (ops[1]) ==
5010 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5011 for (i = 0; i < isize; i++)
5012 dst[offset + i] = i + shift;
5013 return array_to_constant (TImode, dst);
5014 }
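/* Added worked example (sketch): for a 4-byte insertion at offset 4
   (ops[1] = 0, ops[2] = 4, ops[3] = 4) we get shift = 0 and offset = 4,
   and the returned TImode constant is the shuffle pattern

     { 16,17,18,19, 0,1,2,3, 24,25,26,27, 28,29,30,31 }

   i.e. bytes 0-3 of the first shufb operand replace bytes 4-7 of the
   second while all other bytes come from the second operand; this is the
   same control word cwd would compute at run time.  */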
5015
5016 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16-byte
5017 array. Use MODE for CONST_INTs. When the constant's mode is smaller
5018 than 16 bytes, the value is repeated across the rest of the array. */
5019 void
5020 constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
5021 {
5022 HOST_WIDE_INT val;
5023 int i, j, first;
5024
5025 memset (arr, 0, 16);
5026 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5027 if (GET_CODE (x) == CONST_INT
5028 || (GET_CODE (x) == CONST_DOUBLE
5029 && (mode == SFmode || mode == DFmode)))
5030 {
5031 gcc_assert (mode != VOIDmode && mode != BLKmode);
5032
5033 if (GET_CODE (x) == CONST_DOUBLE)
5034 val = const_double_to_hwint (x);
5035 else
5036 val = INTVAL (x);
5037 first = GET_MODE_SIZE (mode) - 1;
5038 for (i = first; i >= 0; i--)
5039 {
5040 arr[i] = val & 0xff;
5041 val >>= 8;
5042 }
5043 /* Splat the constant across the whole array. */
5044 for (j = 0, i = first + 1; i < 16; i++)
5045 {
5046 arr[i] = arr[j];
5047 j = (j == first) ? 0 : j + 1;
5048 }
5049 }
5050 else if (GET_CODE (x) == CONST_DOUBLE)
5051 {
5052 val = CONST_DOUBLE_LOW (x);
5053 for (i = 15; i >= 8; i--)
5054 {
5055 arr[i] = val & 0xff;
5056 val >>= 8;
5057 }
5058 val = CONST_DOUBLE_HIGH (x);
5059 for (i = 7; i >= 0; i--)
5060 {
5061 arr[i] = val & 0xff;
5062 val >>= 8;
5063 }
5064 }
5065 else if (GET_CODE (x) == CONST_VECTOR)
5066 {
5067 int units;
5068 rtx elt;
5069 mode = GET_MODE_INNER (mode);
5070 units = CONST_VECTOR_NUNITS (x);
5071 for (i = 0; i < units; i++)
5072 {
5073 elt = CONST_VECTOR_ELT (x, i);
5074 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5075 {
5076 if (GET_CODE (elt) == CONST_DOUBLE)
5077 val = const_double_to_hwint (elt);
5078 else
5079 val = INTVAL (elt);
5080 first = GET_MODE_SIZE (mode) - 1;
5081 if (first + i * GET_MODE_SIZE (mode) > 16)
5082 abort ();
5083 for (j = first; j >= 0; j--)
5084 {
5085 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5086 val >>= 8;
5087 }
5088 }
5089 }
5090 }
5091 else
5092 gcc_unreachable();
5093 }
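/* Added worked example (sketch): for the SImode constant 0x01020304 the
   first loop fills arr[0..3] = { 0x01, 0x02, 0x03, 0x04 } and the splat
   loop then repeats that 4-byte pattern across the remaining 12 bytes,
   so the whole array reads 01 02 03 04 01 02 03 04 ... as the comment
   above describes.  */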
5094
5095 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5096 smaller than 16 bytes, use the bytes that would represent that value
5097 in a register, e.g., for QImode return the value of arr[3]. */
5098 rtx
5099 array_to_constant (machine_mode mode, const unsigned char arr[16])
5100 {
5101 machine_mode inner_mode;
5102 rtvec v;
5103 int units, size, i, j, k;
5104 HOST_WIDE_INT val;
5105
5106 if (GET_MODE_CLASS (mode) == MODE_INT
5107 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5108 {
5109 j = GET_MODE_SIZE (mode);
5110 i = j < 4 ? 4 - j : 0;
5111 for (val = 0; i < j; i++)
5112 val = (val << 8) | arr[i];
5113 val = trunc_int_for_mode (val, mode);
5114 return GEN_INT (val);
5115 }
5116
5117 if (mode == TImode)
5118 {
5119 HOST_WIDE_INT high;
5120 for (i = high = 0; i < 8; i++)
5121 high = (high << 8) | arr[i];
5122 for (i = 8, val = 0; i < 16; i++)
5123 val = (val << 8) | arr[i];
5124 return immed_double_const (val, high, TImode);
5125 }
5126 if (mode == SFmode)
5127 {
5128 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5129 val = trunc_int_for_mode (val, SImode);
5130 return hwint_to_const_double (SFmode, val);
5131 }
5132 if (mode == DFmode)
5133 {
5134 for (i = 0, val = 0; i < 8; i++)
5135 val = (val << 8) | arr[i];
5136 return hwint_to_const_double (DFmode, val);
5137 }
5138
5139 if (!VECTOR_MODE_P (mode))
5140 abort ();
5141
5142 units = GET_MODE_NUNITS (mode);
5143 size = GET_MODE_UNIT_SIZE (mode);
5144 inner_mode = GET_MODE_INNER (mode);
5145 v = rtvec_alloc (units);
5146
5147 for (k = i = 0; i < units; ++i)
5148 {
5149 val = 0;
5150 for (j = 0; j < size; j++, k++)
5151 val = (val << 8) | arr[k];
5152
5153 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5154 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5155 else
5156 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5157 }
5158 if (k > 16)
5159 abort ();
5160
5161 return gen_rtx_CONST_VECTOR (mode, v);
5162 }
5163
5164 static void
5165 reloc_diagnostic (rtx x)
5166 {
5167 tree decl = 0;
5168 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5169 return;
5170
5171 if (GET_CODE (x) == SYMBOL_REF)
5172 decl = SYMBOL_REF_DECL (x);
5173 else if (GET_CODE (x) == CONST
5174 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5175 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5176
5177 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5178 if (decl && !DECL_P (decl))
5179 decl = 0;
5180
5181 /* The decl could be a string constant. */
5182 if (decl && DECL_P (decl))
5183 {
5184 location_t loc;
5185 /* We use last_assemble_variable_decl to get line information. It's
5186 not always going to be right and might not even be close, but will
5187 be right for the more common cases. */
5188 if (!last_assemble_variable_decl || in_section == ctors_section)
5189 loc = DECL_SOURCE_LOCATION (decl);
5190 else
5191 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5192
5193 if (TARGET_WARN_RELOC)
5194 warning_at (loc, 0,
5195 "creating run-time relocation for %qD", decl);
5196 else
5197 error_at (loc,
5198 "creating run-time relocation for %qD", decl);
5199 }
5200 else
5201 {
5202 if (TARGET_WARN_RELOC)
5203 warning_at (input_location, 0, "creating run-time relocation");
5204 else
5205 error_at (input_location, "creating run-time relocation");
5206 }
5207 }
5208
5209 /* Hook into assemble_integer so we can generate an error for run-time
5210 relocations. The SPU ABI disallows them. */
5211 static bool
5212 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5213 {
5214 /* By default run-time relocations aren't supported, but we allow them
5215 in case users support it in their own run-time loader. And we provide
5216 a warning for those users that don't. */
5217 if ((GET_CODE (x) == SYMBOL_REF)
5218 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5219 reloc_diagnostic (x);
5220
5221 return default_assemble_integer (x, size, aligned_p);
5222 }
5223
5224 static void
5225 spu_asm_globalize_label (FILE * file, const char *name)
5226 {
5227 fputs ("\t.global\t", file);
5228 assemble_name (file, name);
5229 fputs ("\n", file);
5230 }
5231
5232 static bool
5233 spu_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
5234 int opno ATTRIBUTE_UNUSED, int *total,
5235 bool speed ATTRIBUTE_UNUSED)
5236 {
5237 int code = GET_CODE (x);
5238 int cost = COSTS_N_INSNS (2);
5239
5240 /* Folding to a CONST_VECTOR will use extra space but there might
5241 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5242 only if it allows us to fold away multiple insns. Changing the cost
5243 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5244 because this cost will only be compared against a single insn.
5245 if (code == CONST_VECTOR)
5246 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5247 */
5248
5249 /* Use defaults for float operations. Not accurate but good enough. */
5250 if (mode == DFmode)
5251 {
5252 *total = COSTS_N_INSNS (13);
5253 return true;
5254 }
5255 if (mode == SFmode)
5256 {
5257 *total = COSTS_N_INSNS (6);
5258 return true;
5259 }
5260 switch (code)
5261 {
5262 case CONST_INT:
5263 if (satisfies_constraint_K (x))
5264 *total = 0;
5265 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5266 *total = COSTS_N_INSNS (1);
5267 else
5268 *total = COSTS_N_INSNS (3);
5269 return true;
5270
5271 case CONST:
5272 *total = COSTS_N_INSNS (3);
5273 return true;
5274
5275 case LABEL_REF:
5276 case SYMBOL_REF:
5277 *total = COSTS_N_INSNS (0);
5278 return true;
5279
5280 case CONST_DOUBLE:
5281 *total = COSTS_N_INSNS (5);
5282 return true;
5283
5284 case FLOAT_EXTEND:
5285 case FLOAT_TRUNCATE:
5286 case FLOAT:
5287 case UNSIGNED_FLOAT:
5288 case FIX:
5289 case UNSIGNED_FIX:
5290 *total = COSTS_N_INSNS (7);
5291 return true;
5292
5293 case PLUS:
5294 if (mode == TImode)
5295 {
5296 *total = COSTS_N_INSNS (9);
5297 return true;
5298 }
5299 break;
5300
5301 case MULT:
5302 cost =
5303 GET_CODE (XEXP (x, 0)) ==
5304 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5305 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5306 {
5307 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5308 {
5309 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5310 cost = COSTS_N_INSNS (14);
5311 if ((val & 0xffff) == 0)
5312 cost = COSTS_N_INSNS (9);
5313 else if (val > 0 && val < 0x10000)
5314 cost = COSTS_N_INSNS (11);
5315 }
5316 }
5317 *total = cost;
5318 return true;
5319 case DIV:
5320 case UDIV:
5321 case MOD:
5322 case UMOD:
5323 *total = COSTS_N_INSNS (20);
5324 return true;
5325 case ROTATE:
5326 case ROTATERT:
5327 case ASHIFT:
5328 case ASHIFTRT:
5329 case LSHIFTRT:
5330 *total = COSTS_N_INSNS (4);
5331 return true;
5332 case UNSPEC:
5333 if (XINT (x, 1) == UNSPEC_CONVERT)
5334 *total = COSTS_N_INSNS (0);
5335 else
5336 *total = COSTS_N_INSNS (4);
5337 return true;
5338 }
5339 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5340 if (GET_MODE_CLASS (mode) == MODE_INT
5341 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5342 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5343 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5344 *total = cost;
5345 return true;
5346 }
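/* Added example (sketch): with the scaling above, a plain DImode integer
   operation is costed at (8/4) * (8/4) = 4 times the SImode base cost and
   a TImode one at 16 times, roughly reflecting that wider-than-SImode
   integer operations are synthesized from several instructions.  */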
5347
5348 static scalar_int_mode
5349 spu_unwind_word_mode (void)
5350 {
5351 return SImode;
5352 }
5353
5354 /* Decide whether we can make a sibling call to a function. DECL is the
5355 declaration of the function being targeted by the call and EXP is the
5356 CALL_EXPR representing the call. */
5357 static bool
5358 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5359 {
5360 return decl && !TARGET_LARGE_MEM;
5361 }
5362
5363 /* We need to correctly update the back chain pointer and the Available
5364 Stack Size (which is in the second slot of the sp register). */
5365 void
5366 spu_allocate_stack (rtx op0, rtx op1)
5367 {
5368 HOST_WIDE_INT v;
5369 rtx chain = gen_reg_rtx (V4SImode);
5370 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5371 rtx sp = gen_reg_rtx (V4SImode);
5372 rtx splatted = gen_reg_rtx (V4SImode);
5373 rtx pat = gen_reg_rtx (TImode);
5374
5375 /* copy the back chain so we can save it back again. */
5376 emit_move_insn (chain, stack_bot);
5377
5378 op1 = force_reg (SImode, op1);
5379
5380 v = 0x1020300010203ll;
5381 emit_move_insn (pat, immed_double_const (v, v, TImode));
5382 emit_insn (gen_shufb (splatted, op1, op1, pat));
5383
5384 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5385 emit_insn (gen_subv4si3 (sp, sp, splatted));
5386
5387 if (flag_stack_check || flag_stack_clash_protection)
5388 {
5389 rtx avail = gen_reg_rtx(SImode);
5390 rtx result = gen_reg_rtx(SImode);
5391 emit_insn (gen_vec_extractv4sisi (avail, sp, GEN_INT (1)));
5392 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5393 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5394 }
5395
5396 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5397
5398 emit_move_insn (stack_bot, chain);
5399
5400 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5401 }
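/* Added note (sketch): the shuffle constant built above replicates bytes
   0-3 of op1 (the SImode allocation size sitting in the preferred slot)
   into every word slot, so the single subv4si3 decrements both the stack
   pointer in slot 0 and the Available Stack Size in slot 1 by the same
   amount.  */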
5402
5403 void
5404 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5405 {
5406 static unsigned char arr[16] =
5407 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5408 rtx temp = gen_reg_rtx (SImode);
5409 rtx temp2 = gen_reg_rtx (SImode);
5410 rtx temp3 = gen_reg_rtx (V4SImode);
5411 rtx temp4 = gen_reg_rtx (V4SImode);
5412 rtx pat = gen_reg_rtx (TImode);
5413 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5414
5415 /* Restore the backchain from the first word, sp from the second. */
5416 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5417 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5418
5419 emit_move_insn (pat, array_to_constant (TImode, arr));
5420
5421 /* Compute Available Stack Size for sp */
5422 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5423 emit_insn (gen_shufb (temp3, temp, temp, pat));
5424
5425 /* Compute Available Stack Size for back chain */
5426 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5427 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5428 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5429
5430 emit_insn (gen_addv4si3 (sp, sp, temp3));
5431 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5432 }
5433
5434 static void
5435 spu_init_libfuncs (void)
5436 {
5437 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5438 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5439 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5440 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5441 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5442 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5443 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5444 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5445 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5446 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5447 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5448 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5449
5450 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5451 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5452
5453 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5454 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5455 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5456 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5457 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5458 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5459 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5460 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5461 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5462 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5463 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5464 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5465
5466 set_optab_libfunc (smul_optab, TImode, "__multi3");
5467 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5468 set_optab_libfunc (smod_optab, TImode, "__modti3");
5469 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5470 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5471 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5472 }
5473
5474 /* Make a subreg, stripping any existing subreg. We could possibly just
5475 call simplify_subreg, but in this case we know what we want. */
5476 rtx
5477 spu_gen_subreg (machine_mode mode, rtx x)
5478 {
5479 if (GET_CODE (x) == SUBREG)
5480 x = SUBREG_REG (x);
5481 if (GET_MODE (x) == mode)
5482 return x;
5483 return gen_rtx_SUBREG (mode, x, 0);
5484 }
5485
5486 static bool
5487 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5488 {
5489 return (TYPE_MODE (type) == BLKmode
5490 && ((type) == 0
5491 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5492 || int_size_in_bytes (type) >
5493 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5494 }
5495 \f
5496 /* Create the built-in types and functions */
5497
5498 enum spu_function_code
5499 {
5500 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5501 #include "spu-builtins.def"
5502 #undef DEF_BUILTIN
5503 NUM_SPU_BUILTINS
5504 };
5505
5506 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5507
5508 struct spu_builtin_description spu_builtins[] = {
5509 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5510 {fcode, icode, name, type, params},
5511 #include "spu-builtins.def"
5512 #undef DEF_BUILTIN
5513 };
5514
5515 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5516
5517 /* Returns the spu builtin decl for CODE. */
5518
5519 static tree
5520 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5521 {
5522 if (code >= NUM_SPU_BUILTINS)
5523 return error_mark_node;
5524
5525 return spu_builtin_decls[code];
5526 }
5527
5528
5529 static void
5530 spu_init_builtins (void)
5531 {
5532 struct spu_builtin_description *d;
5533 unsigned int i;
5534
5535 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5536 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5537 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5538 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5539 V4SF_type_node = build_vector_type (float_type_node, 4);
5540 V2DF_type_node = build_vector_type (double_type_node, 2);
5541
5542 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5543 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5544 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5545 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5546
5547 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5548
5549 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5550 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5551 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5552 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5553 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5554 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5555 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5556 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5557 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5558 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5559 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5560 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5561
5562 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5563 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5564 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5565 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5566 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5567 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5568 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5569 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5570
5571 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5572 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5573
5574 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5575
5576 spu_builtin_types[SPU_BTI_PTR] =
5577 build_pointer_type (build_qualified_type
5578 (void_type_node,
5579 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5580
5581 /* For each builtin we build a new prototype. The tree code will make
5582 sure nodes are shared. */
5583 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5584 {
5585 tree p;
5586 char name[64]; /* add_builtin_function will make a copy. */
5587 int parm;
5588
5589 if (d->name == 0)
5590 continue;
5591
5592 /* Find last parm. */
5593 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5594 ;
5595
5596 p = void_list_node;
5597 while (parm > 1)
5598 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5599
5600 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5601
5602 sprintf (name, "__builtin_%s", d->name);
5603 spu_builtin_decls[i] =
5604 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5605 if (d->fcode == SPU_MASK_FOR_LOAD)
5606 TREE_READONLY (spu_builtin_decls[i]) = 1;
5607
5608 /* These builtins don't throw. */
5609 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5610 }
5611 }
5612
5613 void
5614 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5615 {
5616 static unsigned char arr[16] =
5617 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5618
5619 rtx temp = gen_reg_rtx (Pmode);
5620 rtx temp2 = gen_reg_rtx (V4SImode);
5621 rtx temp3 = gen_reg_rtx (V4SImode);
5622 rtx pat = gen_reg_rtx (TImode);
5623 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5624
5625 emit_move_insn (pat, array_to_constant (TImode, arr));
5626
5627 /* Restore the sp. */
5628 emit_move_insn (temp, op1);
5629 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5630
5631 /* Compute available stack size for sp. */
5632 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5633 emit_insn (gen_shufb (temp3, temp, temp, pat));
5634
5635 emit_insn (gen_addv4si3 (sp, sp, temp3));
5636 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5637 }
5638
5639 int
5640 spu_safe_dma (HOST_WIDE_INT channel)
5641 {
5642 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5643 }
5644
5645 void
5646 spu_builtin_splats (rtx ops[])
5647 {
5648 machine_mode mode = GET_MODE (ops[0]);
5649 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5650 {
5651 unsigned char arr[16];
5652 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5653 emit_move_insn (ops[0], array_to_constant (mode, arr));
5654 }
5655 else
5656 {
5657 rtx reg = gen_reg_rtx (TImode);
5658 rtx shuf;
5659 if (GET_CODE (ops[1]) != REG
5660 && GET_CODE (ops[1]) != SUBREG)
5661 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5662 switch (mode)
5663 {
5664 case E_V2DImode:
5665 case E_V2DFmode:
5666 shuf =
5667 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5668 TImode);
5669 break;
5670 case E_V4SImode:
5671 case E_V4SFmode:
5672 shuf =
5673 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5674 TImode);
5675 break;
5676 case E_V8HImode:
5677 shuf =
5678 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5679 TImode);
5680 break;
5681 case E_V16QImode:
5682 shuf =
5683 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5684 TImode);
5685 break;
5686 default:
5687 abort ();
5688 }
5689 emit_move_insn (reg, shuf);
5690 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5691 }
5692 }
5693
5694 void
5695 spu_builtin_extract (rtx ops[])
5696 {
5697 machine_mode mode;
5698 rtx rot, from, tmp;
5699
5700 mode = GET_MODE (ops[1]);
5701
5702 if (GET_CODE (ops[2]) == CONST_INT)
5703 {
5704 switch (mode)
5705 {
5706 case E_V16QImode:
5707 emit_insn (gen_vec_extractv16qiqi (ops[0], ops[1], ops[2]));
5708 break;
5709 case E_V8HImode:
5710 emit_insn (gen_vec_extractv8hihi (ops[0], ops[1], ops[2]));
5711 break;
5712 case E_V4SFmode:
5713 emit_insn (gen_vec_extractv4sfsf (ops[0], ops[1], ops[2]));
5714 break;
5715 case E_V4SImode:
5716 emit_insn (gen_vec_extractv4sisi (ops[0], ops[1], ops[2]));
5717 break;
5718 case E_V2DImode:
5719 emit_insn (gen_vec_extractv2didi (ops[0], ops[1], ops[2]));
5720 break;
5721 case E_V2DFmode:
5722 emit_insn (gen_vec_extractv2dfdf (ops[0], ops[1], ops[2]));
5723 break;
5724 default:
5725 abort ();
5726 }
5727 return;
5728 }
5729
5730 from = spu_gen_subreg (TImode, ops[1]);
5731 rot = gen_reg_rtx (TImode);
5732 tmp = gen_reg_rtx (SImode);
5733
5734 switch (mode)
5735 {
5736 case E_V16QImode:
5737 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5738 break;
5739 case E_V8HImode:
5740 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5741 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5742 break;
5743 case E_V4SFmode:
5744 case E_V4SImode:
5745 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5746 break;
5747 case E_V2DImode:
5748 case E_V2DFmode:
5749 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5750 break;
5751 default:
5752 abort ();
5753 }
5754 emit_insn (gen_rotqby_ti (rot, from, tmp));
5755
5756 emit_insn (gen_spu_convert (ops[0], rot));
5757 }
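/* Added example (sketch): extracting from a V4SI vector with a
   non-constant index takes the path above; if the index register holds 2
   at run time, tmp becomes 2 << 2 = 8, rotating the quadword left by 8
   bytes moves element 2 into bytes 0-3 (the preferred slot), and
   spu_convert then picks up the SImode result from there.  */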
5758
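/* Expand an insert: place the scalar OPS[1] into element OPS[3] of the
vector OPS[2], writing the result to OPS[0]. A cpat mask selects the
bytes of the target element for the shufb merge. */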
5759 void
5760 spu_builtin_insert (rtx ops[])
5761 {
5762 machine_mode mode = GET_MODE (ops[0]);
5763 machine_mode imode = GET_MODE_INNER (mode);
5764 rtx mask = gen_reg_rtx (TImode);
5765 rtx offset;
5766
5767 if (GET_CODE (ops[3]) == CONST_INT)
5768 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5769 else
5770 {
5771 offset = gen_reg_rtx (SImode);
5772 emit_insn (gen_mulsi3
5773 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5774 }
5775 emit_insn (gen_cpat
5776 (mask, stack_pointer_rtx, offset,
5777 GEN_INT (GET_MODE_SIZE (imode))));
5778 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5779 }
5780
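/* Expand a promote: place the scalar OPS[1] into element OPS[2] of the
vector OPS[0], leaving the remaining elements undefined. The value is
rotated into position within the quadword. */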
5781 void
5782 spu_builtin_promote (rtx ops[])
5783 {
5784 machine_mode mode, imode;
5785 rtx rot, from, offset;
5786 HOST_WIDE_INT pos;
5787
5788 mode = GET_MODE (ops[0]);
5789 imode = GET_MODE_INNER (mode);
5790
5791 from = gen_reg_rtx (TImode);
5792 rot = spu_gen_subreg (TImode, ops[0]);
5793
5794 emit_insn (gen_spu_convert (from, ops[1]));
5795
5796 if (GET_CODE (ops[2]) == CONST_INT)
5797 {
5798 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5799 if (GET_MODE_SIZE (imode) < 4)
5800 pos += 4 - GET_MODE_SIZE (imode);
5801 offset = GEN_INT (pos & 15);
5802 }
5803 else
5804 {
5805 offset = gen_reg_rtx (SImode);
5806 switch (mode)
5807 {
5808 case E_V16QImode:
5809 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5810 break;
5811 case E_V8HImode:
5812 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5813 emit_insn (gen_addsi3 (offset, offset, offset));
5814 break;
5815 case E_V4SFmode:
5816 case E_V4SImode:
5817 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5818 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5819 break;
5820 case E_V2DImode:
5821 case E_V2DFmode:
5822 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5823 break;
5824 default:
5825 abort ();
5826 }
5827 }
5828 emit_insn (gen_rotqby_ti (rot, from, offset));
5829 }
5830
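/* Implement TARGET_TRAMPOLINE_INIT. Fill in the trampoline at M_TRAMP
with code that loads the static chain register with CXT and jumps to
the address of FNDECL. */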
5831 static void
5832 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5833 {
5834 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5835 rtx shuf = gen_reg_rtx (V4SImode);
5836 rtx insn = gen_reg_rtx (V4SImode);
5837 rtx shufc;
5838 rtx insnc;
5839 rtx mem;
5840
5841 fnaddr = force_reg (SImode, fnaddr);
5842 cxt = force_reg (SImode, cxt);
5843
5844 if (TARGET_LARGE_MEM)
5845 {
5846 rtx rotl = gen_reg_rtx (V4SImode);
5847 rtx mask = gen_reg_rtx (V4SImode);
5848 rtx bi = gen_reg_rtx (SImode);
5849 static unsigned char const shufa[16] = {
5850 2, 3, 0, 1, 18, 19, 16, 17,
5851 0, 1, 2, 3, 16, 17, 18, 19
5852 };
5853 static unsigned char const insna[16] = {
5854 0x41, 0, 0, 79,
5855 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5856 0x60, 0x80, 0, 79,
5857 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5858 };
5859
5860 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5861 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5862
5863 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5864 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5865 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5866 emit_insn (gen_selb (insn, insnc, rotl, mask));
5867
5868 mem = adjust_address (m_tramp, V4SImode, 0);
5869 emit_move_insn (mem, insn);
5870
5871 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5872 mem = adjust_address (m_tramp, Pmode, 16);
5873 emit_move_insn (mem, bi);
5874 }
5875 else
5876 {
5877 rtx scxt = gen_reg_rtx (SImode);
5878 rtx sfnaddr = gen_reg_rtx (SImode);
5879 static unsigned char const insna[16] = {
5880 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5881 0x30, 0, 0, 0,
5882 0, 0, 0, 0,
5883 0, 0, 0, 0
5884 };
5885
5886 shufc = gen_reg_rtx (TImode);
5887 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5888
5889 /* By OR'ing all of cxt with the ila opcode we are assuming that cxt
5890 fits in 18 bits and that the last 4 bits are zero. This will be true
5891 if the stack pointer is initialized to 0x3fff0 at program start;
5892 otherwise the ila instruction will be garbage. */
5893
5894 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5895 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5896 emit_insn (gen_cpat
5897 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5898 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5899 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5900
5901 mem = adjust_address (m_tramp, V4SImode, 0);
5902 emit_move_insn (mem, insn);
5903 }
5904 emit_insn (gen_sync ());
5905 }
5906
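/* Implement TARGET_WARN_FUNC_RETURN. */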
5907 static bool
5908 spu_warn_func_return (tree decl)
5909 {
5910 /* Naked functions are implemented entirely in assembly, including the
5911 return sequence, so suppress warnings about this. */
5912 return !spu_naked_function_p (decl);
5913 }
5914
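/* Expand a sign extension of OPS[1] into the wider OPS[0] (DImode or
TImode). A shufb pattern copies the source bytes into the low-order end
of the result and fills the remaining bytes from a register holding the
sign. */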
5915 void
5916 spu_expand_sign_extend (rtx ops[])
5917 {
5918 unsigned char arr[16];
5919 rtx pat = gen_reg_rtx (TImode);
5920 rtx sign, c;
5921 int i, last;
5922 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5923 if (GET_MODE (ops[1]) == QImode)
5924 {
5925 sign = gen_reg_rtx (HImode);
5926 emit_insn (gen_extendqihi2 (sign, ops[1]));
5927 for (i = 0; i < 16; i++)
5928 arr[i] = 0x12;
5929 arr[last] = 0x13;
5930 }
5931 else
5932 {
5933 for (i = 0; i < 16; i++)
5934 arr[i] = 0x10;
5935 switch (GET_MODE (ops[1]))
5936 {
5937 case E_HImode:
5938 sign = gen_reg_rtx (SImode);
5939 emit_insn (gen_extendhisi2 (sign, ops[1]));
5940 arr[last] = 0x03;
5941 arr[last - 1] = 0x02;
5942 break;
5943 case E_SImode:
5944 sign = gen_reg_rtx (SImode);
5945 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5946 for (i = 0; i < 4; i++)
5947 arr[last - i] = 3 - i;
5948 break;
5949 case E_DImode:
5950 sign = gen_reg_rtx (SImode);
5951 c = gen_reg_rtx (SImode);
5952 emit_insn (gen_spu_convert (c, ops[1]));
5953 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5954 for (i = 0; i < 8; i++)
5955 arr[last - i] = 7 - i;
5956 break;
5957 default:
5958 abort ();
5959 }
5960 }
5961 emit_move_insn (pat, array_to_constant (TImode, arr));
5962 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5963 }
5964
5965 /* Expand vector initialization. If there are any constant parts,
5966 load the constant parts first, then load any non-constant parts. */
5967 void
5968 spu_expand_vector_init (rtx target, rtx vals)
5969 {
5970 machine_mode mode = GET_MODE (target);
5971 int n_elts = GET_MODE_NUNITS (mode);
5972 int n_var = 0;
5973 bool all_same = true;
5974 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5975 int i;
5976
5977 first = XVECEXP (vals, 0, 0);
5978 for (i = 0; i < n_elts; ++i)
5979 {
5980 x = XVECEXP (vals, 0, i);
5981 if (!(CONST_INT_P (x)
5982 || GET_CODE (x) == CONST_DOUBLE
5983 || GET_CODE (x) == CONST_FIXED))
5984 ++n_var;
5985 else
5986 {
5987 if (first_constant == NULL_RTX)
5988 first_constant = x;
5989 }
5990 if (i > 0 && !rtx_equal_p (x, first))
5991 all_same = false;
5992 }
5993
5994 /* If all elements are the same, use splats to replicate the element. */
5995 if (all_same)
5996 {
5997 if (!CONSTANT_P (first)
5998 && !register_operand (first, GET_MODE (x)))
5999 first = force_reg (GET_MODE (first), first);
6000 emit_insn (gen_spu_splats (target, first));
6001 return;
6002 }
6003
6004 /* Load the constant parts. */
6005 if (n_var != n_elts)
6006 {
6007 if (n_var == 0)
6008 {
6009 emit_move_insn (target,
6010 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6011 }
6012 else
6013 {
6014 rtx constant_parts_rtx = copy_rtx (vals);
6015
6016 gcc_assert (first_constant != NULL_RTX);
6017 /* Fill empty slots with the first constant; this increases
6018 our chance of using splats in the recursive call below. */
6019 for (i = 0; i < n_elts; ++i)
6020 {
6021 x = XVECEXP (constant_parts_rtx, 0, i);
6022 if (!(CONST_INT_P (x)
6023 || GET_CODE (x) == CONST_DOUBLE
6024 || GET_CODE (x) == CONST_FIXED))
6025 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6026 }
6027
6028 spu_expand_vector_init (target, constant_parts_rtx);
6029 }
6030 }
6031
6032 /* Load the variable parts. */
6033 if (n_var != 0)
6034 {
6035 rtx insert_operands[4];
6036
6037 insert_operands[0] = target;
6038 insert_operands[2] = target;
6039 for (i = 0; i < n_elts; ++i)
6040 {
6041 x = XVECEXP (vals, 0, i);
6042 if (!(CONST_INT_P (x)
6043 || GET_CODE (x) == CONST_DOUBLE
6044 || GET_CODE (x) == CONST_FIXED))
6045 {
6046 if (!register_operand (x, GET_MODE (x)))
6047 x = force_reg (GET_MODE (x), x);
6048 insert_operands[1] = x;
6049 insert_operands[3] = GEN_INT (i);
6050 spu_builtin_insert (insert_operands);
6051 }
6052 }
6053 }
6054 }
6055
6056 /* Return insn index for the vector compare instruction for given CODE,
6057 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
6058
6059 static int
6060 get_vec_cmp_insn (enum rtx_code code,
6061 machine_mode dest_mode,
6062 machine_mode op_mode)
6063
6064 {
6065 switch (code)
6066 {
6067 case EQ:
6068 if (dest_mode == V16QImode && op_mode == V16QImode)
6069 return CODE_FOR_ceq_v16qi;
6070 if (dest_mode == V8HImode && op_mode == V8HImode)
6071 return CODE_FOR_ceq_v8hi;
6072 if (dest_mode == V4SImode && op_mode == V4SImode)
6073 return CODE_FOR_ceq_v4si;
6074 if (dest_mode == V4SImode && op_mode == V4SFmode)
6075 return CODE_FOR_ceq_v4sf;
6076 if (dest_mode == V2DImode && op_mode == V2DFmode)
6077 return CODE_FOR_ceq_v2df;
6078 break;
6079 case GT:
6080 if (dest_mode == V16QImode && op_mode == V16QImode)
6081 return CODE_FOR_cgt_v16qi;
6082 if (dest_mode == V8HImode && op_mode == V8HImode)
6083 return CODE_FOR_cgt_v8hi;
6084 if (dest_mode == V4SImode && op_mode == V4SImode)
6085 return CODE_FOR_cgt_v4si;
6086 if (dest_mode == V4SImode && op_mode == V4SFmode)
6087 return CODE_FOR_cgt_v4sf;
6088 if (dest_mode == V2DImode && op_mode == V2DFmode)
6089 return CODE_FOR_cgt_v2df;
6090 break;
6091 case GTU:
6092 if (dest_mode == V16QImode && op_mode == V16QImode)
6093 return CODE_FOR_clgt_v16qi;
6094 if (dest_mode == V8HImode && op_mode == V8HImode)
6095 return CODE_FOR_clgt_v8hi;
6096 if (dest_mode == V4SImode && op_mode == V4SImode)
6097 return CODE_FOR_clgt_v4si;
6098 break;
6099 default:
6100 break;
6101 }
6102 return -1;
6103 }
6104
6105 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6106 DMODE is expected destination mode. This is a recursive function. */
6107
6108 static rtx
6109 spu_emit_vector_compare (enum rtx_code rcode,
6110 rtx op0, rtx op1,
6111 machine_mode dmode)
6112 {
6113 int vec_cmp_insn;
6114 rtx mask;
6115 machine_mode dest_mode;
6116 machine_mode op_mode = GET_MODE (op1);
6117
6118 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6119
6120 /* Single-precision floating point vector compares use V4SImode as the
6121 destination mode. Double-precision vector compares use V2DImode as the
6122 destination mode. Move the destination to the appropriate mode later. */
6123 if (dmode == V4SFmode)
6124 dest_mode = V4SImode;
6125 else if (dmode == V2DFmode)
6126 dest_mode = V2DImode;
6127 else
6128 dest_mode = dmode;
6129
6130 mask = gen_reg_rtx (dest_mode);
6131 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6132
6133 if (vec_cmp_insn == -1)
6134 {
6135 bool swap_operands = false;
6136 bool try_again = false;
6137 switch (rcode)
6138 {
6139 case LT:
6140 rcode = GT;
6141 swap_operands = true;
6142 try_again = true;
6143 break;
6144 case LTU:
6145 rcode = GTU;
6146 swap_operands = true;
6147 try_again = true;
6148 break;
6149 case NE:
6150 case UNEQ:
6151 case UNLE:
6152 case UNLT:
6153 case UNGE:
6154 case UNGT:
6155 case UNORDERED:
6156 /* Treat A != B as ~(A==B). */
6157 {
6158 enum rtx_code rev_code;
6159 enum insn_code nor_code;
6160 rtx rev_mask;
6161
6162 rev_code = reverse_condition_maybe_unordered (rcode);
6163 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6164
6165 nor_code = optab_handler (one_cmpl_optab, dest_mode);
6166 gcc_assert (nor_code != CODE_FOR_nothing);
6167 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
6168 if (dmode != dest_mode)
6169 {
6170 rtx temp = gen_reg_rtx (dest_mode);
6171 convert_move (temp, mask, 0);
6172 return temp;
6173 }
6174 return mask;
6175 }
6176 break;
6177 case GE:
6178 case GEU:
6179 case LE:
6180 case LEU:
6181 /* Try GT/GTU/LT/LTU OR EQ */
6182 {
6183 rtx c_rtx, eq_rtx;
6184 enum insn_code ior_code;
6185 enum rtx_code new_code;
6186
6187 switch (rcode)
6188 {
6189 case GE: new_code = GT; break;
6190 case GEU: new_code = GTU; break;
6191 case LE: new_code = LT; break;
6192 case LEU: new_code = LTU; break;
6193 default:
6194 gcc_unreachable ();
6195 }
6196
6197 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6198 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6199
6200 ior_code = optab_handler (ior_optab, dest_mode);
6201 gcc_assert (ior_code != CODE_FOR_nothing);
6202 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6203 if (dmode != dest_mode)
6204 {
6205 rtx temp = gen_reg_rtx (dest_mode);
6206 convert_move (temp, mask, 0);
6207 return temp;
6208 }
6209 return mask;
6210 }
6211 break;
6212 case LTGT:
6213 /* Try LT OR GT */
6214 {
6215 rtx lt_rtx, gt_rtx;
6216 enum insn_code ior_code;
6217
6218 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6219 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6220
6221 ior_code = optab_handler (ior_optab, dest_mode);
6222 gcc_assert (ior_code != CODE_FOR_nothing);
6223 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6224 if (dmode != dest_mode)
6225 {
6226 rtx temp = gen_reg_rtx (dest_mode);
6227 convert_move (temp, mask, 0);
6228 return temp;
6229 }
6230 return mask;
6231 }
6232 break;
6233 case ORDERED:
6234 /* Implement as (A==A) & (B==B) */
6235 {
6236 rtx a_rtx, b_rtx;
6237 enum insn_code and_code;
6238
6239 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6240 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6241
6242 and_code = optab_handler (and_optab, dest_mode);
6243 gcc_assert (and_code != CODE_FOR_nothing);
6244 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6245 if (dmode != dest_mode)
6246 {
6247 rtx temp = gen_reg_rtx (dest_mode);
6248 convert_move (temp, mask, 0);
6249 return temp;
6250 }
6251 return mask;
6252 }
6253 break;
6254 default:
6255 gcc_unreachable ();
6256 }
6257
6258 /* You only get two chances. */
6259 if (try_again)
6260 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6261
6262 gcc_assert (vec_cmp_insn != -1);
6263
6264 if (swap_operands)
6265 {
6266 rtx tmp;
6267 tmp = op0;
6268 op0 = op1;
6269 op1 = tmp;
6270 }
6271 }
6272
6273 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6274 if (dmode != dest_mode)
6275 {
6276 rtx temp = gen_reg_rtx (dest_mode);
6277 convert_move (temp, mask, 0);
6278 return temp;
6279 }
6280 return mask;
6281 }
6282
6283
6284 /* Emit vector conditional expression.
6285 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6286 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6287
6288 int
6289 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6290 rtx cond, rtx cc_op0, rtx cc_op1)
6291 {
6292 machine_mode dest_mode = GET_MODE (dest);
6293 enum rtx_code rcode = GET_CODE (cond);
6294 rtx mask;
6295
6296 /* Get the vector mask for the given relational operations. */
6297 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6298
6299 emit_insn(gen_selb (dest, op2, op1, mask));
6300
6301 return 1;
6302 }
6303
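/* Force OP into a register of mode MODE. A subreg is used when the
sizes match; otherwise the value is moved through spu_convert. Constants
without a mode are converted to MODE first. */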
6304 static rtx
6305 spu_force_reg (machine_mode mode, rtx op)
6306 {
6307 rtx x, r;
6308 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6309 {
6310 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6311 || GET_MODE (op) == BLKmode)
6312 return force_reg (mode, convert_to_mode (mode, op, 0));
6313 abort ();
6314 }
6315
6316 r = force_reg (GET_MODE (op), op);
6317 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6318 {
6319 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6320 if (x)
6321 return x;
6322 }
6323
6324 x = gen_reg_rtx (mode);
6325 emit_insn (gen_spu_convert (x, r));
6326 return x;
6327 }
6328
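/* Check that OP is a valid operand for parameter P of builtin D: issue
an error for an out-of-range immediate and a warning when low-order bits
of the operand would be ignored. */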
6329 static void
6330 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6331 {
6332 HOST_WIDE_INT v = 0;
6333 int lsbits;
6334 /* Check the range of immediate operands. */
6335 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6336 {
6337 int range = p - SPU_BTI_7;
6338
6339 if (!CONSTANT_P (op))
6340 error ("%s expects an integer literal in the range [%d, %d]",
6341 d->name,
6342 spu_builtin_range[range].low, spu_builtin_range[range].high);
6343
6344 if (GET_CODE (op) == CONST
6345 && (GET_CODE (XEXP (op, 0)) == PLUS
6346 || GET_CODE (XEXP (op, 0)) == MINUS))
6347 {
6348 v = INTVAL (XEXP (XEXP (op, 0), 1));
6349 op = XEXP (XEXP (op, 0), 0);
6350 }
6351 else if (GET_CODE (op) == CONST_INT)
6352 v = INTVAL (op);
6353 else if (GET_CODE (op) == CONST_VECTOR
6354 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6355 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6356
6357 /* The default for v is 0 which is valid in every range. */
6358 if (v < spu_builtin_range[range].low
6359 || v > spu_builtin_range[range].high)
6360 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6361 d->name,
6362 spu_builtin_range[range].low, spu_builtin_range[range].high,
6363 v);
6364
6365 switch (p)
6366 {
6367 case SPU_BTI_S10_4:
6368 lsbits = 4;
6369 break;
6370 case SPU_BTI_U16_2:
6371 /* This is only used in lqa and stqa. Even though the insns
6372 encode 16 bits of the address (all but the 2 least
6373 significant), only 14 bits are used because the address is
6374 masked to be 16-byte aligned. */
6375 lsbits = 4;
6376 break;
6377 case SPU_BTI_S16_2:
6378 /* This is used for lqr and stqr. */
6379 lsbits = 2;
6380 break;
6381 default:
6382 lsbits = 0;
6383 }
6384
6385 if (GET_CODE (op) == LABEL_REF
6386 || (GET_CODE (op) == SYMBOL_REF
6387 && SYMBOL_REF_FUNCTION_P (op))
6388 || (v & ((1 << lsbits) - 1)) != 0)
6389 warning (0, "%d least significant bits of %s are ignored", lsbits,
6390 d->name);
6391 }
6392 }
6393
6394
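/* Expand the arguments of call EXP for builtin D into OPS[], placing
TARGET in operand 0 when the builtin returns a value. Return the number
of operands filled in. */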
6395 static int
6396 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6397 rtx target, rtx ops[])
6398 {
6399 enum insn_code icode = (enum insn_code) d->icode;
6400 int i = 0, a;
6401
6402 /* Expand the arguments into rtl. */
6403
6404 if (d->parm[0] != SPU_BTI_VOID)
6405 ops[i++] = target;
6406
6407 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6408 {
6409 tree arg = CALL_EXPR_ARG (exp, a);
6410 if (arg == 0)
6411 abort ();
6412 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6413 }
6414
6415 gcc_assert (i == insn_data[icode].n_generator_args);
6416 return i;
6417 }
6418
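/* Expand a single builtin D for call EXP: expand and range-check the
operands, coerce them into the modes required by the insn predicates,
emit the insn, and return the result (converted to the mode of the
builtin prototype when necessary). */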
6419 static rtx
6420 spu_expand_builtin_1 (struct spu_builtin_description *d,
6421 tree exp, rtx target)
6422 {
6423 rtx pat;
6424 rtx ops[8];
6425 enum insn_code icode = (enum insn_code) d->icode;
6426 machine_mode mode, tmode;
6427 int i, p;
6428 int n_operands;
6429 tree return_type;
6430
6431 /* Set up ops[] with values from arglist. */
6432 n_operands = expand_builtin_args (d, exp, target, ops);
6433
6434 /* Handle the target operand which must be operand 0. */
6435 i = 0;
6436 if (d->parm[0] != SPU_BTI_VOID)
6437 {
6438
6439 /* We prefer the mode specified for the match_operand; otherwise
6440 we use the mode from the builtin function prototype. */
6441 tmode = insn_data[d->icode].operand[0].mode;
6442 if (tmode == VOIDmode)
6443 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6444
6445 /* Try to use target, because not using it can lead to extra copies,
6446 and when all of the registers are in use those extra copies lead
6447 to extra spills. */
6448 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6449 ops[0] = target;
6450 else
6451 target = ops[0] = gen_reg_rtx (tmode);
6452
6453 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6454 abort ();
6455
6456 i++;
6457 }
6458
6459 if (d->fcode == SPU_MASK_FOR_LOAD)
6460 {
6461 machine_mode mode = insn_data[icode].operand[1].mode;
6462 tree arg;
6463 rtx addr, op, pat;
6464
6465 /* get addr */
6466 arg = CALL_EXPR_ARG (exp, 0);
6467 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6468 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6469 addr = memory_address (mode, op);
6470
6471 /* negate addr */
6472 op = gen_reg_rtx (GET_MODE (addr));
6473 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
6474 op = gen_rtx_MEM (mode, op);
6475
6476 pat = GEN_FCN (icode) (target, op);
6477 if (!pat)
6478 return 0;
6479 emit_insn (pat);
6480 return target;
6481 }
6482
6483 /* Ignore align_hint, but still expand its args in case they have
6484 side effects. */
6485 if (icode == CODE_FOR_spu_align_hint)
6486 return 0;
6487
6488 /* Handle the rest of the operands. */
6489 for (p = 1; i < n_operands; i++, p++)
6490 {
6491 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6492 mode = insn_data[d->icode].operand[i].mode;
6493 else
6494 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6495
6496 /* mode can be VOIDmode here for labels */
6497
6498 /* For specific intrinsics with an immediate operand, e.g.,
6499 si_ai(), we sometimes need to convert the scalar argument to a
6500 vector argument by splatting the scalar. */
6501 if (VECTOR_MODE_P (mode)
6502 && (GET_CODE (ops[i]) == CONST_INT
6503 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6504 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6505 {
6506 if (GET_CODE (ops[i]) == CONST_INT)
6507 ops[i] = spu_const (mode, INTVAL (ops[i]));
6508 else
6509 {
6510 rtx reg = gen_reg_rtx (mode);
6511 machine_mode imode = GET_MODE_INNER (mode);
6512 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6513 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6514 if (imode != GET_MODE (ops[i]))
6515 ops[i] = convert_to_mode (imode, ops[i],
6516 TYPE_UNSIGNED (spu_builtin_types
6517 [d->parm[i]]));
6518 emit_insn (gen_spu_splats (reg, ops[i]));
6519 ops[i] = reg;
6520 }
6521 }
6522
6523 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6524
6525 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6526 ops[i] = spu_force_reg (mode, ops[i]);
6527 }
6528
6529 switch (n_operands)
6530 {
6531 case 0:
6532 pat = GEN_FCN (icode) (0);
6533 break;
6534 case 1:
6535 pat = GEN_FCN (icode) (ops[0]);
6536 break;
6537 case 2:
6538 pat = GEN_FCN (icode) (ops[0], ops[1]);
6539 break;
6540 case 3:
6541 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6542 break;
6543 case 4:
6544 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6545 break;
6546 case 5:
6547 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6548 break;
6549 case 6:
6550 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6551 break;
6552 default:
6553 abort ();
6554 }
6555
6556 if (!pat)
6557 abort ();
6558
6559 if (d->type == B_CALL || d->type == B_BISLED)
6560 emit_call_insn (pat);
6561 else if (d->type == B_JUMP)
6562 {
6563 emit_jump_insn (pat);
6564 emit_barrier ();
6565 }
6566 else
6567 emit_insn (pat);
6568
6569 return_type = spu_builtin_types[d->parm[0]];
6570 if (d->parm[0] != SPU_BTI_VOID
6571 && GET_MODE (target) != TYPE_MODE (return_type))
6572 {
6573 /* target is the return value. It should always have the mode of
6574 the builtin function prototype. */
6575 target = spu_force_reg (TYPE_MODE (return_type), target);
6576 }
6577
6578 return target;
6579 }
6580
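/* Implement TARGET_EXPAND_BUILTIN. */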
6581 rtx
6582 spu_expand_builtin (tree exp,
6583 rtx target,
6584 rtx subtarget ATTRIBUTE_UNUSED,
6585 machine_mode mode ATTRIBUTE_UNUSED,
6586 int ignore ATTRIBUTE_UNUSED)
6587 {
6588 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6589 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6590 struct spu_builtin_description *d;
6591
6592 if (fcode < NUM_SPU_BUILTINS)
6593 {
6594 d = &spu_builtins[fcode];
6595
6596 return spu_expand_builtin_1 (d, exp, target);
6597 }
6598 abort ();
6599 }
6600
6601 /* Implement targetm.vectorize.builtin_mask_for_load. */
6602 static tree
6603 spu_builtin_mask_for_load (void)
6604 {
6605 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6606 }
6607
6608 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6609 static int
6610 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6611 tree vectype,
6612 int misalign ATTRIBUTE_UNUSED)
6613 {
6614 unsigned elements;
6615
6616 switch (type_of_cost)
6617 {
6618 case scalar_stmt:
6619 case vector_stmt:
6620 case vector_load:
6621 case vector_store:
6622 case vec_to_scalar:
6623 case scalar_to_vec:
6624 case cond_branch_not_taken:
6625 case vec_perm:
6626 case vec_promote_demote:
6627 return 1;
6628
6629 case scalar_store:
6630 return 10;
6631
6632 case scalar_load:
6633 /* Load + rotate. */
6634 return 2;
6635
6636 case unaligned_load:
6637 case vector_gather_load:
6638 case vector_scatter_store:
6639 return 2;
6640
6641 case cond_branch_taken:
6642 return 6;
6643
6644 case vec_construct:
6645 elements = TYPE_VECTOR_SUBPARTS (vectype);
6646 return elements / 2 + 1;
6647
6648 default:
6649 gcc_unreachable ();
6650 }
6651 }
6652
6653 /* Implement targetm.vectorize.init_cost. */
6654
6655 static void *
6656 spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6657 {
6658 unsigned *cost = XNEWVEC (unsigned, 3);
6659 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
6660 return cost;
6661 }
6662
6663 /* Implement targetm.vectorize.add_stmt_cost. */
6664
6665 static unsigned
6666 spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6667 struct _stmt_vec_info *stmt_info, int misalign,
6668 enum vect_cost_model_location where)
6669 {
6670 unsigned *cost = (unsigned *) data;
6671 unsigned retval = 0;
6672
6673 if (flag_vect_cost_model)
6674 {
6675 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6676 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6677
6678 /* Statements in an inner loop relative to the loop being
6679 vectorized are weighted more heavily. The value here is
6680 arbitrary and could potentially be improved with analysis. */
6681 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6682 count *= 50; /* FIXME. */
6683
6684 retval = (unsigned) (count * stmt_cost);
6685 cost[where] += retval;
6686 }
6687
6688 return retval;
6689 }
6690
6691 /* Implement targetm.vectorize.finish_cost. */
6692
6693 static void
6694 spu_finish_cost (void *data, unsigned *prologue_cost,
6695 unsigned *body_cost, unsigned *epilogue_cost)
6696 {
6697 unsigned *cost = (unsigned *) data;
6698 *prologue_cost = cost[vect_prologue];
6699 *body_cost = cost[vect_body];
6700 *epilogue_cost = cost[vect_epilogue];
6701 }
6702
6703 /* Implement targetm.vectorize.destroy_cost_data. */
6704
6705 static void
6706 spu_destroy_cost_data (void *data)
6707 {
6708 free (data);
6709 }
6710
6711 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6712 after applying N iterations. This routine does not determine
6713 how many iterations are required to reach the desired alignment. */
6714
6715 static bool
6716 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6717 {
6718 if (is_packed)
6719 return false;
6720
6721 /* All other types are naturally aligned. */
6722 return true;
6723 }
6724
6725 /* Return the appropriate mode for a named address pointer. */
6726 static scalar_int_mode
6727 spu_addr_space_pointer_mode (addr_space_t addrspace)
6728 {
6729 switch (addrspace)
6730 {
6731 case ADDR_SPACE_GENERIC:
6732 return ptr_mode;
6733 case ADDR_SPACE_EA:
6734 return EAmode;
6735 default:
6736 gcc_unreachable ();
6737 }
6738 }
6739
6740 /* Return the appropriate mode for an address in a named address space. */
6741 static scalar_int_mode
6742 spu_addr_space_address_mode (addr_space_t addrspace)
6743 {
6744 switch (addrspace)
6745 {
6746 case ADDR_SPACE_GENERIC:
6747 return Pmode;
6748 case ADDR_SPACE_EA:
6749 return EAmode;
6750 default:
6751 gcc_unreachable ();
6752 }
6753 }
6754
6755 /* Determine if one named address space is a subset of another. */
6756
6757 static bool
6758 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6759 {
6760 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6761 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6762
6763 if (subset == superset)
6764 return true;
6765
6766 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6767 being subsets but instead as disjoint address spaces. */
6768 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6769 return false;
6770
6771 else
6772 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6773 }
6774
6775 /* Convert from one address space to another. */
6776 static rtx
6777 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6778 {
6779 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6780 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6781
6782 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6783 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6784
6785 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6786 {
6787 rtx result, ls;
6788
6789 ls = gen_const_mem (DImode,
6790 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6791 set_mem_align (ls, 128);
6792
6793 result = gen_reg_rtx (Pmode);
6794 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6795 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6796 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6797 ls, const0_rtx, Pmode, 1);
6798
6799 emit_insn (gen_subsi3 (result, op, ls));
6800
6801 return result;
6802 }
6803
6804 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6805 {
6806 rtx result, ls;
6807
6808 ls = gen_const_mem (DImode,
6809 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6810 set_mem_align (ls, 128);
6811
6812 result = gen_reg_rtx (EAmode);
6813 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6814 op = force_reg (Pmode, op);
6815 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6816 ls, const0_rtx, EAmode, 1);
6817 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6818
6819 if (EAmode == SImode)
6820 emit_insn (gen_addsi3 (result, op, ls));
6821 else
6822 emit_insn (gen_adddi3 (result, op, ls));
6823
6824 return result;
6825 }
6826
6827 else
6828 gcc_unreachable ();
6829 }
6830
6831
6832 /* Count the total number of instructions in each pipe and return the
6833 maximum, which is used as the Minimum Iteration Interval (MII)
6834 in the modulo scheduler. get_pipe() returns -2, -1, 0, or 1;
6835 -2 indicates an instruction that can go in either pipe0 or pipe1. */
6836 static int
6837 spu_sms_res_mii (struct ddg *g)
6838 {
6839 int i;
6840 unsigned t[4] = {0, 0, 0, 0};
6841
6842 for (i = 0; i < g->num_nodes; i++)
6843 {
6844 rtx_insn *insn = g->nodes[i].insn;
6845 int p = get_pipe (insn) + 2;
6846
6847 gcc_assert (p >= 0);
6848 gcc_assert (p < 4);
6849
6850 t[p]++;
6851 if (dump_file && INSN_P (insn))
6852 fprintf (dump_file, "i%d %s %d %d\n",
6853 INSN_UID (insn),
6854 insn_data[INSN_CODE(insn)].name,
6855 p, t[p]);
6856 }
6857 if (dump_file)
6858 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6859
6860 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6861 }
6862
6863
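/* Per-function initialization of the RTL expanders: record a conservative
alignment for the hard frame pointer and pre-create two 128-bit aligned
pointer pseudos for the benefit of later passes. */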
6864 void
6865 spu_init_expanders (void)
6866 {
6867 if (cfun)
6868 {
6869 rtx r0, r1;
6870 /* The hard frame pointer register is only 128-bit aligned when
6871 frame_pointer_needed is true. We don't know that until we're
6872 expanding the prologue. */
6873 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6874
6875 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6876 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6877 to be treated as aligned, so generate them here. */
6878 r0 = gen_reg_rtx (SImode);
6879 r1 = gen_reg_rtx (SImode);
6880 mark_reg_pointer (r0, 128);
6881 mark_reg_pointer (r1, 128);
6882 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6883 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6884 }
6885 }
6886
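/* Implement TARGET_LIBGCC_CMP_RETURN_MODE. */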
6887 static scalar_int_mode
6888 spu_libgcc_cmp_return_mode (void)
6889 {
6890
6891 /* For SPU, word_mode is TImode, so it is better to use SImode
6892 for compare returns. */
6893 return SImode;
6894 }
6895
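/* Implement TARGET_LIBGCC_SHIFT_COUNT_MODE. */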
6896 static scalar_int_mode
6897 spu_libgcc_shift_count_mode (void)
6898 {
6899 /* For SPU, word_mode is TImode, so it is better to use SImode
6900 for shift counts. */
6901 return SImode;
6902 }
6903
6904 /* Implement targetm.section_type_flags. */
6905 static unsigned int
6906 spu_section_type_flags (tree decl, const char *name, int reloc)
6907 {
6908 /* .toe needs to have type @nobits. */
6909 if (strcmp (name, ".toe") == 0)
6910 return SECTION_BSS;
6911 /* Don't load _ea into the current address space. */
6912 if (strcmp (name, "._ea") == 0)
6913 return SECTION_WRITE | SECTION_DEBUG;
6914 return default_section_type_flags (decl, name, reloc);
6915 }
6916
6917 /* Implement targetm.select_section. */
6918 static section *
6919 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6920 {
6921 /* Variables and constants defined in the __ea address space
6922 go into a special section named "._ea". */
6923 if (TREE_TYPE (decl) != error_mark_node
6924 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6925 {
6926 /* We might get called with string constants, but get_named_section
6927 doesn't like them as they are not DECLs. Also, we need to set
6928 flags in that case. */
6929 if (!DECL_P (decl))
6930 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6931
6932 return get_named_section (decl, "._ea", reloc);
6933 }
6934
6935 return default_elf_select_section (decl, reloc, align);
6936 }
6937
6938 /* Implement targetm.unique_section. */
6939 static void
6940 spu_unique_section (tree decl, int reloc)
6941 {
6942 /* We don't support unique section names in the __ea address
6943 space for now. */
6944 if (TREE_TYPE (decl) != error_mark_node
6945 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6946 return;
6947
6948 default_unique_section (decl, reloc);
6949 }
6950
6951 /* Generate a constant or register which contains 2^SCALE. We assume
6952 the result is valid for MODE. Currently, MODE must be V4SFmode and
6953 SCALE must be SImode. */
6954 rtx
6955 spu_gen_exp2 (machine_mode mode, rtx scale)
6956 {
6957 gcc_assert (mode == V4SFmode);
6958 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6959 if (GET_CODE (scale) != CONST_INT)
6960 {
6961 /* unsigned int exp = (127 + scale) << 23;
6962 __vector float m = (__vector float) spu_splats (exp); */
6963 rtx reg = force_reg (SImode, scale);
6964 rtx exp = gen_reg_rtx (SImode);
6965 rtx mul = gen_reg_rtx (mode);
6966 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6967 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6968 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6969 return mul;
6970 }
6971 else
6972 {
6973 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6974 unsigned char arr[16];
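/* Build the byte image of the single-precision float 2**SCALE, i.e. the
bit pattern (127 + SCALE) << 23, replicated into each of the four words. */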
6975 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6976 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6977 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6978 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6979 return array_to_constant (mode, arr);
6980 }
6981 }
6982
6983 /* After reload, just change the convert into a move instruction
6984 or a dead instruction. */
6985 void
6986 spu_split_convert (rtx ops[])
6987 {
6988 if (REGNO (ops[0]) == REGNO (ops[1]))
6989 emit_note (NOTE_INSN_DELETED);
6990 else
6991 {
6992 /* Use TImode always as this might help hard reg copyprop. */
6993 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6994 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6995 emit_insn (gen_move_insn (op0, op1));
6996 }
6997 }
6998
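/* Output the function profiling code: a branch-and-set-link to _mcount. */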
6999 void
7000 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
7001 {
7002 fprintf (file, "# profile\n");
7003 fprintf (file, "brsl $75, _mcount\n");
7004 }
7005
7006 /* Implement targetm.ref_may_alias_errno. */
7007 static bool
7008 spu_ref_may_alias_errno (ao_ref *ref)
7009 {
7010 tree base = ao_ref_base (ref);
7011
7012 /* With SPU newlib, errno is defined as something like
7013 _impure_data._errno
7014 The default implementation of this target macro does not
7015 recognize such expressions, so we special-case them here. */
7016
7017 if (TREE_CODE (base) == VAR_DECL
7018 && !TREE_STATIC (base)
7019 && DECL_EXTERNAL (base)
7020 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7021 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7022 "_impure_data") == 0
7023 /* _errno is the first member of _impure_data. */
7024 && ref->offset == 0)
7025 return true;
7026
7027 return default_ref_may_alias_errno (ref);
7028 }
7029
7030 /* Output thunk to FILE that implements a C++ virtual function call (with
7031 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7032 by DELTA and, unless VCALL_OFFSET is zero, applies an additional adjustment
7033 loaded from offset VCALL_OFFSET in the vtable, whose address is found at
7034 offset 0 from the adjusted this pointer. */
7035
7036 static void
7037 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7038 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7039 tree function)
7040 {
7041 rtx op[8];
7042
7043 /* Make sure unwind info is emitted for the thunk if needed. */
7044 final_start_function (emit_barrier (), file, 1);
7045
7046 /* Operand 0 is the target function. */
7047 op[0] = XEXP (DECL_RTL (function), 0);
7048
7049 /* Operand 1 is the 'this' pointer. */
7050 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7051 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7052 else
7053 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7054
7055 /* Operands 2/3 are the low/high halfwords of delta. */
7056 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7057 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7058
7059 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7060 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7061 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7062
7063 /* Operands 6/7 are temporary registers. */
7064 op[6] = gen_rtx_REG (Pmode, 79);
7065 op[7] = gen_rtx_REG (Pmode, 78);
7066
7067 /* Add DELTA to this pointer. */
7068 if (delta)
7069 {
7070 if (delta >= -0x200 && delta < 0x200)
7071 output_asm_insn ("ai\t%1,%1,%2", op);
7072 else if (delta >= -0x8000 && delta < 0x8000)
7073 {
7074 output_asm_insn ("il\t%6,%2", op);
7075 output_asm_insn ("a\t%1,%1,%6", op);
7076 }
7077 else
7078 {
7079 output_asm_insn ("ilhu\t%6,%3", op);
7080 output_asm_insn ("iohl\t%6,%2", op);
7081 output_asm_insn ("a\t%1,%1,%6", op);
7082 }
7083 }
7084
7085 /* Perform vcall adjustment. */
7086 if (vcall_offset)
7087 {
7088 output_asm_insn ("lqd\t%7,0(%1)", op);
7089 output_asm_insn ("rotqby\t%7,%7,%1", op);
7090
7091 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7092 output_asm_insn ("ai\t%7,%7,%4", op);
7093 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7094 {
7095 output_asm_insn ("il\t%6,%4", op);
7096 output_asm_insn ("a\t%7,%7,%6", op);
7097 }
7098 else
7099 {
7100 output_asm_insn ("ilhu\t%6,%5", op);
7101 output_asm_insn ("iohl\t%6,%4", op);
7102 output_asm_insn ("a\t%7,%7,%6", op);
7103 }
7104
7105 output_asm_insn ("lqd\t%6,0(%7)", op);
7106 output_asm_insn ("rotqby\t%6,%6,%7", op);
7107 output_asm_insn ("a\t%1,%1,%6", op);
7108 }
7109
7110 /* Jump to target. */
7111 output_asm_insn ("br\t%0", op);
7112
7113 final_end_function ();
7114 }
7115
7116 /* Canonicalize a comparison from one we don't have to one we do have. */
7117 static void
7118 spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7119 bool op0_preserve_value)
7120 {
7121 if (!op0_preserve_value
7122 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7123 {
7124 rtx tem = *op0;
7125 *op0 = *op1;
7126 *op1 = tem;
7127 *code = (int)swap_condition ((enum rtx_code)*code);
7128 }
7129 }
7130
7131 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
7132 to perform. MEM is the memory on which to operate. VAL is the second
7133 operand of the binary operator. BEFORE and AFTER are optional locations to
7134 return the value of MEM either before or after the operation. */
7135 void
7136 spu_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
7137 rtx orig_before, rtx orig_after)
7138 {
7139 machine_mode mode = GET_MODE (mem);
7140 rtx before = orig_before, after = orig_after;
7141
7142 if (before == NULL_RTX)
7143 before = gen_reg_rtx (mode);
7144
7145 emit_move_insn (before, mem);
7146
7147 if (code == MULT) /* NAND operation */
7148 {
7149 rtx x = expand_simple_binop (mode, AND, before, val,
7150 NULL_RTX, 1, OPTAB_LIB_WIDEN);
7151 after = expand_simple_unop (mode, NOT, x, after, 1);
7152 }
7153 else
7154 {
7155 after = expand_simple_binop (mode, code, before, val,
7156 after, 1, OPTAB_LIB_WIDEN);
7157 }
7158
7159 emit_move_insn (mem, after);
7160
7161 if (orig_after && after != orig_after)
7162 emit_move_insn (orig_after, after);
7163 }
7164
7165 /* Implement TARGET_MODES_TIEABLE_P. */
7166
7167 static bool
7168 spu_modes_tieable_p (machine_mode mode1, machine_mode mode2)
7169 {
7170 return (GET_MODE_BITSIZE (mode1) <= MAX_FIXED_MODE_SIZE
7171 && GET_MODE_BITSIZE (mode2) <= MAX_FIXED_MODE_SIZE);
7172 }
7173
7174 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. GCC assumes that modes are
7175 in the lowpart of a register, which is only true for SPU. */
7176
7177 static bool
7178 spu_can_change_mode_class (machine_mode from, machine_mode to, reg_class_t)
7179 {
7180 return (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
7181 || (GET_MODE_SIZE (from) <= 4 && GET_MODE_SIZE (to) <= 4)
7182 || (GET_MODE_SIZE (from) >= 16 && GET_MODE_SIZE (to) >= 16));
7183 }
7184
7185 /* Implement TARGET_TRULY_NOOP_TRUNCATION. */
7186
7187 static bool
7188 spu_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec)
7189 {
7190 return inprec <= 32 && outprec <= inprec;
7191 }
7192
7193 /* Implement TARGET_STATIC_RTX_ALIGNMENT.
7194
7195 Make all static objects 16-byte aligned. This allows us to assume
7196 they are also padded to 16 bytes, which means we can use a single
7197 load or store instruction to access them. */
7198
7199 static HOST_WIDE_INT
7200 spu_static_rtx_alignment (machine_mode mode)
7201 {
7202 return MAX (GET_MODE_ALIGNMENT (mode), 128);
7203 }
7204
7205 /* Implement TARGET_CONSTANT_ALIGNMENT.
7206
7207 Make all static objects 16-byte aligned. This allows us to assume
7208 they are also padded to 16 bytes, which means we can use a single
7209 load or store instruction to access them. */
7210
7211 static HOST_WIDE_INT
7212 spu_constant_alignment (const_tree, HOST_WIDE_INT align)
7213 {
7214 return MAX (align, 128);
7215 }
7216 \f
7217 /* Table of machine attributes. */
7218 static const struct attribute_spec spu_attribute_table[] =
7219 {
7220 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
7221 affects_type_identity, handler, exclude } */
7222 { "naked", 0, 0, true, false, false, false,
7223 spu_handle_fndecl_attribute, NULL },
7224 { "spu_vector", 0, 0, false, true, false, false,
7225 spu_handle_vector_attribute, NULL },
7226 { NULL, 0, 0, false, false, false, false, NULL, NULL }
7227 };
7228
7229 /* TARGET overrides. */
7230
7231 #undef TARGET_LRA_P
7232 #define TARGET_LRA_P hook_bool_void_false
7233
7234 #undef TARGET_ADDR_SPACE_POINTER_MODE
7235 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7236
7237 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7238 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7239
7240 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7241 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7242 spu_addr_space_legitimate_address_p
7243
7244 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7245 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7246
7247 #undef TARGET_ADDR_SPACE_SUBSET_P
7248 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7249
7250 #undef TARGET_ADDR_SPACE_CONVERT
7251 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7252
7253 #undef TARGET_INIT_BUILTINS
7254 #define TARGET_INIT_BUILTINS spu_init_builtins
7255 #undef TARGET_BUILTIN_DECL
7256 #define TARGET_BUILTIN_DECL spu_builtin_decl
7257
7258 #undef TARGET_EXPAND_BUILTIN
7259 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
7260
7261 #undef TARGET_UNWIND_WORD_MODE
7262 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7263
7264 #undef TARGET_LEGITIMIZE_ADDRESS
7265 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7266
7267 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7268 and .quad for the debugger. When it is known that the assembler is fixed,
7269 these can be removed. */
7270 #undef TARGET_ASM_UNALIGNED_SI_OP
7271 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7272
7273 #undef TARGET_ASM_ALIGNED_DI_OP
7274 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7275
7276 /* The .8byte directive doesn't seem to work well for a 32 bit
7277 architecture. */
7278 #undef TARGET_ASM_UNALIGNED_DI_OP
7279 #define TARGET_ASM_UNALIGNED_DI_OP NULL
7280
7281 #undef TARGET_RTX_COSTS
7282 #define TARGET_RTX_COSTS spu_rtx_costs
7283
7284 #undef TARGET_ADDRESS_COST
7285 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
7286
7287 #undef TARGET_SCHED_ISSUE_RATE
7288 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7289
7290 #undef TARGET_SCHED_INIT_GLOBAL
7291 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7292
7293 #undef TARGET_SCHED_INIT
7294 #define TARGET_SCHED_INIT spu_sched_init
7295
7296 #undef TARGET_SCHED_VARIABLE_ISSUE
7297 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7298
7299 #undef TARGET_SCHED_REORDER
7300 #define TARGET_SCHED_REORDER spu_sched_reorder
7301
7302 #undef TARGET_SCHED_REORDER2
7303 #define TARGET_SCHED_REORDER2 spu_sched_reorder
7304
7305 #undef TARGET_SCHED_ADJUST_COST
7306 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7307
7308 #undef TARGET_ATTRIBUTE_TABLE
7309 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7310
7311 #undef TARGET_ASM_INTEGER
7312 #define TARGET_ASM_INTEGER spu_assemble_integer
7313
7314 #undef TARGET_SCALAR_MODE_SUPPORTED_P
7315 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7316
7317 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7318 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7319
7320 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7321 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7322
7323 #undef TARGET_ASM_GLOBALIZE_LABEL
7324 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7325
7326 #undef TARGET_PASS_BY_REFERENCE
7327 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7328
7329 #undef TARGET_FUNCTION_ARG
7330 #define TARGET_FUNCTION_ARG spu_function_arg
7331
7332 #undef TARGET_FUNCTION_ARG_ADVANCE
7333 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7334
7335 #undef TARGET_FUNCTION_ARG_OFFSET
7336 #define TARGET_FUNCTION_ARG_OFFSET spu_function_arg_offset
7337
7338 #undef TARGET_FUNCTION_ARG_PADDING
7339 #define TARGET_FUNCTION_ARG_PADDING spu_function_arg_padding
7340
7341 #undef TARGET_MUST_PASS_IN_STACK
7342 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7343
7344 #undef TARGET_BUILD_BUILTIN_VA_LIST
7345 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7346
7347 #undef TARGET_EXPAND_BUILTIN_VA_START
7348 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7349
7350 #undef TARGET_SETUP_INCOMING_VARARGS
7351 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7352
7353 #undef TARGET_MACHINE_DEPENDENT_REORG
7354 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7355
7356 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7357 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7358
7359 #undef TARGET_INIT_LIBFUNCS
7360 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7361
7362 #undef TARGET_RETURN_IN_MEMORY
7363 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7364
7365 #undef TARGET_ENCODE_SECTION_INFO
7366 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7367
7368 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7369 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7370
7371 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7372 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7373
7374 #undef TARGET_VECTORIZE_INIT_COST
7375 #define TARGET_VECTORIZE_INIT_COST spu_init_cost
7376
7377 #undef TARGET_VECTORIZE_ADD_STMT_COST
7378 #define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7379
7380 #undef TARGET_VECTORIZE_FINISH_COST
7381 #define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7382
7383 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
7384 #define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7385
7386 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7387 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7388
7389 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7390 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7391
7392 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7393 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7394
7395 #undef TARGET_SCHED_SMS_RES_MII
7396 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7397
7398 #undef TARGET_SECTION_TYPE_FLAGS
7399 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7400
7401 #undef TARGET_ASM_SELECT_SECTION
7402 #define TARGET_ASM_SELECT_SECTION spu_select_section
7403
7404 #undef TARGET_ASM_UNIQUE_SECTION
7405 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7406
7407 #undef TARGET_LEGITIMATE_ADDRESS_P
7408 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7409
7410 #undef TARGET_LEGITIMATE_CONSTANT_P
7411 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7412
7413 #undef TARGET_TRAMPOLINE_INIT
7414 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7415
7416 #undef TARGET_WARN_FUNC_RETURN
7417 #define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7418
7419 #undef TARGET_OPTION_OVERRIDE
7420 #define TARGET_OPTION_OVERRIDE spu_option_override
7421
7422 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7423 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7424
7425 #undef TARGET_REF_MAY_ALIAS_ERRNO
7426 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7427
7428 #undef TARGET_ASM_OUTPUT_MI_THUNK
7429 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7430 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7431 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7432
7433 /* Variable tracking should be run after all optimizations which
7434 change the order of insns. It also needs a valid CFG. */
7435 #undef TARGET_DELAY_VARTRACK
7436 #define TARGET_DELAY_VARTRACK true
7437
7438 #undef TARGET_CANONICALIZE_COMPARISON
7439 #define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7440
7441 #undef TARGET_CAN_USE_DOLOOP_P
7442 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7443
7444 #undef TARGET_MODES_TIEABLE_P
7445 #define TARGET_MODES_TIEABLE_P spu_modes_tieable_p
7446
7447 #undef TARGET_HARD_REGNO_NREGS
7448 #define TARGET_HARD_REGNO_NREGS spu_hard_regno_nregs
7449
7450 #undef TARGET_CAN_CHANGE_MODE_CLASS
7451 #define TARGET_CAN_CHANGE_MODE_CLASS spu_can_change_mode_class
7452
7453 #undef TARGET_TRULY_NOOP_TRUNCATION
7454 #define TARGET_TRULY_NOOP_TRUNCATION spu_truly_noop_truncation
7455
7456 #undef TARGET_STATIC_RTX_ALIGNMENT
7457 #define TARGET_STATIC_RTX_ALIGNMENT spu_static_rtx_alignment
7458 #undef TARGET_CONSTANT_ALIGNMENT
7459 #define TARGET_CONSTANT_ALIGNMENT spu_constant_alignment
7460
7461 struct gcc_target targetm = TARGET_INITIALIZER;
7462
7463 #include "gt-spu.h"