]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/spu/spu.c
84dddbcc5ba597fed1d9b086ea3596ff5477fb4b
[thirdparty/gcc.git] / gcc / config / spu / spu.c
1 /* Copyright (C) 2006-2017 Free Software Foundation, Inc.
2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
16
17 #include "config.h"
18 #include "system.h"
19 #include "coretypes.h"
20 #include "backend.h"
21 #include "target.h"
22 #include "rtl.h"
23 #include "tree.h"
24 #include "gimple.h"
25 #include "cfghooks.h"
26 #include "cfgloop.h"
27 #include "df.h"
28 #include "memmodel.h"
29 #include "tm_p.h"
30 #include "stringpool.h"
31 #include "attribs.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "regs.h"
35 #include "emit-rtl.h"
36 #include "recog.h"
37 #include "diagnostic-core.h"
38 #include "insn-attr.h"
39 #include "alias.h"
40 #include "fold-const.h"
41 #include "stor-layout.h"
42 #include "calls.h"
43 #include "varasm.h"
44 #include "explow.h"
45 #include "expr.h"
46 #include "output.h"
47 #include "cfgrtl.h"
48 #include "cfgbuild.h"
49 #include "langhooks.h"
50 #include "reload.h"
51 #include "sched-int.h"
52 #include "params.h"
53 #include "gimplify.h"
54 #include "tm-constrs.h"
55 #include "ddg.h"
56 #include "dumpfile.h"
57 #include "builtins.h"
58 #include "rtl-iter.h"
59
60 /* This file should be included last. */
61 #include "target-def.h"
62
63 /* Builtin types, data and prototypes. */
64
65 enum spu_builtin_type_index
66 {
67 SPU_BTI_END_OF_PARAMS,
68
69 /* We create new type nodes for these. */
70 SPU_BTI_V16QI,
71 SPU_BTI_V8HI,
72 SPU_BTI_V4SI,
73 SPU_BTI_V2DI,
74 SPU_BTI_V4SF,
75 SPU_BTI_V2DF,
76 SPU_BTI_UV16QI,
77 SPU_BTI_UV8HI,
78 SPU_BTI_UV4SI,
79 SPU_BTI_UV2DI,
80
81 /* A 16-byte type. (Implemented with V16QI_type_node) */
82 SPU_BTI_QUADWORD,
83
84 /* These all correspond to intSI_type_node */
85 SPU_BTI_7,
86 SPU_BTI_S7,
87 SPU_BTI_U7,
88 SPU_BTI_S10,
89 SPU_BTI_S10_4,
90 SPU_BTI_U14,
91 SPU_BTI_16,
92 SPU_BTI_S16,
93 SPU_BTI_S16_2,
94 SPU_BTI_U16,
95 SPU_BTI_U16_2,
96 SPU_BTI_U18,
97
98 /* These correspond to the standard types */
99 SPU_BTI_INTQI,
100 SPU_BTI_INTHI,
101 SPU_BTI_INTSI,
102 SPU_BTI_INTDI,
103
104 SPU_BTI_UINTQI,
105 SPU_BTI_UINTHI,
106 SPU_BTI_UINTSI,
107 SPU_BTI_UINTDI,
108
109 SPU_BTI_FLOAT,
110 SPU_BTI_DOUBLE,
111
112 SPU_BTI_VOID,
113 SPU_BTI_PTR,
114
115 SPU_BTI_MAX
116 };
117
118 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
119 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
120 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
121 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
122 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
123 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
124 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
125 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
126 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
127 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
128
129 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
130
131 struct spu_builtin_range
132 {
133 int low, high;
134 };
135
136 static struct spu_builtin_range spu_builtin_range[] = {
137 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
138 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
139 {0ll, 0x7fll}, /* SPU_BTI_U7 */
140 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
141 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
142 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
143 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
144 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
145 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
146 {0ll, 0xffffll}, /* SPU_BTI_U16 */
147 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
148 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
149 };
150
151 \f
152 /* Target specific attribute specifications. */
153 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
154
155 /* Prototypes and external defs. */
156 static int get_pipe (rtx_insn *insn);
157 static int spu_naked_function_p (tree func);
158 static int mem_is_padded_component_ref (rtx x);
159 static void fix_range (const char *);
160 static rtx spu_expand_load (rtx, rtx, rtx, int);
161
162 /* Which instruction set architecture to use. */
163 int spu_arch;
164 /* Which cpu are we tuning for. */
165 int spu_tune;
166
167 /* The hardware requires 8 insns between a hint and the branch it
168 effects. This variable describes how many rtl instructions the
169 compiler needs to see before inserting a hint, and then the compiler
170 will insert enough nops to make it at least 8 insns. The default is
171 for the compiler to allow up to 2 nops be emitted. The nops are
172 inserted in pairs, so we round down. */
173 int spu_hint_dist = (8*4) - (2*4);
174
175 enum spu_immediate {
176 SPU_NONE,
177 SPU_IL,
178 SPU_ILA,
179 SPU_ILH,
180 SPU_ILHU,
181 SPU_ORI,
182 SPU_ORHI,
183 SPU_ORBI,
184 SPU_IOHL
185 };
186 enum immediate_class
187 {
188 IC_POOL, /* constant pool */
189 IC_IL1, /* one il* instruction */
190 IC_IL2, /* both ilhu and iohl instructions */
191 IC_IL1s, /* one il* instruction */
192 IC_IL2s, /* both ilhu and iohl instructions */
193 IC_FSMBI, /* the fsmbi instruction */
194 IC_CPAT, /* one of the c*d instructions */
195 IC_FSMBI2 /* fsmbi plus 1 other instruction */
196 };
197
198 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
199 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
200 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
201 static enum immediate_class classify_immediate (rtx op,
202 machine_mode mode);
203
204 /* Pointer mode for __ea references. */
205 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
206
207 \f
208 /* Define the structure for the machine field in struct function. */
209 struct GTY(()) machine_function
210 {
211 /* Register to use for PIC accesses. */
212 rtx pic_reg;
213 };
214
215 /* How to allocate a 'struct machine_function'. */
216 static struct machine_function *
217 spu_init_machine_status (void)
218 {
219 return ggc_cleared_alloc<machine_function> ();
220 }
221
222 /* Implement TARGET_OPTION_OVERRIDE. */
223 static void
224 spu_option_override (void)
225 {
226 /* Set up function hooks. */
227 init_machine_status = spu_init_machine_status;
228
229 /* Small loops will be unpeeled at -O3. For SPU it is more important
230 to keep code small by default. */
231 if (!flag_unroll_loops && !flag_peel_loops)
232 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
233 global_options.x_param_values,
234 global_options_set.x_param_values);
235
236 flag_omit_frame_pointer = 1;
237
238 /* Functions must be 8 byte aligned so we correctly handle dual issue */
239 if (align_functions < 8)
240 align_functions = 8;
241
242 spu_hint_dist = 8*4 - spu_max_nops*4;
243 if (spu_hint_dist < 0)
244 spu_hint_dist = 0;
245
246 if (spu_fixed_range_string)
247 fix_range (spu_fixed_range_string);
248
249 /* Determine processor architectural level. */
250 if (spu_arch_string)
251 {
252 if (strcmp (&spu_arch_string[0], "cell") == 0)
253 spu_arch = PROCESSOR_CELL;
254 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
255 spu_arch = PROCESSOR_CELLEDP;
256 else
257 error ("bad value (%s) for -march= switch", spu_arch_string);
258 }
259
260 /* Determine processor to tune for. */
261 if (spu_tune_string)
262 {
263 if (strcmp (&spu_tune_string[0], "cell") == 0)
264 spu_tune = PROCESSOR_CELL;
265 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
266 spu_tune = PROCESSOR_CELLEDP;
267 else
268 error ("bad value (%s) for -mtune= switch", spu_tune_string);
269 }
270
271 /* Change defaults according to the processor architecture. */
272 if (spu_arch == PROCESSOR_CELLEDP)
273 {
274 /* If no command line option has been otherwise specified, change
275 the default to -mno-safe-hints on celledp -- only the original
276 Cell/B.E. processors require this workaround. */
277 if (!(target_flags_explicit & MASK_SAFE_HINTS))
278 target_flags &= ~MASK_SAFE_HINTS;
279 }
280
281 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
282 }
283 \f
284 /* Implement TARGET_HARD_REGNO_NREGS. */
285
286 static unsigned int
287 spu_hard_regno_nregs (unsigned int, machine_mode mode)
288 {
289 return CEIL (GET_MODE_BITSIZE (mode), MAX_FIXED_MODE_SIZE);
290 }
291
292 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
293 struct attribute_spec.handler. */
294
295 /* True if MODE is valid for the target. By "valid", we mean able to
296 be manipulated in non-trivial ways. In particular, this means all
297 the arithmetic is supported. */
298 static bool
299 spu_scalar_mode_supported_p (scalar_mode mode)
300 {
301 switch (mode)
302 {
303 case E_QImode:
304 case E_HImode:
305 case E_SImode:
306 case E_SFmode:
307 case E_DImode:
308 case E_TImode:
309 case E_DFmode:
310 return true;
311
312 default:
313 return false;
314 }
315 }
316
317 /* Similarly for vector modes. "Supported" here is less strict. At
318 least some operations are supported; need to check optabs or builtins
319 for further details. */
320 static bool
321 spu_vector_mode_supported_p (machine_mode mode)
322 {
323 switch (mode)
324 {
325 case E_V16QImode:
326 case E_V8HImode:
327 case E_V4SImode:
328 case E_V2DImode:
329 case E_V4SFmode:
330 case E_V2DFmode:
331 return true;
332
333 default:
334 return false;
335 }
336 }
337
338 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
339 least significant bytes of the outer mode. This function returns
340 TRUE for the SUBREG's where this is correct. */
341 int
342 valid_subreg (rtx op)
343 {
344 machine_mode om = GET_MODE (op);
345 machine_mode im = GET_MODE (SUBREG_REG (op));
346 return om != VOIDmode && im != VOIDmode
347 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
348 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
349 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
350 }
351
352 /* When insv and ext[sz]v ar passed a TI SUBREG, we want to strip it off
353 and adjust the start offset. */
354 static rtx
355 adjust_operand (rtx op, HOST_WIDE_INT * start)
356 {
357 machine_mode mode;
358 int op_size;
359 /* Strip any paradoxical SUBREG. */
360 if (GET_CODE (op) == SUBREG
361 && (GET_MODE_BITSIZE (GET_MODE (op))
362 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
363 {
364 if (start)
365 *start -=
366 GET_MODE_BITSIZE (GET_MODE (op)) -
367 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
368 op = SUBREG_REG (op);
369 }
370 /* If it is smaller than SI, assure a SUBREG */
371 op_size = GET_MODE_BITSIZE (GET_MODE (op));
372 if (op_size < 32)
373 {
374 if (start)
375 *start += 32 - op_size;
376 op_size = 32;
377 }
378 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
379 mode = int_mode_for_size (op_size, 0).require ();
380 if (mode != GET_MODE (op))
381 op = gen_rtx_SUBREG (mode, op, 0);
382 return op;
383 }
384
385 void
386 spu_expand_extv (rtx ops[], int unsignedp)
387 {
388 rtx dst = ops[0], src = ops[1];
389 HOST_WIDE_INT width = INTVAL (ops[2]);
390 HOST_WIDE_INT start = INTVAL (ops[3]);
391 HOST_WIDE_INT align_mask;
392 rtx s0, s1, mask, r0;
393
394 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
395
396 if (MEM_P (src))
397 {
398 /* First, determine if we need 1 TImode load or 2. We need only 1
399 if the bits being extracted do not cross the alignment boundary
400 as determined by the MEM and its address. */
401
402 align_mask = -MEM_ALIGN (src);
403 if ((start & align_mask) == ((start + width - 1) & align_mask))
404 {
405 /* Alignment is sufficient for 1 load. */
406 s0 = gen_reg_rtx (TImode);
407 r0 = spu_expand_load (s0, 0, src, start / 8);
408 start &= 7;
409 if (r0)
410 emit_insn (gen_rotqby_ti (s0, s0, r0));
411 }
412 else
413 {
414 /* Need 2 loads. */
415 s0 = gen_reg_rtx (TImode);
416 s1 = gen_reg_rtx (TImode);
417 r0 = spu_expand_load (s0, s1, src, start / 8);
418 start &= 7;
419
420 gcc_assert (start + width <= 128);
421 if (r0)
422 {
423 rtx r1 = gen_reg_rtx (SImode);
424 mask = gen_reg_rtx (TImode);
425 emit_move_insn (mask, GEN_INT (-1));
426 emit_insn (gen_rotqby_ti (s0, s0, r0));
427 emit_insn (gen_rotqby_ti (s1, s1, r0));
428 if (GET_CODE (r0) == CONST_INT)
429 r1 = GEN_INT (INTVAL (r0) & 15);
430 else
431 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
432 emit_insn (gen_shlqby_ti (mask, mask, r1));
433 emit_insn (gen_selb (s0, s1, s0, mask));
434 }
435 }
436
437 }
438 else if (GET_CODE (src) == SUBREG)
439 {
440 rtx r = SUBREG_REG (src);
441 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
442 s0 = gen_reg_rtx (TImode);
443 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
444 emit_insn (gen_rtx_SET (s0, gen_rtx_ZERO_EXTEND (TImode, r)));
445 else
446 emit_move_insn (s0, src);
447 }
448 else
449 {
450 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
451 s0 = gen_reg_rtx (TImode);
452 emit_move_insn (s0, src);
453 }
454
455 /* Now s0 is TImode and contains the bits to extract at start. */
456
457 if (start)
458 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
459
460 if (128 - width)
461 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
462
463 emit_move_insn (dst, s0);
464 }
465
466 void
467 spu_expand_insv (rtx ops[])
468 {
469 HOST_WIDE_INT width = INTVAL (ops[1]);
470 HOST_WIDE_INT start = INTVAL (ops[2]);
471 unsigned HOST_WIDE_INT maskbits;
472 machine_mode dst_mode;
473 rtx dst = ops[0], src = ops[3];
474 int dst_size;
475 rtx mask;
476 rtx shift_reg;
477 int shift;
478
479
480 if (GET_CODE (ops[0]) == MEM)
481 dst = gen_reg_rtx (TImode);
482 else
483 dst = adjust_operand (dst, &start);
484 dst_mode = GET_MODE (dst);
485 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
486
487 if (CONSTANT_P (src))
488 {
489 machine_mode m =
490 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
491 src = force_reg (m, convert_to_mode (m, src, 0));
492 }
493 src = adjust_operand (src, 0);
494
495 mask = gen_reg_rtx (dst_mode);
496 shift_reg = gen_reg_rtx (dst_mode);
497 shift = dst_size - start - width;
498
499 /* It's not safe to use subreg here because the compiler assumes
500 that the SUBREG_REG is right justified in the SUBREG. */
501 convert_move (shift_reg, src, 1);
502
503 if (shift > 0)
504 {
505 switch (dst_mode)
506 {
507 case E_SImode:
508 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
509 break;
510 case E_DImode:
511 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
512 break;
513 case E_TImode:
514 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
515 break;
516 default:
517 abort ();
518 }
519 }
520 else if (shift < 0)
521 abort ();
522
523 switch (dst_size)
524 {
525 case 32:
526 maskbits = (~(unsigned HOST_WIDE_INT)0 << (32 - width - start));
527 if (start)
528 maskbits += ((unsigned HOST_WIDE_INT)1 << (32 - start));
529 emit_move_insn (mask, GEN_INT (maskbits));
530 break;
531 case 64:
532 maskbits = (~(unsigned HOST_WIDE_INT)0 << (64 - width - start));
533 if (start)
534 maskbits += ((unsigned HOST_WIDE_INT)1 << (64 - start));
535 emit_move_insn (mask, GEN_INT (maskbits));
536 break;
537 case 128:
538 {
539 unsigned char arr[16];
540 int i = start / 8;
541 memset (arr, 0, sizeof (arr));
542 arr[i] = 0xff >> (start & 7);
543 for (i++; i <= (start + width - 1) / 8; i++)
544 arr[i] = 0xff;
545 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
546 emit_move_insn (mask, array_to_constant (TImode, arr));
547 }
548 break;
549 default:
550 abort ();
551 }
552 if (GET_CODE (ops[0]) == MEM)
553 {
554 rtx low = gen_reg_rtx (SImode);
555 rtx rotl = gen_reg_rtx (SImode);
556 rtx mask0 = gen_reg_rtx (TImode);
557 rtx addr;
558 rtx addr0;
559 rtx addr1;
560 rtx mem;
561
562 addr = force_reg (Pmode, XEXP (ops[0], 0));
563 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
564 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
565 emit_insn (gen_negsi2 (rotl, low));
566 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
567 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
568 mem = change_address (ops[0], TImode, addr0);
569 set_mem_alias_set (mem, 0);
570 emit_move_insn (dst, mem);
571 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
572 if (start + width > MEM_ALIGN (ops[0]))
573 {
574 rtx shl = gen_reg_rtx (SImode);
575 rtx mask1 = gen_reg_rtx (TImode);
576 rtx dst1 = gen_reg_rtx (TImode);
577 rtx mem1;
578 addr1 = plus_constant (Pmode, addr, 16);
579 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
580 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
581 emit_insn (gen_shlqby_ti (mask1, mask, shl));
582 mem1 = change_address (ops[0], TImode, addr1);
583 set_mem_alias_set (mem1, 0);
584 emit_move_insn (dst1, mem1);
585 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
586 emit_move_insn (mem1, dst1);
587 }
588 emit_move_insn (mem, dst);
589 }
590 else
591 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
592 }
593
594
595 int
596 spu_expand_block_move (rtx ops[])
597 {
598 HOST_WIDE_INT bytes, align, offset;
599 rtx src, dst, sreg, dreg, target;
600 int i;
601 if (GET_CODE (ops[2]) != CONST_INT
602 || GET_CODE (ops[3]) != CONST_INT
603 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
604 return 0;
605
606 bytes = INTVAL (ops[2]);
607 align = INTVAL (ops[3]);
608
609 if (bytes <= 0)
610 return 1;
611
612 dst = ops[0];
613 src = ops[1];
614
615 if (align == 16)
616 {
617 for (offset = 0; offset + 16 <= bytes; offset += 16)
618 {
619 dst = adjust_address (ops[0], V16QImode, offset);
620 src = adjust_address (ops[1], V16QImode, offset);
621 emit_move_insn (dst, src);
622 }
623 if (offset < bytes)
624 {
625 rtx mask;
626 unsigned char arr[16] = { 0 };
627 for (i = 0; i < bytes - offset; i++)
628 arr[i] = 0xff;
629 dst = adjust_address (ops[0], V16QImode, offset);
630 src = adjust_address (ops[1], V16QImode, offset);
631 mask = gen_reg_rtx (V16QImode);
632 sreg = gen_reg_rtx (V16QImode);
633 dreg = gen_reg_rtx (V16QImode);
634 target = gen_reg_rtx (V16QImode);
635 emit_move_insn (mask, array_to_constant (V16QImode, arr));
636 emit_move_insn (dreg, dst);
637 emit_move_insn (sreg, src);
638 emit_insn (gen_selb (target, dreg, sreg, mask));
639 emit_move_insn (dst, target);
640 }
641 return 1;
642 }
643 return 0;
644 }
645
646 enum spu_comp_code
647 { SPU_EQ, SPU_GT, SPU_GTU };
648
649 int spu_comp_icode[12][3] = {
650 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
651 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
652 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
653 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
654 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
655 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
656 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
657 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
658 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
659 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
660 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
661 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
662 };
663
664 /* Generate a compare for CODE. Return a brand-new rtx that represents
665 the result of the compare. GCC can figure this out too if we don't
666 provide all variations of compares, but GCC always wants to use
667 WORD_MODE, we can generate better code in most cases if we do it
668 ourselves. */
669 void
670 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
671 {
672 int reverse_compare = 0;
673 int reverse_test = 0;
674 rtx compare_result, eq_result;
675 rtx comp_rtx, eq_rtx;
676 machine_mode comp_mode;
677 machine_mode op_mode;
678 enum spu_comp_code scode, eq_code;
679 enum insn_code ior_code;
680 enum rtx_code code = GET_CODE (cmp);
681 rtx op0 = XEXP (cmp, 0);
682 rtx op1 = XEXP (cmp, 1);
683 int index;
684 int eq_test = 0;
685
686 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
687 and so on, to keep the constant in operand 1. */
688 if (GET_CODE (op1) == CONST_INT)
689 {
690 HOST_WIDE_INT val = INTVAL (op1) - 1;
691 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
692 switch (code)
693 {
694 case GE:
695 op1 = GEN_INT (val);
696 code = GT;
697 break;
698 case LT:
699 op1 = GEN_INT (val);
700 code = LE;
701 break;
702 case GEU:
703 op1 = GEN_INT (val);
704 code = GTU;
705 break;
706 case LTU:
707 op1 = GEN_INT (val);
708 code = LEU;
709 break;
710 default:
711 break;
712 }
713 }
714
715 /* However, if we generate an integer result, performing a reverse test
716 would require an extra negation, so avoid that where possible. */
717 if (GET_CODE (op1) == CONST_INT && is_set == 1)
718 {
719 HOST_WIDE_INT val = INTVAL (op1) + 1;
720 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
721 switch (code)
722 {
723 case LE:
724 op1 = GEN_INT (val);
725 code = LT;
726 break;
727 case LEU:
728 op1 = GEN_INT (val);
729 code = LTU;
730 break;
731 default:
732 break;
733 }
734 }
735
736 comp_mode = SImode;
737 op_mode = GET_MODE (op0);
738
739 switch (code)
740 {
741 case GE:
742 scode = SPU_GT;
743 if (HONOR_NANS (op_mode))
744 {
745 reverse_compare = 0;
746 reverse_test = 0;
747 eq_test = 1;
748 eq_code = SPU_EQ;
749 }
750 else
751 {
752 reverse_compare = 1;
753 reverse_test = 1;
754 }
755 break;
756 case LE:
757 scode = SPU_GT;
758 if (HONOR_NANS (op_mode))
759 {
760 reverse_compare = 1;
761 reverse_test = 0;
762 eq_test = 1;
763 eq_code = SPU_EQ;
764 }
765 else
766 {
767 reverse_compare = 0;
768 reverse_test = 1;
769 }
770 break;
771 case LT:
772 reverse_compare = 1;
773 reverse_test = 0;
774 scode = SPU_GT;
775 break;
776 case GEU:
777 reverse_compare = 1;
778 reverse_test = 1;
779 scode = SPU_GTU;
780 break;
781 case LEU:
782 reverse_compare = 0;
783 reverse_test = 1;
784 scode = SPU_GTU;
785 break;
786 case LTU:
787 reverse_compare = 1;
788 reverse_test = 0;
789 scode = SPU_GTU;
790 break;
791 case NE:
792 reverse_compare = 0;
793 reverse_test = 1;
794 scode = SPU_EQ;
795 break;
796
797 case EQ:
798 scode = SPU_EQ;
799 break;
800 case GT:
801 scode = SPU_GT;
802 break;
803 case GTU:
804 scode = SPU_GTU;
805 break;
806 default:
807 scode = SPU_EQ;
808 break;
809 }
810
811 switch (op_mode)
812 {
813 case E_QImode:
814 index = 0;
815 comp_mode = QImode;
816 break;
817 case E_HImode:
818 index = 1;
819 comp_mode = HImode;
820 break;
821 case E_SImode:
822 index = 2;
823 break;
824 case E_DImode:
825 index = 3;
826 break;
827 case E_TImode:
828 index = 4;
829 break;
830 case E_SFmode:
831 index = 5;
832 break;
833 case E_DFmode:
834 index = 6;
835 break;
836 case E_V16QImode:
837 index = 7;
838 comp_mode = op_mode;
839 break;
840 case E_V8HImode:
841 index = 8;
842 comp_mode = op_mode;
843 break;
844 case E_V4SImode:
845 index = 9;
846 comp_mode = op_mode;
847 break;
848 case E_V4SFmode:
849 index = 10;
850 comp_mode = V4SImode;
851 break;
852 case E_V2DFmode:
853 index = 11;
854 comp_mode = V2DImode;
855 break;
856 case E_V2DImode:
857 default:
858 abort ();
859 }
860
861 if (GET_MODE (op1) == DFmode
862 && (scode != SPU_GT && scode != SPU_EQ))
863 abort ();
864
865 if (is_set == 0 && op1 == const0_rtx
866 && (GET_MODE (op0) == SImode
867 || GET_MODE (op0) == HImode
868 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
869 {
870 /* Don't need to set a register with the result when we are
871 comparing against zero and branching. */
872 reverse_test = !reverse_test;
873 compare_result = op0;
874 }
875 else
876 {
877 compare_result = gen_reg_rtx (comp_mode);
878
879 if (reverse_compare)
880 {
881 rtx t = op1;
882 op1 = op0;
883 op0 = t;
884 }
885
886 if (spu_comp_icode[index][scode] == 0)
887 abort ();
888
889 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
890 (op0, op_mode))
891 op0 = force_reg (op_mode, op0);
892 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
893 (op1, op_mode))
894 op1 = force_reg (op_mode, op1);
895 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
896 op0, op1);
897 if (comp_rtx == 0)
898 abort ();
899 emit_insn (comp_rtx);
900
901 if (eq_test)
902 {
903 eq_result = gen_reg_rtx (comp_mode);
904 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
905 op0, op1);
906 if (eq_rtx == 0)
907 abort ();
908 emit_insn (eq_rtx);
909 ior_code = optab_handler (ior_optab, comp_mode);
910 gcc_assert (ior_code != CODE_FOR_nothing);
911 emit_insn (GEN_FCN (ior_code)
912 (compare_result, compare_result, eq_result));
913 }
914 }
915
916 if (is_set == 0)
917 {
918 rtx bcomp;
919 rtx loc_ref;
920
921 /* We don't have branch on QI compare insns, so we convert the
922 QI compare result to a HI result. */
923 if (comp_mode == QImode)
924 {
925 rtx old_res = compare_result;
926 compare_result = gen_reg_rtx (HImode);
927 comp_mode = HImode;
928 emit_insn (gen_extendqihi2 (compare_result, old_res));
929 }
930
931 if (reverse_test)
932 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
933 else
934 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
935
936 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
937 emit_jump_insn (gen_rtx_SET (pc_rtx,
938 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
939 loc_ref, pc_rtx)));
940 }
941 else if (is_set == 2)
942 {
943 rtx target = operands[0];
944 int compare_size = GET_MODE_BITSIZE (comp_mode);
945 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
946 machine_mode mode = int_mode_for_size (target_size, 0).require ();
947 rtx select_mask;
948 rtx op_t = operands[2];
949 rtx op_f = operands[3];
950
951 /* The result of the comparison can be SI, HI or QI mode. Create a
952 mask based on that result. */
953 if (target_size > compare_size)
954 {
955 select_mask = gen_reg_rtx (mode);
956 emit_insn (gen_extend_compare (select_mask, compare_result));
957 }
958 else if (target_size < compare_size)
959 select_mask =
960 gen_rtx_SUBREG (mode, compare_result,
961 (compare_size - target_size) / BITS_PER_UNIT);
962 else if (comp_mode != mode)
963 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
964 else
965 select_mask = compare_result;
966
967 if (GET_MODE (target) != GET_MODE (op_t)
968 || GET_MODE (target) != GET_MODE (op_f))
969 abort ();
970
971 if (reverse_test)
972 emit_insn (gen_selb (target, op_t, op_f, select_mask));
973 else
974 emit_insn (gen_selb (target, op_f, op_t, select_mask));
975 }
976 else
977 {
978 rtx target = operands[0];
979 if (reverse_test)
980 emit_insn (gen_rtx_SET (compare_result,
981 gen_rtx_NOT (comp_mode, compare_result)));
982 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
983 emit_insn (gen_extendhisi2 (target, compare_result));
984 else if (GET_MODE (target) == SImode
985 && GET_MODE (compare_result) == QImode)
986 emit_insn (gen_extend_compare (target, compare_result));
987 else
988 emit_move_insn (target, compare_result);
989 }
990 }
991
992 HOST_WIDE_INT
993 const_double_to_hwint (rtx x)
994 {
995 HOST_WIDE_INT val;
996 if (GET_MODE (x) == SFmode)
997 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), val);
998 else if (GET_MODE (x) == DFmode)
999 {
1000 long l[2];
1001 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
1002 val = l[0];
1003 val = (val << 32) | (l[1] & 0xffffffff);
1004 }
1005 else
1006 abort ();
1007 return val;
1008 }
1009
1010 rtx
1011 hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
1012 {
1013 long tv[2];
1014 REAL_VALUE_TYPE rv;
1015 gcc_assert (mode == SFmode || mode == DFmode);
1016
1017 if (mode == SFmode)
1018 tv[0] = (v << 32) >> 32;
1019 else if (mode == DFmode)
1020 {
1021 tv[1] = (v << 32) >> 32;
1022 tv[0] = v >> 32;
1023 }
1024 real_from_target (&rv, tv, mode);
1025 return const_double_from_real_value (rv, mode);
1026 }
1027
1028 void
1029 print_operand_address (FILE * file, register rtx addr)
1030 {
1031 rtx reg;
1032 rtx offset;
1033
1034 if (GET_CODE (addr) == AND
1035 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1036 && INTVAL (XEXP (addr, 1)) == -16)
1037 addr = XEXP (addr, 0);
1038
1039 switch (GET_CODE (addr))
1040 {
1041 case REG:
1042 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1043 break;
1044
1045 case PLUS:
1046 reg = XEXP (addr, 0);
1047 offset = XEXP (addr, 1);
1048 if (GET_CODE (offset) == REG)
1049 {
1050 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1051 reg_names[REGNO (offset)]);
1052 }
1053 else if (GET_CODE (offset) == CONST_INT)
1054 {
1055 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1056 INTVAL (offset), reg_names[REGNO (reg)]);
1057 }
1058 else
1059 abort ();
1060 break;
1061
1062 case CONST:
1063 case LABEL_REF:
1064 case SYMBOL_REF:
1065 case CONST_INT:
1066 output_addr_const (file, addr);
1067 break;
1068
1069 default:
1070 debug_rtx (addr);
1071 abort ();
1072 }
1073 }
1074
1075 void
1076 print_operand (FILE * file, rtx x, int code)
1077 {
1078 machine_mode mode = GET_MODE (x);
1079 HOST_WIDE_INT val;
1080 unsigned char arr[16];
1081 int xcode = GET_CODE (x);
1082 int i, info;
1083 if (GET_MODE (x) == VOIDmode)
1084 switch (code)
1085 {
1086 case 'L': /* 128 bits, signed */
1087 case 'm': /* 128 bits, signed */
1088 case 'T': /* 128 bits, signed */
1089 case 't': /* 128 bits, signed */
1090 mode = TImode;
1091 break;
1092 case 'K': /* 64 bits, signed */
1093 case 'k': /* 64 bits, signed */
1094 case 'D': /* 64 bits, signed */
1095 case 'd': /* 64 bits, signed */
1096 mode = DImode;
1097 break;
1098 case 'J': /* 32 bits, signed */
1099 case 'j': /* 32 bits, signed */
1100 case 's': /* 32 bits, signed */
1101 case 'S': /* 32 bits, signed */
1102 mode = SImode;
1103 break;
1104 }
1105 switch (code)
1106 {
1107
1108 case 'j': /* 32 bits, signed */
1109 case 'k': /* 64 bits, signed */
1110 case 'm': /* 128 bits, signed */
1111 if (xcode == CONST_INT
1112 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1113 {
1114 gcc_assert (logical_immediate_p (x, mode));
1115 constant_to_array (mode, x, arr);
1116 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1117 val = trunc_int_for_mode (val, SImode);
1118 switch (which_logical_immediate (val))
1119 {
1120 case SPU_ORI:
1121 break;
1122 case SPU_ORHI:
1123 fprintf (file, "h");
1124 break;
1125 case SPU_ORBI:
1126 fprintf (file, "b");
1127 break;
1128 default:
1129 gcc_unreachable();
1130 }
1131 }
1132 else
1133 gcc_unreachable();
1134 return;
1135
1136 case 'J': /* 32 bits, signed */
1137 case 'K': /* 64 bits, signed */
1138 case 'L': /* 128 bits, signed */
1139 if (xcode == CONST_INT
1140 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1141 {
1142 gcc_assert (logical_immediate_p (x, mode)
1143 || iohl_immediate_p (x, mode));
1144 constant_to_array (mode, x, arr);
1145 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1146 val = trunc_int_for_mode (val, SImode);
1147 switch (which_logical_immediate (val))
1148 {
1149 case SPU_ORI:
1150 case SPU_IOHL:
1151 break;
1152 case SPU_ORHI:
1153 val = trunc_int_for_mode (val, HImode);
1154 break;
1155 case SPU_ORBI:
1156 val = trunc_int_for_mode (val, QImode);
1157 break;
1158 default:
1159 gcc_unreachable();
1160 }
1161 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1162 }
1163 else
1164 gcc_unreachable();
1165 return;
1166
1167 case 't': /* 128 bits, signed */
1168 case 'd': /* 64 bits, signed */
1169 case 's': /* 32 bits, signed */
1170 if (CONSTANT_P (x))
1171 {
1172 enum immediate_class c = classify_immediate (x, mode);
1173 switch (c)
1174 {
1175 case IC_IL1:
1176 constant_to_array (mode, x, arr);
1177 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1178 val = trunc_int_for_mode (val, SImode);
1179 switch (which_immediate_load (val))
1180 {
1181 case SPU_IL:
1182 break;
1183 case SPU_ILA:
1184 fprintf (file, "a");
1185 break;
1186 case SPU_ILH:
1187 fprintf (file, "h");
1188 break;
1189 case SPU_ILHU:
1190 fprintf (file, "hu");
1191 break;
1192 default:
1193 gcc_unreachable ();
1194 }
1195 break;
1196 case IC_CPAT:
1197 constant_to_array (mode, x, arr);
1198 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1199 if (info == 1)
1200 fprintf (file, "b");
1201 else if (info == 2)
1202 fprintf (file, "h");
1203 else if (info == 4)
1204 fprintf (file, "w");
1205 else if (info == 8)
1206 fprintf (file, "d");
1207 break;
1208 case IC_IL1s:
1209 if (xcode == CONST_VECTOR)
1210 {
1211 x = CONST_VECTOR_ELT (x, 0);
1212 xcode = GET_CODE (x);
1213 }
1214 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1215 fprintf (file, "a");
1216 else if (xcode == HIGH)
1217 fprintf (file, "hu");
1218 break;
1219 case IC_FSMBI:
1220 case IC_FSMBI2:
1221 case IC_IL2:
1222 case IC_IL2s:
1223 case IC_POOL:
1224 abort ();
1225 }
1226 }
1227 else
1228 gcc_unreachable ();
1229 return;
1230
1231 case 'T': /* 128 bits, signed */
1232 case 'D': /* 64 bits, signed */
1233 case 'S': /* 32 bits, signed */
1234 if (CONSTANT_P (x))
1235 {
1236 enum immediate_class c = classify_immediate (x, mode);
1237 switch (c)
1238 {
1239 case IC_IL1:
1240 constant_to_array (mode, x, arr);
1241 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1242 val = trunc_int_for_mode (val, SImode);
1243 switch (which_immediate_load (val))
1244 {
1245 case SPU_IL:
1246 case SPU_ILA:
1247 break;
1248 case SPU_ILH:
1249 case SPU_ILHU:
1250 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1251 break;
1252 default:
1253 gcc_unreachable ();
1254 }
1255 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1256 break;
1257 case IC_FSMBI:
1258 constant_to_array (mode, x, arr);
1259 val = 0;
1260 for (i = 0; i < 16; i++)
1261 {
1262 val <<= 1;
1263 val |= arr[i] & 1;
1264 }
1265 print_operand (file, GEN_INT (val), 0);
1266 break;
1267 case IC_CPAT:
1268 constant_to_array (mode, x, arr);
1269 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1270 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1271 break;
1272 case IC_IL1s:
1273 if (xcode == HIGH)
1274 x = XEXP (x, 0);
1275 if (GET_CODE (x) == CONST_VECTOR)
1276 x = CONST_VECTOR_ELT (x, 0);
1277 output_addr_const (file, x);
1278 if (xcode == HIGH)
1279 fprintf (file, "@h");
1280 break;
1281 case IC_IL2:
1282 case IC_IL2s:
1283 case IC_FSMBI2:
1284 case IC_POOL:
1285 abort ();
1286 }
1287 }
1288 else
1289 gcc_unreachable ();
1290 return;
1291
1292 case 'C':
1293 if (xcode == CONST_INT)
1294 {
1295 /* Only 4 least significant bits are relevant for generate
1296 control word instructions. */
1297 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1298 return;
1299 }
1300 break;
1301
1302 case 'M': /* print code for c*d */
1303 if (GET_CODE (x) == CONST_INT)
1304 switch (INTVAL (x))
1305 {
1306 case 1:
1307 fprintf (file, "b");
1308 break;
1309 case 2:
1310 fprintf (file, "h");
1311 break;
1312 case 4:
1313 fprintf (file, "w");
1314 break;
1315 case 8:
1316 fprintf (file, "d");
1317 break;
1318 default:
1319 gcc_unreachable();
1320 }
1321 else
1322 gcc_unreachable();
1323 return;
1324
1325 case 'N': /* Negate the operand */
1326 if (xcode == CONST_INT)
1327 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1328 else if (xcode == CONST_VECTOR)
1329 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1330 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1331 return;
1332
1333 case 'I': /* enable/disable interrupts */
1334 if (xcode == CONST_INT)
1335 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1336 return;
1337
1338 case 'b': /* branch modifiers */
1339 if (xcode == REG)
1340 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1341 else if (COMPARISON_P (x))
1342 fprintf (file, "%s", xcode == NE ? "n" : "");
1343 return;
1344
1345 case 'i': /* indirect call */
1346 if (xcode == MEM)
1347 {
1348 if (GET_CODE (XEXP (x, 0)) == REG)
1349 /* Used in indirect function calls. */
1350 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1351 else
1352 output_address (GET_MODE (x), XEXP (x, 0));
1353 }
1354 return;
1355
1356 case 'p': /* load/store */
1357 if (xcode == MEM)
1358 {
1359 x = XEXP (x, 0);
1360 xcode = GET_CODE (x);
1361 }
1362 if (xcode == AND)
1363 {
1364 x = XEXP (x, 0);
1365 xcode = GET_CODE (x);
1366 }
1367 if (xcode == REG)
1368 fprintf (file, "d");
1369 else if (xcode == CONST_INT)
1370 fprintf (file, "a");
1371 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1372 fprintf (file, "r");
1373 else if (xcode == PLUS || xcode == LO_SUM)
1374 {
1375 if (GET_CODE (XEXP (x, 1)) == REG)
1376 fprintf (file, "x");
1377 else
1378 fprintf (file, "d");
1379 }
1380 return;
1381
1382 case 'e':
1383 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1384 val &= 0x7;
1385 output_addr_const (file, GEN_INT (val));
1386 return;
1387
1388 case 'f':
1389 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1390 val &= 0x1f;
1391 output_addr_const (file, GEN_INT (val));
1392 return;
1393
1394 case 'g':
1395 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1396 val &= 0x3f;
1397 output_addr_const (file, GEN_INT (val));
1398 return;
1399
1400 case 'h':
1401 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1402 val = (val >> 3) & 0x1f;
1403 output_addr_const (file, GEN_INT (val));
1404 return;
1405
1406 case 'E':
1407 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1408 val = -val;
1409 val &= 0x7;
1410 output_addr_const (file, GEN_INT (val));
1411 return;
1412
1413 case 'F':
1414 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1415 val = -val;
1416 val &= 0x1f;
1417 output_addr_const (file, GEN_INT (val));
1418 return;
1419
1420 case 'G':
1421 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1422 val = -val;
1423 val &= 0x3f;
1424 output_addr_const (file, GEN_INT (val));
1425 return;
1426
1427 case 'H':
1428 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1429 val = -(val & -8ll);
1430 val = (val >> 3) & 0x1f;
1431 output_addr_const (file, GEN_INT (val));
1432 return;
1433
1434 case 'v':
1435 case 'w':
1436 constant_to_array (mode, x, arr);
1437 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1438 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1439 return;
1440
1441 case 0:
1442 if (xcode == REG)
1443 fprintf (file, "%s", reg_names[REGNO (x)]);
1444 else if (xcode == MEM)
1445 output_address (GET_MODE (x), XEXP (x, 0));
1446 else if (xcode == CONST_VECTOR)
1447 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1448 else
1449 output_addr_const (file, x);
1450 return;
1451
1452 /* unused letters
1453 o qr u yz
1454 AB OPQR UVWXYZ */
1455 default:
1456 output_operand_lossage ("invalid %%xn code");
1457 }
1458 gcc_unreachable ();
1459 }
1460
1461 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1462 caller saved register. For leaf functions it is more efficient to
1463 use a volatile register because we won't need to save and restore the
1464 pic register. This routine is only valid after register allocation
1465 is completed, so we can pick an unused register. */
1466 static rtx
1467 get_pic_reg (void)
1468 {
1469 if (!reload_completed && !reload_in_progress)
1470 abort ();
1471
1472 /* If we've already made the decision, we need to keep with it. Once we've
1473 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1474 return true since the register is now live; this should not cause us to
1475 "switch back" to using pic_offset_table_rtx. */
1476 if (!cfun->machine->pic_reg)
1477 {
1478 if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1479 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1480 else
1481 cfun->machine->pic_reg = pic_offset_table_rtx;
1482 }
1483
1484 return cfun->machine->pic_reg;
1485 }
1486
1487 /* Split constant addresses to handle cases that are too large.
1488 Add in the pic register when in PIC mode.
1489 Split immediates that require more than 1 instruction. */
1490 int
1491 spu_split_immediate (rtx * ops)
1492 {
1493 machine_mode mode = GET_MODE (ops[0]);
1494 enum immediate_class c = classify_immediate (ops[1], mode);
1495
1496 switch (c)
1497 {
1498 case IC_IL2:
1499 {
1500 unsigned char arrhi[16];
1501 unsigned char arrlo[16];
1502 rtx to, temp, hi, lo;
1503 int i;
1504 /* We need to do reals as ints because the constant used in the
1505 IOR might not be a legitimate real constant. */
1506 scalar_int_mode imode = int_mode_for_mode (mode).require ();
1507 constant_to_array (mode, ops[1], arrhi);
1508 if (imode != mode)
1509 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1510 else
1511 to = ops[0];
1512 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1513 for (i = 0; i < 16; i += 4)
1514 {
1515 arrlo[i + 2] = arrhi[i + 2];
1516 arrlo[i + 3] = arrhi[i + 3];
1517 arrlo[i + 0] = arrlo[i + 1] = 0;
1518 arrhi[i + 2] = arrhi[i + 3] = 0;
1519 }
1520 hi = array_to_constant (imode, arrhi);
1521 lo = array_to_constant (imode, arrlo);
1522 emit_move_insn (temp, hi);
1523 emit_insn (gen_rtx_SET (to, gen_rtx_IOR (imode, temp, lo)));
1524 return 1;
1525 }
1526 case IC_FSMBI2:
1527 {
1528 unsigned char arr_fsmbi[16];
1529 unsigned char arr_andbi[16];
1530 rtx to, reg_fsmbi, reg_and;
1531 int i;
1532 /* We need to do reals as ints because the constant used in the
1533 * AND might not be a legitimate real constant. */
1534 scalar_int_mode imode = int_mode_for_mode (mode).require ();
1535 constant_to_array (mode, ops[1], arr_fsmbi);
1536 if (imode != mode)
1537 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1538 else
1539 to = ops[0];
1540 for (i = 0; i < 16; i++)
1541 if (arr_fsmbi[i] != 0)
1542 {
1543 arr_andbi[0] = arr_fsmbi[i];
1544 arr_fsmbi[i] = 0xff;
1545 }
1546 for (i = 1; i < 16; i++)
1547 arr_andbi[i] = arr_andbi[0];
1548 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1549 reg_and = array_to_constant (imode, arr_andbi);
1550 emit_move_insn (to, reg_fsmbi);
1551 emit_insn (gen_rtx_SET (to, gen_rtx_AND (imode, to, reg_and)));
1552 return 1;
1553 }
1554 case IC_POOL:
1555 if (reload_in_progress || reload_completed)
1556 {
1557 rtx mem = force_const_mem (mode, ops[1]);
1558 if (TARGET_LARGE_MEM)
1559 {
1560 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1561 emit_move_insn (addr, XEXP (mem, 0));
1562 mem = replace_equiv_address (mem, addr);
1563 }
1564 emit_move_insn (ops[0], mem);
1565 return 1;
1566 }
1567 break;
1568 case IC_IL1s:
1569 case IC_IL2s:
1570 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1571 {
1572 if (c == IC_IL2s)
1573 {
1574 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1575 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1576 }
1577 else if (flag_pic)
1578 emit_insn (gen_pic (ops[0], ops[1]));
1579 if (flag_pic)
1580 {
1581 rtx pic_reg = get_pic_reg ();
1582 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1583 }
1584 return flag_pic || c == IC_IL2s;
1585 }
1586 break;
1587 case IC_IL1:
1588 case IC_FSMBI:
1589 case IC_CPAT:
1590 break;
1591 }
1592 return 0;
1593 }
1594
1595 /* SAVING is TRUE when we are generating the actual load and store
1596 instructions for REGNO. When determining the size of the stack
1597 needed for saving register we must allocate enough space for the
1598 worst case, because we don't always have the information early enough
1599 to not allocate it. But we can at least eliminate the actual loads
1600 and stores during the prologue/epilogue. */
1601 static int
1602 need_to_save_reg (int regno, int saving)
1603 {
1604 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1605 return 1;
1606 if (flag_pic
1607 && regno == PIC_OFFSET_TABLE_REGNUM
1608 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
1609 return 1;
1610 return 0;
1611 }
1612
1613 /* This function is only correct starting with local register
1614 allocation */
1615 int
1616 spu_saved_regs_size (void)
1617 {
1618 int reg_save_size = 0;
1619 int regno;
1620
1621 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1622 if (need_to_save_reg (regno, 0))
1623 reg_save_size += 0x10;
1624 return reg_save_size;
1625 }
1626
1627 static rtx_insn *
1628 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1629 {
1630 rtx reg = gen_rtx_REG (V4SImode, regno);
1631 rtx mem =
1632 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1633 return emit_insn (gen_movv4si (mem, reg));
1634 }
1635
1636 static rtx_insn *
1637 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1638 {
1639 rtx reg = gen_rtx_REG (V4SImode, regno);
1640 rtx mem =
1641 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1642 return emit_insn (gen_movv4si (reg, mem));
1643 }
1644
1645 /* This happens after reload, so we need to expand it. */
1646 static rtx_insn *
1647 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1648 {
1649 rtx_insn *insn;
1650 if (satisfies_constraint_K (GEN_INT (imm)))
1651 {
1652 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1653 }
1654 else
1655 {
1656 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1657 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1658 if (REGNO (src) == REGNO (scratch))
1659 abort ();
1660 }
1661 return insn;
1662 }
1663
1664 /* Return nonzero if this function is known to have a null epilogue. */
1665
1666 int
1667 direct_return (void)
1668 {
1669 if (reload_completed)
1670 {
1671 if (cfun->static_chain_decl == 0
1672 && (spu_saved_regs_size ()
1673 + get_frame_size ()
1674 + crtl->outgoing_args_size
1675 + crtl->args.pretend_args_size == 0)
1676 && crtl->is_leaf)
1677 return 1;
1678 }
1679 return 0;
1680 }
1681
1682 /*
1683 The stack frame looks like this:
1684 +-------------+
1685 | incoming |
1686 | args |
1687 AP -> +-------------+
1688 | $lr save |
1689 +-------------+
1690 prev SP | back chain |
1691 +-------------+
1692 | var args |
1693 | reg save | crtl->args.pretend_args_size bytes
1694 +-------------+
1695 | ... |
1696 | saved regs | spu_saved_regs_size() bytes
1697 FP -> +-------------+
1698 | ... |
1699 | vars | get_frame_size() bytes
1700 HFP -> +-------------+
1701 | ... |
1702 | outgoing |
1703 | args | crtl->outgoing_args_size bytes
1704 +-------------+
1705 | $lr of next |
1706 | frame |
1707 +-------------+
1708 | back chain |
1709 SP -> +-------------+
1710
1711 */
1712 void
1713 spu_expand_prologue (void)
1714 {
1715 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1716 HOST_WIDE_INT total_size;
1717 HOST_WIDE_INT saved_regs_size;
1718 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1719 rtx scratch_reg_0, scratch_reg_1;
1720 rtx_insn *insn;
1721 rtx real;
1722
1723 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1724 cfun->machine->pic_reg = pic_offset_table_rtx;
1725
1726 if (spu_naked_function_p (current_function_decl))
1727 return;
1728
1729 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1730 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1731
1732 saved_regs_size = spu_saved_regs_size ();
1733 total_size = size + saved_regs_size
1734 + crtl->outgoing_args_size
1735 + crtl->args.pretend_args_size;
1736
1737 if (!crtl->is_leaf
1738 || cfun->calls_alloca || total_size > 0)
1739 total_size += STACK_POINTER_OFFSET;
1740
1741 /* Save this first because code after this might use the link
1742 register as a scratch register. */
1743 if (!crtl->is_leaf)
1744 {
1745 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1746 RTX_FRAME_RELATED_P (insn) = 1;
1747 }
1748
1749 if (total_size > 0)
1750 {
1751 offset = -crtl->args.pretend_args_size;
1752 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1753 if (need_to_save_reg (regno, 1))
1754 {
1755 offset -= 16;
1756 insn = frame_emit_store (regno, sp_reg, offset);
1757 RTX_FRAME_RELATED_P (insn) = 1;
1758 }
1759 }
1760
1761 if (flag_pic && cfun->machine->pic_reg)
1762 {
1763 rtx pic_reg = cfun->machine->pic_reg;
1764 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1765 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1766 }
1767
1768 if (total_size > 0)
1769 {
1770 if (flag_stack_check || flag_stack_clash_protection)
1771 {
1772 /* We compare against total_size-1 because
1773 ($sp >= total_size) <=> ($sp > total_size-1) */
1774 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1775 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1776 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1777 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1778 {
1779 emit_move_insn (scratch_v4si, size_v4si);
1780 size_v4si = scratch_v4si;
1781 }
1782 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1783 emit_insn (gen_vec_extractv4sisi
1784 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1785 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1786 }
1787
1788 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1789 the value of the previous $sp because we save it as the back
1790 chain. */
1791 if (total_size <= 2000)
1792 {
1793 /* In this case we save the back chain first. */
1794 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1795 insn =
1796 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1797 }
1798 else
1799 {
1800 insn = emit_move_insn (scratch_reg_0, sp_reg);
1801 insn =
1802 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1803 }
1804 RTX_FRAME_RELATED_P (insn) = 1;
1805 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1806 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1807
1808 if (total_size > 2000)
1809 {
1810 /* Save the back chain ptr */
1811 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1812 }
1813
1814 if (frame_pointer_needed)
1815 {
1816 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1817 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1818 + crtl->outgoing_args_size;
1819 /* Set the new frame_pointer */
1820 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1821 RTX_FRAME_RELATED_P (insn) = 1;
1822 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1823 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1824 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1825 }
1826 }
1827
1828 if (flag_stack_usage_info)
1829 current_function_static_stack_size = total_size;
1830 }
1831
1832 void
1833 spu_expand_epilogue (bool sibcall_p)
1834 {
1835 int size = get_frame_size (), offset, regno;
1836 HOST_WIDE_INT saved_regs_size, total_size;
1837 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1838 rtx scratch_reg_0;
1839
1840 if (spu_naked_function_p (current_function_decl))
1841 return;
1842
1843 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1844
1845 saved_regs_size = spu_saved_regs_size ();
1846 total_size = size + saved_regs_size
1847 + crtl->outgoing_args_size
1848 + crtl->args.pretend_args_size;
1849
1850 if (!crtl->is_leaf
1851 || cfun->calls_alloca || total_size > 0)
1852 total_size += STACK_POINTER_OFFSET;
1853
1854 if (total_size > 0)
1855 {
1856 if (cfun->calls_alloca)
1857 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1858 else
1859 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1860
1861
1862 if (saved_regs_size > 0)
1863 {
1864 offset = -crtl->args.pretend_args_size;
1865 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1866 if (need_to_save_reg (regno, 1))
1867 {
1868 offset -= 0x10;
1869 frame_emit_load (regno, sp_reg, offset);
1870 }
1871 }
1872 }
1873
1874 if (!crtl->is_leaf)
1875 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1876
1877 if (!sibcall_p)
1878 {
1879 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1880 emit_jump_insn (gen__return ());
1881 }
1882 }
1883
1884 rtx
1885 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1886 {
1887 if (count != 0)
1888 return 0;
1889 /* This is inefficient because it ends up copying to a save-register
1890 which then gets saved even though $lr has already been saved. But
1891 it does generate better code for leaf functions and we don't need
1892 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1893 used for __builtin_return_address anyway, so maybe we don't care if
1894 it's inefficient. */
1895 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1896 }
1897 \f
1898
1899 /* Given VAL, generate a constant appropriate for MODE.
1900 If MODE is a vector mode, every element will be VAL.
1901 For TImode, VAL will be zero extended to 128 bits. */
1902 rtx
1903 spu_const (machine_mode mode, HOST_WIDE_INT val)
1904 {
1905 rtx inner;
1906
1907 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1908 || GET_MODE_CLASS (mode) == MODE_FLOAT
1909 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1910 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1911
1912 if (GET_MODE_CLASS (mode) == MODE_INT)
1913 return immed_double_const (val, 0, mode);
1914
1915 /* val is the bit representation of the float */
1916 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1917 return hwint_to_const_double (mode, val);
1918
1919 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1920 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1921 else
1922 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1923
1924 return gen_const_vec_duplicate (mode, inner);
1925 }
1926
1927 /* Create a MODE vector constant from 4 ints. */
1928 rtx
1929 spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
1930 {
1931 unsigned char arr[16];
1932 arr[0] = (a >> 24) & 0xff;
1933 arr[1] = (a >> 16) & 0xff;
1934 arr[2] = (a >> 8) & 0xff;
1935 arr[3] = (a >> 0) & 0xff;
1936 arr[4] = (b >> 24) & 0xff;
1937 arr[5] = (b >> 16) & 0xff;
1938 arr[6] = (b >> 8) & 0xff;
1939 arr[7] = (b >> 0) & 0xff;
1940 arr[8] = (c >> 24) & 0xff;
1941 arr[9] = (c >> 16) & 0xff;
1942 arr[10] = (c >> 8) & 0xff;
1943 arr[11] = (c >> 0) & 0xff;
1944 arr[12] = (d >> 24) & 0xff;
1945 arr[13] = (d >> 16) & 0xff;
1946 arr[14] = (d >> 8) & 0xff;
1947 arr[15] = (d >> 0) & 0xff;
1948 return array_to_constant(mode, arr);
1949 }
1950 \f
1951 /* branch hint stuff */
1952
1953 /* An array of these is used to propagate hints to predecessor blocks. */
1954 struct spu_bb_info
1955 {
1956 rtx_insn *prop_jump; /* propagated from another block */
1957 int bb_index; /* the original block. */
1958 };
1959 static struct spu_bb_info *spu_bb_info;
1960
1961 #define STOP_HINT_P(INSN) \
1962 (CALL_P(INSN) \
1963 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1964 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1965
1966 /* 1 when RTX is a hinted branch or its target. We keep track of
1967 what has been hinted so the safe-hint code can test it easily. */
1968 #define HINTED_P(RTX) \
1969 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
1970
1971 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
1972 #define SCHED_ON_EVEN_P(RTX) \
1973 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
1974
1975 /* Emit a nop for INSN such that the two will dual issue. This assumes
1976 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1977 We check for TImode to handle a MULTI1 insn which has dual issued its
1978 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
1979 static void
1980 emit_nop_for_insn (rtx_insn *insn)
1981 {
1982 int p;
1983 rtx_insn *new_insn;
1984
1985 /* We need to handle JUMP_TABLE_DATA separately. */
1986 if (JUMP_TABLE_DATA_P (insn))
1987 {
1988 new_insn = emit_insn_after (gen_lnop(), insn);
1989 recog_memoized (new_insn);
1990 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
1991 return;
1992 }
1993
1994 p = get_pipe (insn);
1995 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
1996 new_insn = emit_insn_after (gen_lnop (), insn);
1997 else if (p == 1 && GET_MODE (insn) == TImode)
1998 {
1999 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2000 PUT_MODE (new_insn, TImode);
2001 PUT_MODE (insn, VOIDmode);
2002 }
2003 else
2004 new_insn = emit_insn_after (gen_lnop (), insn);
2005 recog_memoized (new_insn);
2006 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
2007 }
2008
2009 /* Insert nops in basic blocks to meet dual issue alignment
2010 requirements. Also make sure hbrp and hint instructions are at least
2011 one cycle apart, possibly inserting a nop. */
2012 static void
2013 pad_bb(void)
2014 {
2015 rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
2016 int length;
2017 int addr;
2018
2019 /* This sets up INSN_ADDRESSES. */
2020 shorten_branches (get_insns ());
2021
2022 /* Keep track of length added by nops. */
2023 length = 0;
2024
2025 prev_insn = 0;
2026 insn = get_insns ();
2027 if (!active_insn_p (insn))
2028 insn = next_active_insn (insn);
2029 for (; insn; insn = next_insn)
2030 {
2031 next_insn = next_active_insn (insn);
2032 if (INSN_P (insn)
2033 && (INSN_CODE (insn) == CODE_FOR_iprefetch
2034 || INSN_CODE (insn) == CODE_FOR_hbr))
2035 {
2036 if (hbr_insn)
2037 {
2038 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2039 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2040 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2041 || (a1 - a0 == 4))
2042 {
2043 prev_insn = emit_insn_before (gen_lnop (), insn);
2044 PUT_MODE (prev_insn, GET_MODE (insn));
2045 PUT_MODE (insn, TImode);
2046 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
2047 length += 4;
2048 }
2049 }
2050 hbr_insn = insn;
2051 }
2052 if (INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
2053 {
2054 if (GET_MODE (insn) == TImode)
2055 PUT_MODE (next_insn, TImode);
2056 insn = next_insn;
2057 next_insn = next_active_insn (insn);
2058 }
2059 addr = INSN_ADDRESSES (INSN_UID (insn));
2060 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2061 {
2062 if (((addr + length) & 7) != 0)
2063 {
2064 emit_nop_for_insn (prev_insn);
2065 length += 4;
2066 }
2067 }
2068 else if (GET_MODE (insn) == TImode
2069 && ((next_insn && GET_MODE (next_insn) != TImode)
2070 || get_attr_type (insn) == TYPE_MULTI0)
2071 && ((addr + length) & 7) != 0)
2072 {
2073 /* prev_insn will always be set because the first insn is
2074 always 8-byte aligned. */
2075 emit_nop_for_insn (prev_insn);
2076 length += 4;
2077 }
2078 prev_insn = insn;
2079 }
2080 }
2081
2082 \f
2083 /* Routines for branch hints. */
2084
2085 static void
2086 spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
2087 int distance, sbitmap blocks)
2088 {
2089 rtx_insn *hint;
2090 rtx_insn *insn;
2091 rtx_jump_table_data *table;
2092
2093 if (before == 0 || branch == 0 || target == 0)
2094 return;
2095
2096 /* While scheduling we require hints to be no further than 600, so
2097 we need to enforce that here too */
2098 if (distance > 600)
2099 return;
2100
2101 /* If we have a Basic block note, emit it after the basic block note. */
2102 if (NOTE_INSN_BASIC_BLOCK_P (before))
2103 before = NEXT_INSN (before);
2104
2105 rtx_code_label *branch_label = gen_label_rtx ();
2106 LABEL_NUSES (branch_label)++;
2107 LABEL_PRESERVE_P (branch_label) = 1;
2108 insn = emit_label_before (branch_label, branch);
2109 rtx branch_label_ref = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2110 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
2111
2112 hint = emit_insn_before (gen_hbr (branch_label_ref, target), before);
2113 recog_memoized (hint);
2114 INSN_LOCATION (hint) = INSN_LOCATION (branch);
2115 HINTED_P (branch) = 1;
2116
2117 if (GET_CODE (target) == LABEL_REF)
2118 HINTED_P (XEXP (target, 0)) = 1;
2119 else if (tablejump_p (branch, 0, &table))
2120 {
2121 rtvec vec;
2122 int j;
2123 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2124 vec = XVEC (PATTERN (table), 0);
2125 else
2126 vec = XVEC (PATTERN (table), 1);
2127 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2128 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2129 }
2130
2131 if (distance >= 588)
2132 {
2133 /* Make sure the hint isn't scheduled any earlier than this point,
2134 which could make it too far for the branch offest to fit */
2135 insn = emit_insn_before (gen_blockage (), hint);
2136 recog_memoized (insn);
2137 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2138 }
2139 else if (distance <= 8 * 4)
2140 {
2141 /* To guarantee at least 8 insns between the hint and branch we
2142 insert nops. */
2143 int d;
2144 for (d = distance; d < 8 * 4; d += 4)
2145 {
2146 insn =
2147 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2148 recog_memoized (insn);
2149 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2150 }
2151
2152 /* Make sure any nops inserted aren't scheduled before the hint. */
2153 insn = emit_insn_after (gen_blockage (), hint);
2154 recog_memoized (insn);
2155 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2156
2157 /* Make sure any nops inserted aren't scheduled after the call. */
2158 if (CALL_P (branch) && distance < 8 * 4)
2159 {
2160 insn = emit_insn_before (gen_blockage (), branch);
2161 recog_memoized (insn);
2162 INSN_LOCATION (insn) = INSN_LOCATION (branch);
2163 }
2164 }
2165 }
2166
2167 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2168 the rtx for the branch target. */
2169 static rtx
2170 get_branch_target (rtx_insn *branch)
2171 {
2172 if (JUMP_P (branch))
2173 {
2174 rtx set, src;
2175
2176 /* Return statements */
2177 if (GET_CODE (PATTERN (branch)) == RETURN)
2178 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2179
2180 /* ASM GOTOs. */
2181 if (extract_asm_operands (PATTERN (branch)) != NULL)
2182 return NULL;
2183
2184 set = single_set (branch);
2185 src = SET_SRC (set);
2186 if (GET_CODE (SET_DEST (set)) != PC)
2187 abort ();
2188
2189 if (GET_CODE (src) == IF_THEN_ELSE)
2190 {
2191 rtx lab = 0;
2192 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2193 if (note)
2194 {
2195 /* If the more probable case is not a fall through, then
2196 try a branch hint. */
2197 int prob = profile_probability::from_reg_br_prob_note
2198 (XINT (note, 0)).to_reg_br_prob_base ();
2199 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2200 && GET_CODE (XEXP (src, 1)) != PC)
2201 lab = XEXP (src, 1);
2202 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2203 && GET_CODE (XEXP (src, 2)) != PC)
2204 lab = XEXP (src, 2);
2205 }
2206 if (lab)
2207 {
2208 if (GET_CODE (lab) == RETURN)
2209 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2210 return lab;
2211 }
2212 return 0;
2213 }
2214
2215 return src;
2216 }
2217 else if (CALL_P (branch))
2218 {
2219 rtx call;
2220 /* All of our call patterns are in a PARALLEL and the CALL is
2221 the first pattern in the PARALLEL. */
2222 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2223 abort ();
2224 call = XVECEXP (PATTERN (branch), 0, 0);
2225 if (GET_CODE (call) == SET)
2226 call = SET_SRC (call);
2227 if (GET_CODE (call) != CALL)
2228 abort ();
2229 return XEXP (XEXP (call, 0), 0);
2230 }
2231 return 0;
2232 }
2233
2234 /* The special $hbr register is used to prevent the insn scheduler from
2235 moving hbr insns across instructions which invalidate them. It
2236 should only be used in a clobber, and this function searches for
2237 insns which clobber it. */
2238 static bool
2239 insn_clobbers_hbr (rtx_insn *insn)
2240 {
2241 if (INSN_P (insn)
2242 && GET_CODE (PATTERN (insn)) == PARALLEL)
2243 {
2244 rtx parallel = PATTERN (insn);
2245 rtx clobber;
2246 int j;
2247 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2248 {
2249 clobber = XVECEXP (parallel, 0, j);
2250 if (GET_CODE (clobber) == CLOBBER
2251 && GET_CODE (XEXP (clobber, 0)) == REG
2252 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2253 return 1;
2254 }
2255 }
2256 return 0;
2257 }
2258
2259 /* Search up to 32 insns starting at FIRST:
2260 - at any kind of hinted branch, just return
2261 - at any unconditional branch in the first 15 insns, just return
2262 - at a call or indirect branch, after the first 15 insns, force it to
2263 an even address and return
2264 - at any unconditional branch, after the first 15 insns, force it to
2265 an even address.
2266 At then end of the search, insert an hbrp within 4 insns of FIRST,
2267 and an hbrp within 16 instructions of FIRST.
2268 */
2269 static void
2270 insert_hbrp_for_ilb_runout (rtx_insn *first)
2271 {
2272 rtx_insn *insn, *before_4 = 0, *before_16 = 0;
2273 int addr = 0, length, first_addr = -1;
2274 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2275 int insert_lnop_after = 0;
2276 for (insn = first; insn; insn = NEXT_INSN (insn))
2277 if (INSN_P (insn))
2278 {
2279 if (first_addr == -1)
2280 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2281 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2282 length = get_attr_length (insn);
2283
2284 if (before_4 == 0 && addr + length >= 4 * 4)
2285 before_4 = insn;
2286 /* We test for 14 instructions because the first hbrp will add
2287 up to 2 instructions. */
2288 if (before_16 == 0 && addr + length >= 14 * 4)
2289 before_16 = insn;
2290
2291 if (INSN_CODE (insn) == CODE_FOR_hbr)
2292 {
2293 /* Make sure an hbrp is at least 2 cycles away from a hint.
2294 Insert an lnop after the hbrp when necessary. */
2295 if (before_4 == 0 && addr > 0)
2296 {
2297 before_4 = insn;
2298 insert_lnop_after |= 1;
2299 }
2300 else if (before_4 && addr <= 4 * 4)
2301 insert_lnop_after |= 1;
2302 if (before_16 == 0 && addr > 10 * 4)
2303 {
2304 before_16 = insn;
2305 insert_lnop_after |= 2;
2306 }
2307 else if (before_16 && addr <= 14 * 4)
2308 insert_lnop_after |= 2;
2309 }
2310
2311 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2312 {
2313 if (addr < hbrp_addr0)
2314 hbrp_addr0 = addr;
2315 else if (addr < hbrp_addr1)
2316 hbrp_addr1 = addr;
2317 }
2318
2319 if (CALL_P (insn) || JUMP_P (insn))
2320 {
2321 if (HINTED_P (insn))
2322 return;
2323
2324 /* Any branch after the first 15 insns should be on an even
2325 address to avoid a special case branch. There might be
2326 some nops and/or hbrps inserted, so we test after 10
2327 insns. */
2328 if (addr > 10 * 4)
2329 SCHED_ON_EVEN_P (insn) = 1;
2330 }
2331
2332 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2333 return;
2334
2335
2336 if (addr + length >= 32 * 4)
2337 {
2338 gcc_assert (before_4 && before_16);
2339 if (hbrp_addr0 > 4 * 4)
2340 {
2341 insn =
2342 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2343 recog_memoized (insn);
2344 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2345 INSN_ADDRESSES_NEW (insn,
2346 INSN_ADDRESSES (INSN_UID (before_4)));
2347 PUT_MODE (insn, GET_MODE (before_4));
2348 PUT_MODE (before_4, TImode);
2349 if (insert_lnop_after & 1)
2350 {
2351 insn = emit_insn_before (gen_lnop (), before_4);
2352 recog_memoized (insn);
2353 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2354 INSN_ADDRESSES_NEW (insn,
2355 INSN_ADDRESSES (INSN_UID (before_4)));
2356 PUT_MODE (insn, TImode);
2357 }
2358 }
2359 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2360 && hbrp_addr1 > 16 * 4)
2361 {
2362 insn =
2363 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2364 recog_memoized (insn);
2365 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2366 INSN_ADDRESSES_NEW (insn,
2367 INSN_ADDRESSES (INSN_UID (before_16)));
2368 PUT_MODE (insn, GET_MODE (before_16));
2369 PUT_MODE (before_16, TImode);
2370 if (insert_lnop_after & 2)
2371 {
2372 insn = emit_insn_before (gen_lnop (), before_16);
2373 recog_memoized (insn);
2374 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2375 INSN_ADDRESSES_NEW (insn,
2376 INSN_ADDRESSES (INSN_UID
2377 (before_16)));
2378 PUT_MODE (insn, TImode);
2379 }
2380 }
2381 return;
2382 }
2383 }
2384 else if (BARRIER_P (insn))
2385 return;
2386
2387 }
2388
2389 /* The SPU might hang when it executes 48 inline instructions after a
2390 hinted branch jumps to its hinted target. The beginning of a
2391 function and the return from a call might have been hinted, and
2392 must be handled as well. To prevent a hang we insert 2 hbrps. The
2393 first should be within 6 insns of the branch target. The second
2394 should be within 22 insns of the branch target. When determining
2395 if hbrps are necessary, we look for only 32 inline instructions,
2396 because up to 12 nops and 4 hbrps could be inserted. Similarily,
2397 when inserting new hbrps, we insert them within 4 and 16 insns of
2398 the target. */
2399 static void
2400 insert_hbrp (void)
2401 {
2402 rtx_insn *insn;
2403 if (TARGET_SAFE_HINTS)
2404 {
2405 shorten_branches (get_insns ());
2406 /* Insert hbrp at beginning of function */
2407 insn = next_active_insn (get_insns ());
2408 if (insn)
2409 insert_hbrp_for_ilb_runout (insn);
2410 /* Insert hbrp after hinted targets. */
2411 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2412 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2413 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2414 }
2415 }
2416
2417 static int in_spu_reorg;
2418
2419 static void
2420 spu_var_tracking (void)
2421 {
2422 if (flag_var_tracking)
2423 {
2424 df_analyze ();
2425 timevar_push (TV_VAR_TRACKING);
2426 variable_tracking_main ();
2427 timevar_pop (TV_VAR_TRACKING);
2428 df_finish_pass (false);
2429 }
2430 }
2431
2432 /* Insert branch hints. There are no branch optimizations after this
2433 pass, so it's safe to set our branch hints now. */
2434 static void
2435 spu_machine_dependent_reorg (void)
2436 {
2437 sbitmap blocks;
2438 basic_block bb;
2439 rtx_insn *branch, *insn;
2440 rtx branch_target = 0;
2441 int branch_addr = 0, insn_addr, required_dist = 0;
2442 int i;
2443 unsigned int j;
2444
2445 if (!TARGET_BRANCH_HINTS || optimize == 0)
2446 {
2447 /* We still do it for unoptimized code because an external
2448 function might have hinted a call or return. */
2449 compute_bb_for_insn ();
2450 insert_hbrp ();
2451 pad_bb ();
2452 spu_var_tracking ();
2453 free_bb_for_insn ();
2454 return;
2455 }
2456
2457 blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
2458 bitmap_clear (blocks);
2459
2460 in_spu_reorg = 1;
2461 compute_bb_for_insn ();
2462
2463 /* (Re-)discover loops so that bb->loop_father can be used
2464 in the analysis below. */
2465 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2466
2467 compact_blocks ();
2468
2469 spu_bb_info =
2470 (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
2471 sizeof (struct spu_bb_info));
2472
2473 /* We need exact insn addresses and lengths. */
2474 shorten_branches (get_insns ());
2475
2476 for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
2477 {
2478 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2479 branch = 0;
2480 if (spu_bb_info[i].prop_jump)
2481 {
2482 branch = spu_bb_info[i].prop_jump;
2483 branch_target = get_branch_target (branch);
2484 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2485 required_dist = spu_hint_dist;
2486 }
2487 /* Search from end of a block to beginning. In this loop, find
2488 jumps which need a branch and emit them only when:
2489 - it's an indirect branch and we're at the insn which sets
2490 the register
2491 - we're at an insn that will invalidate the hint. e.g., a
2492 call, another hint insn, inline asm that clobbers $hbr, and
2493 some inlined operations (divmodsi4). Don't consider jumps
2494 because they are only at the end of a block and are
2495 considered when we are deciding whether to propagate
2496 - we're getting too far away from the branch. The hbr insns
2497 only have a signed 10 bit offset
2498 We go back as far as possible so the branch will be considered
2499 for propagation when we get to the beginning of the block. */
2500 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2501 {
2502 if (INSN_P (insn))
2503 {
2504 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2505 if (branch
2506 && ((GET_CODE (branch_target) == REG
2507 && set_of (branch_target, insn) != NULL_RTX)
2508 || insn_clobbers_hbr (insn)
2509 || branch_addr - insn_addr > 600))
2510 {
2511 rtx_insn *next = NEXT_INSN (insn);
2512 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2513 if (insn != BB_END (bb)
2514 && branch_addr - next_addr >= required_dist)
2515 {
2516 if (dump_file)
2517 fprintf (dump_file,
2518 "hint for %i in block %i before %i\n",
2519 INSN_UID (branch), bb->index,
2520 INSN_UID (next));
2521 spu_emit_branch_hint (next, branch, branch_target,
2522 branch_addr - next_addr, blocks);
2523 }
2524 branch = 0;
2525 }
2526
2527 /* JUMP_P will only be true at the end of a block. When
2528 branch is already set it means we've previously decided
2529 to propagate a hint for that branch into this block. */
2530 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2531 {
2532 branch = 0;
2533 if ((branch_target = get_branch_target (insn)))
2534 {
2535 branch = insn;
2536 branch_addr = insn_addr;
2537 required_dist = spu_hint_dist;
2538 }
2539 }
2540 }
2541 if (insn == BB_HEAD (bb))
2542 break;
2543 }
2544
2545 if (branch)
2546 {
2547 /* If we haven't emitted a hint for this branch yet, it might
2548 be profitable to emit it in one of the predecessor blocks,
2549 especially for loops. */
2550 rtx_insn *bbend;
2551 basic_block prev = 0, prop = 0, prev2 = 0;
2552 int loop_exit = 0, simple_loop = 0;
2553 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2554
2555 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2556 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2557 prev = EDGE_PRED (bb, j)->src;
2558 else
2559 prev2 = EDGE_PRED (bb, j)->src;
2560
2561 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2562 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2563 loop_exit = 1;
2564 else if (EDGE_SUCC (bb, j)->dest == bb)
2565 simple_loop = 1;
2566
2567 /* If this branch is a loop exit then propagate to previous
2568 fallthru block. This catches the cases when it is a simple
2569 loop or when there is an initial branch into the loop. */
2570 if (prev && (loop_exit || simple_loop)
2571 && bb_loop_depth (prev) <= bb_loop_depth (bb))
2572 prop = prev;
2573
2574 /* If there is only one adjacent predecessor. Don't propagate
2575 outside this loop. */
2576 else if (prev && single_pred_p (bb)
2577 && prev->loop_father == bb->loop_father)
2578 prop = prev;
2579
2580 /* If this is the JOIN block of a simple IF-THEN then
2581 propagate the hint to the HEADER block. */
2582 else if (prev && prev2
2583 && EDGE_COUNT (bb->preds) == 2
2584 && EDGE_COUNT (prev->preds) == 1
2585 && EDGE_PRED (prev, 0)->src == prev2
2586 && prev2->loop_father == bb->loop_father
2587 && GET_CODE (branch_target) != REG)
2588 prop = prev;
2589
2590 /* Don't propagate when:
2591 - this is a simple loop and the hint would be too far
2592 - this is not a simple loop and there are 16 insns in
2593 this block already
2594 - the predecessor block ends in a branch that will be
2595 hinted
2596 - the predecessor block ends in an insn that invalidates
2597 the hint */
2598 if (prop
2599 && prop->index >= 0
2600 && (bbend = BB_END (prop))
2601 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2602 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2603 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2604 {
2605 if (dump_file)
2606 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2607 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2608 bb->index, prop->index, bb_loop_depth (bb),
2609 INSN_UID (branch), loop_exit, simple_loop,
2610 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2611
2612 spu_bb_info[prop->index].prop_jump = branch;
2613 spu_bb_info[prop->index].bb_index = i;
2614 }
2615 else if (branch_addr - next_addr >= required_dist)
2616 {
2617 if (dump_file)
2618 fprintf (dump_file, "hint for %i in block %i before %i\n",
2619 INSN_UID (branch), bb->index,
2620 INSN_UID (NEXT_INSN (insn)));
2621 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2622 branch_addr - next_addr, blocks);
2623 }
2624 branch = 0;
2625 }
2626 }
2627 free (spu_bb_info);
2628
2629 if (!bitmap_empty_p (blocks))
2630 find_many_sub_basic_blocks (blocks);
2631
2632 /* We have to schedule to make sure alignment is ok. */
2633 FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
2634
2635 /* The hints need to be scheduled, so call it again. */
2636 schedule_insns ();
2637 df_finish_pass (true);
2638
2639 insert_hbrp ();
2640
2641 pad_bb ();
2642
2643 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2644 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2645 {
2646 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2647 between its branch label and the branch . We don't move the
2648 label because GCC expects it at the beginning of the block. */
2649 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2650 rtx label_ref = XVECEXP (unspec, 0, 0);
2651 rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
2652 rtx_insn *branch;
2653 int offset = 0;
2654 for (branch = NEXT_INSN (label);
2655 !JUMP_P (branch) && !CALL_P (branch);
2656 branch = NEXT_INSN (branch))
2657 if (NONJUMP_INSN_P (branch))
2658 offset += get_attr_length (branch);
2659 if (offset > 0)
2660 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
2661 }
2662
2663 spu_var_tracking ();
2664
2665 loop_optimizer_finalize ();
2666
2667 free_bb_for_insn ();
2668
2669 in_spu_reorg = 0;
2670 }
2671 \f
2672
2673 /* Insn scheduling routines, primarily for dual issue. */
2674 static int
2675 spu_sched_issue_rate (void)
2676 {
2677 return 2;
2678 }
2679
2680 static int
2681 uses_ls_unit(rtx_insn *insn)
2682 {
2683 rtx set = single_set (insn);
2684 if (set != 0
2685 && (GET_CODE (SET_DEST (set)) == MEM
2686 || GET_CODE (SET_SRC (set)) == MEM))
2687 return 1;
2688 return 0;
2689 }
2690
2691 static int
2692 get_pipe (rtx_insn *insn)
2693 {
2694 enum attr_type t;
2695 /* Handle inline asm */
2696 if (INSN_CODE (insn) == -1)
2697 return -1;
2698 t = get_attr_type (insn);
2699 switch (t)
2700 {
2701 case TYPE_CONVERT:
2702 return -2;
2703 case TYPE_MULTI0:
2704 return -1;
2705
2706 case TYPE_FX2:
2707 case TYPE_FX3:
2708 case TYPE_SPR:
2709 case TYPE_NOP:
2710 case TYPE_FXB:
2711 case TYPE_FPD:
2712 case TYPE_FP6:
2713 case TYPE_FP7:
2714 return 0;
2715
2716 case TYPE_LNOP:
2717 case TYPE_SHUF:
2718 case TYPE_LOAD:
2719 case TYPE_STORE:
2720 case TYPE_BR:
2721 case TYPE_MULTI1:
2722 case TYPE_HBR:
2723 case TYPE_IPREFETCH:
2724 return 1;
2725 default:
2726 abort ();
2727 }
2728 }
2729
2730
2731 /* haifa-sched.c has a static variable that keeps track of the current
2732 cycle. It is passed to spu_sched_reorder, and we record it here for
2733 use by spu_sched_variable_issue. It won't be accurate if the
2734 scheduler updates it's clock_var between the two calls. */
2735 static int clock_var;
2736
2737 /* This is used to keep track of insn alignment. Set to 0 at the
2738 beginning of each block and increased by the "length" attr of each
2739 insn scheduled. */
2740 static int spu_sched_length;
2741
2742 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2743 ready list appropriately in spu_sched_reorder(). */
2744 static int pipe0_clock;
2745 static int pipe1_clock;
2746
2747 static int prev_clock_var;
2748
2749 static int prev_priority;
2750
2751 /* The SPU needs to load the next ilb sometime during the execution of
2752 the previous ilb. There is a potential conflict if every cycle has a
2753 load or store. To avoid the conflict we make sure the load/store
2754 unit is free for at least one cycle during the execution of insns in
2755 the previous ilb. */
2756 static int spu_ls_first;
2757 static int prev_ls_clock;
2758
2759 static void
2760 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2761 int max_ready ATTRIBUTE_UNUSED)
2762 {
2763 spu_sched_length = 0;
2764 }
2765
2766 static void
2767 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2768 int max_ready ATTRIBUTE_UNUSED)
2769 {
2770 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2771 {
2772 /* When any block might be at least 8-byte aligned, assume they
2773 will all be at least 8-byte aligned to make sure dual issue
2774 works out correctly. */
2775 spu_sched_length = 0;
2776 }
2777 spu_ls_first = INT_MAX;
2778 clock_var = -1;
2779 prev_ls_clock = -1;
2780 pipe0_clock = -1;
2781 pipe1_clock = -1;
2782 prev_clock_var = -1;
2783 prev_priority = -1;
2784 }
2785
2786 static int
2787 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2788 int verbose ATTRIBUTE_UNUSED,
2789 rtx_insn *insn, int more)
2790 {
2791 int len;
2792 int p;
2793 if (GET_CODE (PATTERN (insn)) == USE
2794 || GET_CODE (PATTERN (insn)) == CLOBBER
2795 || (len = get_attr_length (insn)) == 0)
2796 return more;
2797
2798 spu_sched_length += len;
2799
2800 /* Reset on inline asm */
2801 if (INSN_CODE (insn) == -1)
2802 {
2803 spu_ls_first = INT_MAX;
2804 pipe0_clock = -1;
2805 pipe1_clock = -1;
2806 return 0;
2807 }
2808 p = get_pipe (insn);
2809 if (p == 0)
2810 pipe0_clock = clock_var;
2811 else
2812 pipe1_clock = clock_var;
2813
2814 if (in_spu_reorg)
2815 {
2816 if (clock_var - prev_ls_clock > 1
2817 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2818 spu_ls_first = INT_MAX;
2819 if (uses_ls_unit (insn))
2820 {
2821 if (spu_ls_first == INT_MAX)
2822 spu_ls_first = spu_sched_length;
2823 prev_ls_clock = clock_var;
2824 }
2825
2826 /* The scheduler hasn't inserted the nop, but we will later on.
2827 Include those nops in spu_sched_length. */
2828 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2829 spu_sched_length += 4;
2830 prev_clock_var = clock_var;
2831
2832 /* more is -1 when called from spu_sched_reorder for new insns
2833 that don't have INSN_PRIORITY */
2834 if (more >= 0)
2835 prev_priority = INSN_PRIORITY (insn);
2836 }
2837
2838 /* Always try issuing more insns. spu_sched_reorder will decide
2839 when the cycle should be advanced. */
2840 return 1;
2841 }
2842
2843 /* This function is called for both TARGET_SCHED_REORDER and
2844 TARGET_SCHED_REORDER2. */
2845 static int
2846 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2847 rtx_insn **ready, int *nreadyp, int clock)
2848 {
2849 int i, nready = *nreadyp;
2850 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2851 rtx_insn *insn;
2852
2853 clock_var = clock;
2854
2855 if (nready <= 0 || pipe1_clock >= clock)
2856 return 0;
2857
2858 /* Find any rtl insns that don't generate assembly insns and schedule
2859 them first. */
2860 for (i = nready - 1; i >= 0; i--)
2861 {
2862 insn = ready[i];
2863 if (INSN_CODE (insn) == -1
2864 || INSN_CODE (insn) == CODE_FOR_blockage
2865 || (INSN_P (insn) && get_attr_length (insn) == 0))
2866 {
2867 ready[i] = ready[nready - 1];
2868 ready[nready - 1] = insn;
2869 return 1;
2870 }
2871 }
2872
2873 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2874 for (i = 0; i < nready; i++)
2875 if (INSN_CODE (ready[i]) != -1)
2876 {
2877 insn = ready[i];
2878 switch (get_attr_type (insn))
2879 {
2880 default:
2881 case TYPE_MULTI0:
2882 case TYPE_CONVERT:
2883 case TYPE_FX2:
2884 case TYPE_FX3:
2885 case TYPE_SPR:
2886 case TYPE_NOP:
2887 case TYPE_FXB:
2888 case TYPE_FPD:
2889 case TYPE_FP6:
2890 case TYPE_FP7:
2891 pipe_0 = i;
2892 break;
2893 case TYPE_LOAD:
2894 case TYPE_STORE:
2895 pipe_ls = i;
2896 /* FALLTHRU */
2897 case TYPE_LNOP:
2898 case TYPE_SHUF:
2899 case TYPE_BR:
2900 case TYPE_MULTI1:
2901 case TYPE_HBR:
2902 pipe_1 = i;
2903 break;
2904 case TYPE_IPREFETCH:
2905 pipe_hbrp = i;
2906 break;
2907 }
2908 }
2909
2910 /* In the first scheduling phase, schedule loads and stores together
2911 to increase the chance they will get merged during postreload CSE. */
2912 if (!reload_completed && pipe_ls >= 0)
2913 {
2914 insn = ready[pipe_ls];
2915 ready[pipe_ls] = ready[nready - 1];
2916 ready[nready - 1] = insn;
2917 return 1;
2918 }
2919
2920 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2921 if (pipe_hbrp >= 0)
2922 pipe_1 = pipe_hbrp;
2923
2924 /* When we have loads/stores in every cycle of the last 15 insns and
2925 we are about to schedule another load/store, emit an hbrp insn
2926 instead. */
2927 if (in_spu_reorg
2928 && spu_sched_length - spu_ls_first >= 4 * 15
2929 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2930 {
2931 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2932 recog_memoized (insn);
2933 if (pipe0_clock < clock)
2934 PUT_MODE (insn, TImode);
2935 spu_sched_variable_issue (file, verbose, insn, -1);
2936 return 0;
2937 }
2938
2939 /* In general, we want to emit nops to increase dual issue, but dual
2940 issue isn't faster when one of the insns could be scheduled later
2941 without effecting the critical path. We look at INSN_PRIORITY to
2942 make a good guess, but it isn't perfect so -mdual-nops=n can be
2943 used to effect it. */
2944 if (in_spu_reorg && spu_dual_nops < 10)
2945 {
2946 /* When we are at an even address and we are not issuing nops to
2947 improve scheduling then we need to advance the cycle. */
2948 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2949 && (spu_dual_nops == 0
2950 || (pipe_1 != -1
2951 && prev_priority >
2952 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2953 return 0;
2954
2955 /* When at an odd address, schedule the highest priority insn
2956 without considering pipeline. */
2957 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2958 && (spu_dual_nops == 0
2959 || (prev_priority >
2960 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2961 return 1;
2962 }
2963
2964
2965 /* We haven't issued a pipe0 insn yet this cycle, if there is a
2966 pipe0 insn in the ready list, schedule it. */
2967 if (pipe0_clock < clock && pipe_0 >= 0)
2968 schedule_i = pipe_0;
2969
2970 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2971 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2972 else
2973 schedule_i = pipe_1;
2974
2975 if (schedule_i > -1)
2976 {
2977 insn = ready[schedule_i];
2978 ready[schedule_i] = ready[nready - 1];
2979 ready[nready - 1] = insn;
2980 return 1;
2981 }
2982 return 0;
2983 }
2984
2985 /* INSN is dependent on DEP_INSN. */
2986 static int
2987 spu_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
2988 int cost, unsigned int)
2989 {
2990 rtx set;
2991
2992 /* The blockage pattern is used to prevent instructions from being
2993 moved across it and has no cost. */
2994 if (INSN_CODE (insn) == CODE_FOR_blockage
2995 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
2996 return 0;
2997
2998 if ((INSN_P (insn) && get_attr_length (insn) == 0)
2999 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3000 return 0;
3001
3002 /* Make sure hbrps are spread out. */
3003 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3004 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3005 return 8;
3006
3007 /* Make sure hints and hbrps are 2 cycles apart. */
3008 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3009 || INSN_CODE (insn) == CODE_FOR_hbr)
3010 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3011 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3012 return 2;
3013
3014 /* An hbrp has no real dependency on other insns. */
3015 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3016 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3017 return 0;
3018
3019 /* Assuming that it is unlikely an argument register will be used in
3020 the first cycle of the called function, we reduce the cost for
3021 slightly better scheduling of dep_insn. When not hinted, the
3022 mispredicted branch would hide the cost as well. */
3023 if (CALL_P (insn))
3024 {
3025 rtx target = get_branch_target (insn);
3026 if (GET_CODE (target) != REG || !set_of (target, insn))
3027 return cost - 2;
3028 return cost;
3029 }
3030
3031 /* And when returning from a function, let's assume the return values
3032 are completed sooner too. */
3033 if (CALL_P (dep_insn))
3034 return cost - 2;
3035
3036 /* Make sure an instruction that loads from the back chain is schedule
3037 away from the return instruction so a hint is more likely to get
3038 issued. */
3039 if (INSN_CODE (insn) == CODE_FOR__return
3040 && (set = single_set (dep_insn))
3041 && GET_CODE (SET_DEST (set)) == REG
3042 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3043 return 20;
3044
3045 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3046 scheduler makes every insn in a block anti-dependent on the final
3047 jump_insn. We adjust here so higher cost insns will get scheduled
3048 earlier. */
3049 if (JUMP_P (insn) && dep_type == REG_DEP_ANTI)
3050 return insn_sched_cost (dep_insn) - 3;
3051
3052 return cost;
3053 }
3054 \f
3055 /* Create a CONST_DOUBLE from a string. */
3056 rtx
3057 spu_float_const (const char *string, machine_mode mode)
3058 {
3059 REAL_VALUE_TYPE value;
3060 value = REAL_VALUE_ATOF (string, mode);
3061 return const_double_from_real_value (value, mode);
3062 }
3063
3064 int
3065 spu_constant_address_p (rtx x)
3066 {
3067 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3068 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3069 || GET_CODE (x) == HIGH);
3070 }
3071
3072 static enum spu_immediate
3073 which_immediate_load (HOST_WIDE_INT val)
3074 {
3075 gcc_assert (val == trunc_int_for_mode (val, SImode));
3076
3077 if (val >= -0x8000 && val <= 0x7fff)
3078 return SPU_IL;
3079 if (val >= 0 && val <= 0x3ffff)
3080 return SPU_ILA;
3081 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3082 return SPU_ILH;
3083 if ((val & 0xffff) == 0)
3084 return SPU_ILHU;
3085
3086 return SPU_NONE;
3087 }
3088
3089 /* Return true when OP can be loaded by one of the il instructions, or
3090 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3091 int
3092 immediate_load_p (rtx op, machine_mode mode)
3093 {
3094 if (CONSTANT_P (op))
3095 {
3096 enum immediate_class c = classify_immediate (op, mode);
3097 return c == IC_IL1 || c == IC_IL1s
3098 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3099 }
3100 return 0;
3101 }
3102
3103 /* Return true if the first SIZE bytes of arr is a constant that can be
3104 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3105 represent the size and offset of the instruction to use. */
3106 static int
3107 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3108 {
3109 int cpat, run, i, start;
3110 cpat = 1;
3111 run = 0;
3112 start = -1;
3113 for (i = 0; i < size && cpat; i++)
3114 if (arr[i] != i+16)
3115 {
3116 if (!run)
3117 {
3118 start = i;
3119 if (arr[i] == 3)
3120 run = 1;
3121 else if (arr[i] == 2 && arr[i+1] == 3)
3122 run = 2;
3123 else if (arr[i] == 0)
3124 {
3125 while (arr[i+run] == run && i+run < 16)
3126 run++;
3127 if (run != 4 && run != 8)
3128 cpat = 0;
3129 }
3130 else
3131 cpat = 0;
3132 if ((i & (run-1)) != 0)
3133 cpat = 0;
3134 i += run;
3135 }
3136 else
3137 cpat = 0;
3138 }
3139 if (cpat && (run || size < 16))
3140 {
3141 if (run == 0)
3142 run = 1;
3143 if (prun)
3144 *prun = run;
3145 if (pstart)
3146 *pstart = start == -1 ? 16-run : start;
3147 return 1;
3148 }
3149 return 0;
3150 }
3151
3152 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3153 it into a register. MODE is only valid when OP is a CONST_INT. */
3154 static enum immediate_class
3155 classify_immediate (rtx op, machine_mode mode)
3156 {
3157 HOST_WIDE_INT val;
3158 unsigned char arr[16];
3159 int i, j, repeated, fsmbi, repeat;
3160
3161 gcc_assert (CONSTANT_P (op));
3162
3163 if (GET_MODE (op) != VOIDmode)
3164 mode = GET_MODE (op);
3165
3166 /* A V4SI const_vector with all identical symbols is ok. */
3167 if (!flag_pic
3168 && mode == V4SImode
3169 && GET_CODE (op) == CONST_VECTOR
3170 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3171 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE)
3172 op = unwrap_const_vec_duplicate (op);
3173
3174 switch (GET_CODE (op))
3175 {
3176 case SYMBOL_REF:
3177 case LABEL_REF:
3178 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3179
3180 case CONST:
3181 /* We can never know if the resulting address fits in 18 bits and can be
3182 loaded with ila. For now, assume the address will not overflow if
3183 the displacement is "small" (fits 'K' constraint). */
3184 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3185 {
3186 rtx sym = XEXP (XEXP (op, 0), 0);
3187 rtx cst = XEXP (XEXP (op, 0), 1);
3188
3189 if (GET_CODE (sym) == SYMBOL_REF
3190 && GET_CODE (cst) == CONST_INT
3191 && satisfies_constraint_K (cst))
3192 return IC_IL1s;
3193 }
3194 return IC_IL2s;
3195
3196 case HIGH:
3197 return IC_IL1s;
3198
3199 case CONST_VECTOR:
3200 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3201 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3202 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3203 return IC_POOL;
3204 /* Fall through. */
3205
3206 case CONST_INT:
3207 case CONST_DOUBLE:
3208 constant_to_array (mode, op, arr);
3209
3210 /* Check that each 4-byte slot is identical. */
3211 repeated = 1;
3212 for (i = 4; i < 16; i += 4)
3213 for (j = 0; j < 4; j++)
3214 if (arr[j] != arr[i + j])
3215 repeated = 0;
3216
3217 if (repeated)
3218 {
3219 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3220 val = trunc_int_for_mode (val, SImode);
3221
3222 if (which_immediate_load (val) != SPU_NONE)
3223 return IC_IL1;
3224 }
3225
3226 /* Any mode of 2 bytes or smaller can be loaded with an il
3227 instruction. */
3228 gcc_assert (GET_MODE_SIZE (mode) > 2);
3229
3230 fsmbi = 1;
3231 repeat = 0;
3232 for (i = 0; i < 16 && fsmbi; i++)
3233 if (arr[i] != 0 && repeat == 0)
3234 repeat = arr[i];
3235 else if (arr[i] != 0 && arr[i] != repeat)
3236 fsmbi = 0;
3237 if (fsmbi)
3238 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3239
3240 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3241 return IC_CPAT;
3242
3243 if (repeated)
3244 return IC_IL2;
3245
3246 return IC_POOL;
3247 default:
3248 break;
3249 }
3250 gcc_unreachable ();
3251 }
3252
3253 static enum spu_immediate
3254 which_logical_immediate (HOST_WIDE_INT val)
3255 {
3256 gcc_assert (val == trunc_int_for_mode (val, SImode));
3257
3258 if (val >= -0x200 && val <= 0x1ff)
3259 return SPU_ORI;
3260 if (val >= 0 && val <= 0xffff)
3261 return SPU_IOHL;
3262 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3263 {
3264 val = trunc_int_for_mode (val, HImode);
3265 if (val >= -0x200 && val <= 0x1ff)
3266 return SPU_ORHI;
3267 if ((val & 0xff) == ((val >> 8) & 0xff))
3268 {
3269 val = trunc_int_for_mode (val, QImode);
3270 if (val >= -0x200 && val <= 0x1ff)
3271 return SPU_ORBI;
3272 }
3273 }
3274 return SPU_NONE;
3275 }
3276
3277 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3278 CONST_DOUBLEs. */
3279 static int
3280 const_vector_immediate_p (rtx x)
3281 {
3282 int i;
3283 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3284 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3285 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3286 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3287 return 0;
3288 return 1;
3289 }
3290
3291 int
3292 logical_immediate_p (rtx op, machine_mode mode)
3293 {
3294 HOST_WIDE_INT val;
3295 unsigned char arr[16];
3296 int i, j;
3297
3298 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3299 || GET_CODE (op) == CONST_VECTOR);
3300
3301 if (GET_CODE (op) == CONST_VECTOR
3302 && !const_vector_immediate_p (op))
3303 return 0;
3304
3305 if (GET_MODE (op) != VOIDmode)
3306 mode = GET_MODE (op);
3307
3308 constant_to_array (mode, op, arr);
3309
3310 /* Check that bytes are repeated. */
3311 for (i = 4; i < 16; i += 4)
3312 for (j = 0; j < 4; j++)
3313 if (arr[j] != arr[i + j])
3314 return 0;
3315
3316 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3317 val = trunc_int_for_mode (val, SImode);
3318
3319 i = which_logical_immediate (val);
3320 return i != SPU_NONE && i != SPU_IOHL;
3321 }
3322
3323 int
3324 iohl_immediate_p (rtx op, machine_mode mode)
3325 {
3326 HOST_WIDE_INT val;
3327 unsigned char arr[16];
3328 int i, j;
3329
3330 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3331 || GET_CODE (op) == CONST_VECTOR);
3332
3333 if (GET_CODE (op) == CONST_VECTOR
3334 && !const_vector_immediate_p (op))
3335 return 0;
3336
3337 if (GET_MODE (op) != VOIDmode)
3338 mode = GET_MODE (op);
3339
3340 constant_to_array (mode, op, arr);
3341
3342 /* Check that bytes are repeated. */
3343 for (i = 4; i < 16; i += 4)
3344 for (j = 0; j < 4; j++)
3345 if (arr[j] != arr[i + j])
3346 return 0;
3347
3348 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3349 val = trunc_int_for_mode (val, SImode);
3350
3351 return val >= 0 && val <= 0xffff;
3352 }
3353
3354 int
3355 arith_immediate_p (rtx op, machine_mode mode,
3356 HOST_WIDE_INT low, HOST_WIDE_INT high)
3357 {
3358 HOST_WIDE_INT val;
3359 unsigned char arr[16];
3360 int bytes, i, j;
3361
3362 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3363 || GET_CODE (op) == CONST_VECTOR);
3364
3365 if (GET_CODE (op) == CONST_VECTOR
3366 && !const_vector_immediate_p (op))
3367 return 0;
3368
3369 if (GET_MODE (op) != VOIDmode)
3370 mode = GET_MODE (op);
3371
3372 constant_to_array (mode, op, arr);
3373
3374 bytes = GET_MODE_UNIT_SIZE (mode);
3375 mode = int_mode_for_mode (GET_MODE_INNER (mode)).require ();
3376
3377 /* Check that bytes are repeated. */
3378 for (i = bytes; i < 16; i += bytes)
3379 for (j = 0; j < bytes; j++)
3380 if (arr[j] != arr[i + j])
3381 return 0;
3382
3383 val = arr[0];
3384 for (j = 1; j < bytes; j++)
3385 val = (val << 8) | arr[j];
3386
3387 val = trunc_int_for_mode (val, mode);
3388
3389 return val >= low && val <= high;
3390 }
3391
3392 /* TRUE when op is an immediate and an exact power of 2, and given that
3393 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3394 all entries must be the same. */
3395 bool
3396 exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
3397 {
3398 machine_mode int_mode;
3399 HOST_WIDE_INT val;
3400 unsigned char arr[16];
3401 int bytes, i, j;
3402
3403 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3404 || GET_CODE (op) == CONST_VECTOR);
3405
3406 if (GET_CODE (op) == CONST_VECTOR
3407 && !const_vector_immediate_p (op))
3408 return 0;
3409
3410 if (GET_MODE (op) != VOIDmode)
3411 mode = GET_MODE (op);
3412
3413 constant_to_array (mode, op, arr);
3414
3415 mode = GET_MODE_INNER (mode);
3416
3417 bytes = GET_MODE_SIZE (mode);
3418 int_mode = int_mode_for_mode (mode).require ();
3419
3420 /* Check that bytes are repeated. */
3421 for (i = bytes; i < 16; i += bytes)
3422 for (j = 0; j < bytes; j++)
3423 if (arr[j] != arr[i + j])
3424 return 0;
3425
3426 val = arr[0];
3427 for (j = 1; j < bytes; j++)
3428 val = (val << 8) | arr[j];
3429
3430 val = trunc_int_for_mode (val, int_mode);
3431
3432 /* Currently, we only handle SFmode */
3433 gcc_assert (mode == SFmode);
3434 if (mode == SFmode)
3435 {
3436 int exp = (val >> 23) - 127;
3437 return val > 0 && (val & 0x007fffff) == 0
3438 && exp >= low && exp <= high;
3439 }
3440 return FALSE;
3441 }
3442
3443 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3444
3445 static bool
3446 ea_symbol_ref_p (const_rtx x)
3447 {
3448 tree decl;
3449
3450 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3451 {
3452 rtx plus = XEXP (x, 0);
3453 rtx op0 = XEXP (plus, 0);
3454 rtx op1 = XEXP (plus, 1);
3455 if (GET_CODE (op1) == CONST_INT)
3456 x = op0;
3457 }
3458
3459 return (GET_CODE (x) == SYMBOL_REF
3460 && (decl = SYMBOL_REF_DECL (x)) != 0
3461 && TREE_CODE (decl) == VAR_DECL
3462 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3463 }
3464
3465 /* We accept:
3466 - any 32-bit constant (SImode, SFmode)
3467 - any constant that can be generated with fsmbi (any mode)
3468 - a 64-bit constant where the high and low bits are identical
3469 (DImode, DFmode)
3470 - a 128-bit constant where the four 32-bit words match. */
3471 bool
3472 spu_legitimate_constant_p (machine_mode mode, rtx x)
3473 {
3474 subrtx_iterator::array_type array;
3475 if (GET_CODE (x) == HIGH)
3476 x = XEXP (x, 0);
3477
3478 /* Reject any __ea qualified reference. These can't appear in
3479 instructions but must be forced to the constant pool. */
3480 FOR_EACH_SUBRTX (iter, array, x, ALL)
3481 if (ea_symbol_ref_p (*iter))
3482 return 0;
3483
3484 /* V4SI with all identical symbols is valid. */
3485 if (!flag_pic
3486 && mode == V4SImode
3487 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3488 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3489 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3490 return const_vec_duplicate_p (x);
3491
3492 if (GET_CODE (x) == CONST_VECTOR
3493 && !const_vector_immediate_p (x))
3494 return 0;
3495 return 1;
3496 }
3497
3498 /* Valid address are:
3499 - symbol_ref, label_ref, const
3500 - reg
3501 - reg + const_int, where const_int is 16 byte aligned
3502 - reg + reg, alignment doesn't matter
3503 The alignment matters in the reg+const case because lqd and stqd
3504 ignore the 4 least significant bits of the const. We only care about
3505 16 byte modes because the expand phase will change all smaller MEM
3506 references to TImode. */
3507 static bool
3508 spu_legitimate_address_p (machine_mode mode,
3509 rtx x, bool reg_ok_strict)
3510 {
3511 int aligned = GET_MODE_SIZE (mode) >= 16;
3512 if (aligned
3513 && GET_CODE (x) == AND
3514 && GET_CODE (XEXP (x, 1)) == CONST_INT
3515 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3516 x = XEXP (x, 0);
3517 switch (GET_CODE (x))
3518 {
3519 case LABEL_REF:
3520 return !TARGET_LARGE_MEM;
3521
3522 case SYMBOL_REF:
3523 case CONST:
3524 /* Keep __ea references until reload so that spu_expand_mov can see them
3525 in MEMs. */
3526 if (ea_symbol_ref_p (x))
3527 return !reload_in_progress && !reload_completed;
3528 return !TARGET_LARGE_MEM;
3529
3530 case CONST_INT:
3531 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3532
3533 case SUBREG:
3534 x = XEXP (x, 0);
3535 if (!REG_P (x))
3536 return 0;
3537 /* FALLTHRU */
3538
3539 case REG:
3540 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3541
3542 case PLUS:
3543 case LO_SUM:
3544 {
3545 rtx op0 = XEXP (x, 0);
3546 rtx op1 = XEXP (x, 1);
3547 if (GET_CODE (op0) == SUBREG)
3548 op0 = XEXP (op0, 0);
3549 if (GET_CODE (op1) == SUBREG)
3550 op1 = XEXP (op1, 0);
3551 if (GET_CODE (op0) == REG
3552 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3553 && GET_CODE (op1) == CONST_INT
3554 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3555 /* If virtual registers are involved, the displacement will
3556 change later on anyway, so checking would be premature.
3557 Reload will make sure the final displacement after
3558 register elimination is OK. */
3559 || op0 == arg_pointer_rtx
3560 || op0 == frame_pointer_rtx
3561 || op0 == virtual_stack_vars_rtx)
3562 && (!aligned || (INTVAL (op1) & 15) == 0))
3563 return TRUE;
3564 if (GET_CODE (op0) == REG
3565 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3566 && GET_CODE (op1) == REG
3567 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3568 return TRUE;
3569 }
3570 break;
3571
3572 default:
3573 break;
3574 }
3575 return FALSE;
3576 }
3577
3578 /* Like spu_legitimate_address_p, except with named addresses. */
3579 static bool
3580 spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
3581 bool reg_ok_strict, addr_space_t as)
3582 {
3583 if (as == ADDR_SPACE_EA)
3584 return (REG_P (x) && (GET_MODE (x) == EAmode));
3585
3586 else if (as != ADDR_SPACE_GENERIC)
3587 gcc_unreachable ();
3588
3589 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3590 }
3591
3592 /* When the address is reg + const_int, force the const_int into a
3593 register. */
3594 static rtx
3595 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3596 machine_mode mode ATTRIBUTE_UNUSED)
3597 {
3598 rtx op0, op1;
3599 /* Make sure both operands are registers. */
3600 if (GET_CODE (x) == PLUS)
3601 {
3602 op0 = XEXP (x, 0);
3603 op1 = XEXP (x, 1);
3604 if (ALIGNED_SYMBOL_REF_P (op0))
3605 {
3606 op0 = force_reg (Pmode, op0);
3607 mark_reg_pointer (op0, 128);
3608 }
3609 else if (GET_CODE (op0) != REG)
3610 op0 = force_reg (Pmode, op0);
3611 if (ALIGNED_SYMBOL_REF_P (op1))
3612 {
3613 op1 = force_reg (Pmode, op1);
3614 mark_reg_pointer (op1, 128);
3615 }
3616 else if (GET_CODE (op1) != REG)
3617 op1 = force_reg (Pmode, op1);
3618 x = gen_rtx_PLUS (Pmode, op0, op1);
3619 }
3620 return x;
3621 }
3622
3623 /* Like spu_legitimate_address, except with named address support. */
3624 static rtx
3625 spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
3626 addr_space_t as)
3627 {
3628 if (as != ADDR_SPACE_GENERIC)
3629 return x;
3630
3631 return spu_legitimize_address (x, oldx, mode);
3632 }
3633
3634 /* Reload reg + const_int for out-of-range displacements. */
3635 rtx
3636 spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
3637 int opnum, int type)
3638 {
3639 bool removed_and = false;
3640
3641 if (GET_CODE (ad) == AND
3642 && CONST_INT_P (XEXP (ad, 1))
3643 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3644 {
3645 ad = XEXP (ad, 0);
3646 removed_and = true;
3647 }
3648
3649 if (GET_CODE (ad) == PLUS
3650 && REG_P (XEXP (ad, 0))
3651 && CONST_INT_P (XEXP (ad, 1))
3652 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3653 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3654 {
3655 /* Unshare the sum. */
3656 ad = copy_rtx (ad);
3657
3658 /* Reload the displacement. */
3659 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3660 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3661 opnum, (enum reload_type) type);
3662
3663 /* Add back AND for alignment if we stripped it. */
3664 if (removed_and)
3665 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3666
3667 return ad;
3668 }
3669
3670 return NULL_RTX;
3671 }
3672
3673 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3674 struct attribute_spec.handler. */
3675 static tree
3676 spu_handle_fndecl_attribute (tree * node,
3677 tree name,
3678 tree args ATTRIBUTE_UNUSED,
3679 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3680 {
3681 if (TREE_CODE (*node) != FUNCTION_DECL)
3682 {
3683 warning (0, "%qE attribute only applies to functions",
3684 name);
3685 *no_add_attrs = true;
3686 }
3687
3688 return NULL_TREE;
3689 }
3690
3691 /* Handle the "vector" attribute. */
3692 static tree
3693 spu_handle_vector_attribute (tree * node, tree name,
3694 tree args ATTRIBUTE_UNUSED,
3695 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3696 {
3697 tree type = *node, result = NULL_TREE;
3698 machine_mode mode;
3699 int unsigned_p;
3700
3701 while (POINTER_TYPE_P (type)
3702 || TREE_CODE (type) == FUNCTION_TYPE
3703 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3704 type = TREE_TYPE (type);
3705
3706 mode = TYPE_MODE (type);
3707
3708 unsigned_p = TYPE_UNSIGNED (type);
3709 switch (mode)
3710 {
3711 case E_DImode:
3712 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3713 break;
3714 case E_SImode:
3715 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3716 break;
3717 case E_HImode:
3718 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3719 break;
3720 case E_QImode:
3721 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3722 break;
3723 case E_SFmode:
3724 result = V4SF_type_node;
3725 break;
3726 case E_DFmode:
3727 result = V2DF_type_node;
3728 break;
3729 default:
3730 break;
3731 }
3732
3733 /* Propagate qualifiers attached to the element type
3734 onto the vector type. */
3735 if (result && result != type && TYPE_QUALS (type))
3736 result = build_qualified_type (result, TYPE_QUALS (type));
3737
3738 *no_add_attrs = true; /* No need to hang on to the attribute. */
3739
3740 if (!result)
3741 warning (0, "%qE attribute ignored", name);
3742 else
3743 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3744
3745 return NULL_TREE;
3746 }
3747
3748 /* Return nonzero if FUNC is a naked function. */
3749 static int
3750 spu_naked_function_p (tree func)
3751 {
3752 tree a;
3753
3754 if (TREE_CODE (func) != FUNCTION_DECL)
3755 abort ();
3756
3757 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3758 return a != NULL_TREE;
3759 }
3760
3761 int
3762 spu_initial_elimination_offset (int from, int to)
3763 {
3764 int saved_regs_size = spu_saved_regs_size ();
3765 int sp_offset = 0;
3766 if (!crtl->is_leaf || crtl->outgoing_args_size
3767 || get_frame_size () || saved_regs_size)
3768 sp_offset = STACK_POINTER_OFFSET;
3769 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3770 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3771 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3772 return get_frame_size ();
3773 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3774 return sp_offset + crtl->outgoing_args_size
3775 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3776 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3777 return get_frame_size () + saved_regs_size + sp_offset;
3778 else
3779 gcc_unreachable ();
3780 }
3781
3782 rtx
3783 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3784 {
3785 machine_mode mode = TYPE_MODE (type);
3786 int byte_size = ((mode == BLKmode)
3787 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3788
3789 /* Make sure small structs are left justified in a register. */
3790 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3791 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3792 {
3793 machine_mode smode;
3794 rtvec v;
3795 int i;
3796 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3797 int n = byte_size / UNITS_PER_WORD;
3798 v = rtvec_alloc (nregs);
3799 for (i = 0; i < n; i++)
3800 {
3801 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3802 gen_rtx_REG (TImode,
3803 FIRST_RETURN_REGNUM
3804 + i),
3805 GEN_INT (UNITS_PER_WORD * i));
3806 byte_size -= UNITS_PER_WORD;
3807 }
3808
3809 if (n < nregs)
3810 {
3811 if (byte_size < 4)
3812 byte_size = 4;
3813 smode = smallest_int_mode_for_size (byte_size * BITS_PER_UNIT);
3814 RTVEC_ELT (v, n) =
3815 gen_rtx_EXPR_LIST (VOIDmode,
3816 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3817 GEN_INT (UNITS_PER_WORD * n));
3818 }
3819 return gen_rtx_PARALLEL (mode, v);
3820 }
3821 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3822 }
3823
3824 static rtx
3825 spu_function_arg (cumulative_args_t cum_v,
3826 machine_mode mode,
3827 const_tree type, bool named ATTRIBUTE_UNUSED)
3828 {
3829 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3830 int byte_size;
3831
3832 if (*cum >= MAX_REGISTER_ARGS)
3833 return 0;
3834
3835 byte_size = ((mode == BLKmode)
3836 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3837
3838 /* The ABI does not allow parameters to be passed partially in
3839 reg and partially in stack. */
3840 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3841 return 0;
3842
3843 /* Make sure small structs are left justified in a register. */
3844 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3845 && byte_size < UNITS_PER_WORD && byte_size > 0)
3846 {
3847 machine_mode smode;
3848 rtx gr_reg;
3849 if (byte_size < 4)
3850 byte_size = 4;
3851 smode = smallest_int_mode_for_size (byte_size * BITS_PER_UNIT);
3852 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3853 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
3854 const0_rtx);
3855 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3856 }
3857 else
3858 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
3859 }
3860
3861 static void
3862 spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
3863 const_tree type, bool named ATTRIBUTE_UNUSED)
3864 {
3865 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3866
3867 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3868 ? 1
3869 : mode == BLKmode
3870 ? ((int_size_in_bytes (type) + 15) / 16)
3871 : mode == VOIDmode
3872 ? 1
3873 : spu_hard_regno_nregs (FIRST_ARG_REGNUM, mode));
3874 }
3875
3876 /* Implement TARGET_FUNCTION_ARG_OFFSET. The SPU ABI wants 32/64-bit
3877 types at offset 0 in the quad-word on the stack. 8/16-bit types
3878 should be at offsets 3/2 respectively. */
3879
3880 static HOST_WIDE_INT
3881 spu_function_arg_offset (machine_mode mode, const_tree type)
3882 {
3883 if (type && INTEGRAL_TYPE_P (type) && GET_MODE_SIZE (mode) < 4)
3884 return 4 - GET_MODE_SIZE (mode);
3885 return 0;
3886 }
3887
3888 /* Implement TARGET_FUNCTION_ARG_PADDING. */
3889
3890 static pad_direction
3891 spu_function_arg_padding (machine_mode, const_tree)
3892 {
3893 return PAD_UPWARD;
3894 }
3895
3896 /* Variable sized types are passed by reference. */
3897 static bool
3898 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3899 machine_mode mode ATTRIBUTE_UNUSED,
3900 const_tree type, bool named ATTRIBUTE_UNUSED)
3901 {
3902 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3903 }
3904 \f
3905
3906 /* Var args. */
3907
3908 /* Create and return the va_list datatype.
3909
3910 On SPU, va_list is an array type equivalent to
3911
3912 typedef struct __va_list_tag
3913 {
3914 void *__args __attribute__((__aligned(16)));
3915 void *__skip __attribute__((__aligned(16)));
3916
3917 } va_list[1];
3918
3919 where __args points to the arg that will be returned by the next
3920 va_arg(), and __skip points to the previous stack frame such that
3921 when __args == __skip we should advance __args by 32 bytes. */
3922 static tree
3923 spu_build_builtin_va_list (void)
3924 {
3925 tree f_args, f_skip, record, type_decl;
3926 bool owp;
3927
3928 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3929
3930 type_decl =
3931 build_decl (BUILTINS_LOCATION,
3932 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3933
3934 f_args = build_decl (BUILTINS_LOCATION,
3935 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3936 f_skip = build_decl (BUILTINS_LOCATION,
3937 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3938
3939 DECL_FIELD_CONTEXT (f_args) = record;
3940 SET_DECL_ALIGN (f_args, 128);
3941 DECL_USER_ALIGN (f_args) = 1;
3942
3943 DECL_FIELD_CONTEXT (f_skip) = record;
3944 SET_DECL_ALIGN (f_skip, 128);
3945 DECL_USER_ALIGN (f_skip) = 1;
3946
3947 TYPE_STUB_DECL (record) = type_decl;
3948 TYPE_NAME (record) = type_decl;
3949 TYPE_FIELDS (record) = f_args;
3950 DECL_CHAIN (f_args) = f_skip;
3951
3952 /* We know this is being padded and we want it too. It is an internal
3953 type so hide the warnings from the user. */
3954 owp = warn_padded;
3955 warn_padded = false;
3956
3957 layout_type (record);
3958
3959 warn_padded = owp;
3960
3961 /* The correct type is an array type of one element. */
3962 return build_array_type (record, build_index_type (size_zero_node));
3963 }
3964
3965 /* Implement va_start by filling the va_list structure VALIST.
3966 NEXTARG points to the first anonymous stack argument.
3967
3968 The following global variables are used to initialize
3969 the va_list structure:
3970
3971 crtl->args.info;
3972 the CUMULATIVE_ARGS for this function
3973
3974 crtl->args.arg_offset_rtx:
3975 holds the offset of the first anonymous stack argument
3976 (relative to the virtual arg pointer). */
3977
3978 static void
3979 spu_va_start (tree valist, rtx nextarg)
3980 {
3981 tree f_args, f_skip;
3982 tree args, skip, t;
3983
3984 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3985 f_skip = DECL_CHAIN (f_args);
3986
3987 valist = build_simple_mem_ref (valist);
3988 args =
3989 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3990 skip =
3991 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3992
3993 /* Find the __args area. */
3994 t = make_tree (TREE_TYPE (args), nextarg);
3995 if (crtl->args.pretend_args_size > 0)
3996 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
3997 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
3998 TREE_SIDE_EFFECTS (t) = 1;
3999 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4000
4001 /* Find the __skip area. */
4002 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4003 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
4004 - STACK_POINTER_OFFSET));
4005 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4006 TREE_SIDE_EFFECTS (t) = 1;
4007 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4008 }
4009
4010 /* Gimplify va_arg by updating the va_list structure
4011 VALIST as required to retrieve an argument of type
4012 TYPE, and returning that argument.
4013
4014 ret = va_arg(VALIST, TYPE);
4015
4016 generates code equivalent to:
4017
4018 paddedsize = (sizeof(TYPE) + 15) & -16;
4019 if (VALIST.__args + paddedsize > VALIST.__skip
4020 && VALIST.__args <= VALIST.__skip)
4021 addr = VALIST.__skip + 32;
4022 else
4023 addr = VALIST.__args;
4024 VALIST.__args = addr + paddedsize;
4025 ret = *(TYPE *)addr;
4026 */
4027 static tree
4028 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4029 gimple_seq * post_p ATTRIBUTE_UNUSED)
4030 {
4031 tree f_args, f_skip;
4032 tree args, skip;
4033 HOST_WIDE_INT size, rsize;
4034 tree addr, tmp;
4035 bool pass_by_reference_p;
4036
4037 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4038 f_skip = DECL_CHAIN (f_args);
4039
4040 args =
4041 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4042 skip =
4043 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4044
4045 addr = create_tmp_var (ptr_type_node, "va_arg");
4046
4047 /* if an object is dynamically sized, a pointer to it is passed
4048 instead of the object itself. */
4049 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4050 false);
4051 if (pass_by_reference_p)
4052 type = build_pointer_type (type);
4053 size = int_size_in_bytes (type);
4054 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4055
4056 /* build conditional expression to calculate addr. The expression
4057 will be gimplified later. */
4058 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
4059 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4060 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4061 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4062 unshare_expr (skip)));
4063
4064 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4065 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4066 unshare_expr (args));
4067
4068 gimplify_assign (addr, tmp, pre_p);
4069
4070 /* update VALIST.__args */
4071 tmp = fold_build_pointer_plus_hwi (addr, rsize);
4072 gimplify_assign (unshare_expr (args), tmp, pre_p);
4073
4074 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4075 addr);
4076
4077 if (pass_by_reference_p)
4078 addr = build_va_arg_indirect_ref (addr);
4079
4080 return build_va_arg_indirect_ref (addr);
4081 }
4082
4083 /* Save parameter registers starting with the register that corresponds
4084 to the first unnamed parameters. If the first unnamed parameter is
4085 in the stack then save no registers. Set pretend_args_size to the
4086 amount of space needed to save the registers. */
4087 static void
4088 spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4089 tree type, int *pretend_size, int no_rtl)
4090 {
4091 if (!no_rtl)
4092 {
4093 rtx tmp;
4094 int regno;
4095 int offset;
4096 int ncum = *get_cumulative_args (cum);
4097
4098 /* cum currently points to the last named argument, we want to
4099 start at the next argument. */
4100 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
4101
4102 offset = -STACK_POINTER_OFFSET;
4103 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4104 {
4105 tmp = gen_frame_mem (V4SImode,
4106 plus_constant (Pmode, virtual_incoming_args_rtx,
4107 offset));
4108 emit_move_insn (tmp,
4109 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4110 offset += 16;
4111 }
4112 *pretend_size = offset + STACK_POINTER_OFFSET;
4113 }
4114 }
4115 \f
4116 static void
4117 spu_conditional_register_usage (void)
4118 {
4119 if (flag_pic)
4120 {
4121 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4122 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4123 }
4124 }
4125
4126 /* This is called any time we inspect the alignment of a register for
4127 addresses. */
4128 static int
4129 reg_aligned_for_addr (rtx x)
4130 {
4131 int regno =
4132 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4133 return REGNO_POINTER_ALIGN (regno) >= 128;
4134 }
4135
4136 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4137 into its SYMBOL_REF_FLAGS. */
4138 static void
4139 spu_encode_section_info (tree decl, rtx rtl, int first)
4140 {
4141 default_encode_section_info (decl, rtl, first);
4142
4143 /* If a variable has a forced alignment to < 16 bytes, mark it with
4144 SYMBOL_FLAG_ALIGN1. */
4145 if (TREE_CODE (decl) == VAR_DECL
4146 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4147 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4148 }
4149
4150 /* Return TRUE if we are certain the mem refers to a complete object
4151 which is both 16-byte aligned and padded to a 16-byte boundary. This
4152 would make it safe to store with a single instruction.
4153 We guarantee the alignment and padding for static objects by aligning
4154 all of them to 16-bytes. (DATA_ALIGNMENT and TARGET_CONSTANT_ALIGNMENT.)
4155 FIXME: We currently cannot guarantee this for objects on the stack
4156 because assign_parm_setup_stack calls assign_stack_local with the
4157 alignment of the parameter mode and in that case the alignment never
4158 gets adjusted by LOCAL_ALIGNMENT. */
4159 static int
4160 store_with_one_insn_p (rtx mem)
4161 {
4162 machine_mode mode = GET_MODE (mem);
4163 rtx addr = XEXP (mem, 0);
4164 if (mode == BLKmode)
4165 return 0;
4166 if (GET_MODE_SIZE (mode) >= 16)
4167 return 1;
4168 /* Only static objects. */
4169 if (GET_CODE (addr) == SYMBOL_REF)
4170 {
4171 /* We use the associated declaration to make sure the access is
4172 referring to the whole object.
4173 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4174 if it is necessary. Will there be cases where one exists, and
4175 the other does not? Will there be cases where both exist, but
4176 have different types? */
4177 tree decl = MEM_EXPR (mem);
4178 if (decl
4179 && TREE_CODE (decl) == VAR_DECL
4180 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4181 return 1;
4182 decl = SYMBOL_REF_DECL (addr);
4183 if (decl
4184 && TREE_CODE (decl) == VAR_DECL
4185 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4186 return 1;
4187 }
4188 return 0;
4189 }
4190
4191 /* Return 1 when the address is not valid for a simple load and store as
4192 required by the '_mov*' patterns. We could make this less strict
4193 for loads, but we prefer mem's to look the same so they are more
4194 likely to be merged. */
4195 static int
4196 address_needs_split (rtx mem)
4197 {
4198 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4199 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4200 || !(store_with_one_insn_p (mem)
4201 || mem_is_padded_component_ref (mem))))
4202 return 1;
4203
4204 return 0;
4205 }
4206
4207 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4208 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4209 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4210
4211 /* MEM is known to be an __ea qualified memory access. Emit a call to
4212 fetch the ppu memory to local store, and return its address in local
4213 store. */
4214
4215 static void
4216 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4217 {
4218 if (is_store)
4219 {
4220 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4221 if (!cache_fetch_dirty)
4222 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4223 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4224 ea_addr, EAmode, ndirty, SImode);
4225 }
4226 else
4227 {
4228 if (!cache_fetch)
4229 cache_fetch = init_one_libfunc ("__cache_fetch");
4230 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4231 ea_addr, EAmode);
4232 }
4233 }
4234
4235 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4236 dirty bit marking, inline.
4237
4238 The cache control data structure is an array of
4239
4240 struct __cache_tag_array
4241 {
4242 unsigned int tag_lo[4];
4243 unsigned int tag_hi[4];
4244 void *data_pointer[4];
4245 int reserved[4];
4246 vector unsigned short dirty_bits[4];
4247 } */
4248
4249 static void
4250 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4251 {
4252 rtx ea_addr_si;
4253 HOST_WIDE_INT v;
4254 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4255 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4256 rtx index_mask = gen_reg_rtx (SImode);
4257 rtx tag_arr = gen_reg_rtx (Pmode);
4258 rtx splat_mask = gen_reg_rtx (TImode);
4259 rtx splat = gen_reg_rtx (V4SImode);
4260 rtx splat_hi = NULL_RTX;
4261 rtx tag_index = gen_reg_rtx (Pmode);
4262 rtx block_off = gen_reg_rtx (SImode);
4263 rtx tag_addr = gen_reg_rtx (Pmode);
4264 rtx tag = gen_reg_rtx (V4SImode);
4265 rtx cache_tag = gen_reg_rtx (V4SImode);
4266 rtx cache_tag_hi = NULL_RTX;
4267 rtx cache_ptrs = gen_reg_rtx (TImode);
4268 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4269 rtx tag_equal = gen_reg_rtx (V4SImode);
4270 rtx tag_equal_hi = NULL_RTX;
4271 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4272 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4273 rtx eq_index = gen_reg_rtx (SImode);
4274 rtx bcomp, hit_label, hit_ref, cont_label;
4275 rtx_insn *insn;
4276
4277 if (spu_ea_model != 32)
4278 {
4279 splat_hi = gen_reg_rtx (V4SImode);
4280 cache_tag_hi = gen_reg_rtx (V4SImode);
4281 tag_equal_hi = gen_reg_rtx (V4SImode);
4282 }
4283
4284 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
4285 emit_move_insn (tag_arr, tag_arr_sym);
4286 v = 0x0001020300010203LL;
4287 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4288 ea_addr_si = ea_addr;
4289 if (spu_ea_model != 32)
4290 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4291
4292 /* tag_index = ea_addr & (tag_array_size - 128) */
4293 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4294
4295 /* splat ea_addr to all 4 slots. */
4296 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4297 /* Similarly for high 32 bits of ea_addr. */
4298 if (spu_ea_model != 32)
4299 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4300
4301 /* block_off = ea_addr & 127 */
4302 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4303
4304 /* tag_addr = tag_arr + tag_index */
4305 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4306
4307 /* Read cache tags. */
4308 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4309 if (spu_ea_model != 32)
4310 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4311 plus_constant (Pmode,
4312 tag_addr, 16)));
4313
4314 /* tag = ea_addr & -128 */
4315 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4316
4317 /* Read all four cache data pointers. */
4318 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4319 plus_constant (Pmode,
4320 tag_addr, 32)));
4321
4322 /* Compare tags. */
4323 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4324 if (spu_ea_model != 32)
4325 {
4326 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4327 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4328 }
4329
4330 /* At most one of the tags compare equal, so tag_equal has one
4331 32-bit slot set to all 1's, with the other slots all zero.
4332 gbb picks off low bit from each byte in the 128-bit registers,
4333 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4334 we have a hit. */
4335 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4336 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4337
4338 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4339 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4340
4341 /* Allowing us to rotate the corresponding cache data pointer to slot0.
4342 (rotating eq_index mod 16 bytes). */
4343 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4344 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4345
4346 /* Add block offset to form final data address. */
4347 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4348
4349 /* Check that we did hit. */
4350 hit_label = gen_label_rtx ();
4351 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4352 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4353 insn = emit_jump_insn (gen_rtx_SET (pc_rtx,
4354 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4355 hit_ref, pc_rtx)));
4356 /* Say that this branch is very likely to happen. */
4357 add_reg_br_prob_note (insn, profile_probability::very_likely ());
4358
4359 ea_load_store (mem, is_store, ea_addr, data_addr);
4360 cont_label = gen_label_rtx ();
4361 emit_jump_insn (gen_jump (cont_label));
4362 emit_barrier ();
4363
4364 emit_label (hit_label);
4365
4366 if (is_store)
4367 {
4368 HOST_WIDE_INT v_hi;
4369 rtx dirty_bits = gen_reg_rtx (TImode);
4370 rtx dirty_off = gen_reg_rtx (SImode);
4371 rtx dirty_128 = gen_reg_rtx (TImode);
4372 rtx neg_block_off = gen_reg_rtx (SImode);
4373
4374 /* Set up mask with one dirty bit per byte of the mem we are
4375 writing, starting from top bit. */
4376 v_hi = v = -1;
4377 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4378 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4379 {
4380 v_hi = v;
4381 v = 0;
4382 }
4383 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4384
4385 /* Form index into cache dirty_bits. eq_index is one of
4386 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4387 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4388 offset to each of the four dirty_bits elements. */
4389 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4390
4391 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4392
4393 /* Rotate bit mask to proper bit. */
4394 emit_insn (gen_negsi2 (neg_block_off, block_off));
4395 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4396 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4397
4398 /* Or in the new dirty bits. */
4399 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4400
4401 /* Store. */
4402 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4403 }
4404
4405 emit_label (cont_label);
4406 }
4407
4408 static rtx
4409 expand_ea_mem (rtx mem, bool is_store)
4410 {
4411 rtx ea_addr;
4412 rtx data_addr = gen_reg_rtx (Pmode);
4413 rtx new_mem;
4414
4415 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4416 if (optimize_size || optimize == 0)
4417 ea_load_store (mem, is_store, ea_addr, data_addr);
4418 else
4419 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4420
4421 if (ea_alias_set == -1)
4422 ea_alias_set = new_alias_set ();
4423
4424 /* We generate a new MEM RTX to refer to the copy of the data
4425 in the cache. We do not copy memory attributes (except the
4426 alignment) from the original MEM, as they may no longer apply
4427 to the cache copy. */
4428 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4429 set_mem_alias_set (new_mem, ea_alias_set);
4430 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4431
4432 return new_mem;
4433 }
4434
4435 int
4436 spu_expand_mov (rtx * ops, machine_mode mode)
4437 {
4438 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4439 {
4440 /* Perform the move in the destination SUBREG's inner mode. */
4441 ops[0] = SUBREG_REG (ops[0]);
4442 mode = GET_MODE (ops[0]);
4443 ops[1] = gen_lowpart_common (mode, ops[1]);
4444 gcc_assert (ops[1]);
4445 }
4446
4447 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4448 {
4449 rtx from = SUBREG_REG (ops[1]);
4450 scalar_int_mode imode = int_mode_for_mode (GET_MODE (from)).require ();
4451
4452 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4453 && GET_MODE_CLASS (imode) == MODE_INT
4454 && subreg_lowpart_p (ops[1]));
4455
4456 if (GET_MODE_SIZE (imode) < 4)
4457 imode = SImode;
4458 if (imode != GET_MODE (from))
4459 from = gen_rtx_SUBREG (imode, from, 0);
4460
4461 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4462 {
4463 enum insn_code icode = convert_optab_handler (trunc_optab,
4464 mode, imode);
4465 emit_insn (GEN_FCN (icode) (ops[0], from));
4466 }
4467 else
4468 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4469 return 1;
4470 }
4471
4472 /* At least one of the operands needs to be a register. */
4473 if ((reload_in_progress | reload_completed) == 0
4474 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4475 {
4476 rtx temp = force_reg (mode, ops[1]);
4477 emit_move_insn (ops[0], temp);
4478 return 1;
4479 }
4480 if (reload_in_progress || reload_completed)
4481 {
4482 if (CONSTANT_P (ops[1]))
4483 return spu_split_immediate (ops);
4484 return 0;
4485 }
4486
4487 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4488 extend them. */
4489 if (GET_CODE (ops[1]) == CONST_INT)
4490 {
4491 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4492 if (val != INTVAL (ops[1]))
4493 {
4494 emit_move_insn (ops[0], GEN_INT (val));
4495 return 1;
4496 }
4497 }
4498 if (MEM_P (ops[0]))
4499 {
4500 if (MEM_ADDR_SPACE (ops[0]))
4501 ops[0] = expand_ea_mem (ops[0], true);
4502 return spu_split_store (ops);
4503 }
4504 if (MEM_P (ops[1]))
4505 {
4506 if (MEM_ADDR_SPACE (ops[1]))
4507 ops[1] = expand_ea_mem (ops[1], false);
4508 return spu_split_load (ops);
4509 }
4510
4511 return 0;
4512 }
4513
4514 static void
4515 spu_convert_move (rtx dst, rtx src)
4516 {
4517 machine_mode mode = GET_MODE (dst);
4518 machine_mode int_mode = int_mode_for_mode (mode).require ();
4519 rtx reg;
4520 gcc_assert (GET_MODE (src) == TImode);
4521 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4522 emit_insn (gen_rtx_SET (reg,
4523 gen_rtx_TRUNCATE (int_mode,
4524 gen_rtx_LSHIFTRT (TImode, src,
4525 GEN_INT (int_mode == DImode ? 64 : 96)))));
4526 if (int_mode != mode)
4527 {
4528 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4529 emit_move_insn (dst, reg);
4530 }
4531 }
4532
4533 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4534 the address from SRC and SRC+16. Return a REG or CONST_INT that
4535 specifies how many bytes to rotate the loaded registers, plus any
4536 extra from EXTRA_ROTQBY. The address and rotate amounts are
4537 normalized to improve merging of loads and rotate computations. */
4538 static rtx
4539 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4540 {
4541 rtx addr = XEXP (src, 0);
4542 rtx p0, p1, rot, addr0, addr1;
4543 int rot_amt;
4544
4545 rot = 0;
4546 rot_amt = 0;
4547
4548 if (MEM_ALIGN (src) >= 128)
4549 /* Address is already aligned; simply perform a TImode load. */ ;
4550 else if (GET_CODE (addr) == PLUS)
4551 {
4552 /* 8 cases:
4553 aligned reg + aligned reg => lqx
4554 aligned reg + unaligned reg => lqx, rotqby
4555 aligned reg + aligned const => lqd
4556 aligned reg + unaligned const => lqd, rotqbyi
4557 unaligned reg + aligned reg => lqx, rotqby
4558 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4559 unaligned reg + aligned const => lqd, rotqby
4560 unaligned reg + unaligned const -> not allowed by legitimate address
4561 */
4562 p0 = XEXP (addr, 0);
4563 p1 = XEXP (addr, 1);
4564 if (!reg_aligned_for_addr (p0))
4565 {
4566 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4567 {
4568 rot = gen_reg_rtx (SImode);
4569 emit_insn (gen_addsi3 (rot, p0, p1));
4570 }
4571 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4572 {
4573 if (INTVAL (p1) > 0
4574 && REG_POINTER (p0)
4575 && INTVAL (p1) * BITS_PER_UNIT
4576 < REGNO_POINTER_ALIGN (REGNO (p0)))
4577 {
4578 rot = gen_reg_rtx (SImode);
4579 emit_insn (gen_addsi3 (rot, p0, p1));
4580 addr = p0;
4581 }
4582 else
4583 {
4584 rtx x = gen_reg_rtx (SImode);
4585 emit_move_insn (x, p1);
4586 if (!spu_arith_operand (p1, SImode))
4587 p1 = x;
4588 rot = gen_reg_rtx (SImode);
4589 emit_insn (gen_addsi3 (rot, p0, p1));
4590 addr = gen_rtx_PLUS (Pmode, p0, x);
4591 }
4592 }
4593 else
4594 rot = p0;
4595 }
4596 else
4597 {
4598 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4599 {
4600 rot_amt = INTVAL (p1) & 15;
4601 if (INTVAL (p1) & -16)
4602 {
4603 p1 = GEN_INT (INTVAL (p1) & -16);
4604 addr = gen_rtx_PLUS (SImode, p0, p1);
4605 }
4606 else
4607 addr = p0;
4608 }
4609 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4610 rot = p1;
4611 }
4612 }
4613 else if (REG_P (addr))
4614 {
4615 if (!reg_aligned_for_addr (addr))
4616 rot = addr;
4617 }
4618 else if (GET_CODE (addr) == CONST)
4619 {
4620 if (GET_CODE (XEXP (addr, 0)) == PLUS
4621 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4622 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4623 {
4624 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4625 if (rot_amt & -16)
4626 addr = gen_rtx_CONST (Pmode,
4627 gen_rtx_PLUS (Pmode,
4628 XEXP (XEXP (addr, 0), 0),
4629 GEN_INT (rot_amt & -16)));
4630 else
4631 addr = XEXP (XEXP (addr, 0), 0);
4632 }
4633 else
4634 {
4635 rot = gen_reg_rtx (Pmode);
4636 emit_move_insn (rot, addr);
4637 }
4638 }
4639 else if (GET_CODE (addr) == CONST_INT)
4640 {
4641 rot_amt = INTVAL (addr);
4642 addr = GEN_INT (rot_amt & -16);
4643 }
4644 else if (!ALIGNED_SYMBOL_REF_P (addr))
4645 {
4646 rot = gen_reg_rtx (Pmode);
4647 emit_move_insn (rot, addr);
4648 }
4649
4650 rot_amt += extra_rotby;
4651
4652 rot_amt &= 15;
4653
4654 if (rot && rot_amt)
4655 {
4656 rtx x = gen_reg_rtx (SImode);
4657 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4658 rot = x;
4659 rot_amt = 0;
4660 }
4661 if (!rot && rot_amt)
4662 rot = GEN_INT (rot_amt);
4663
4664 addr0 = copy_rtx (addr);
4665 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4666 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4667
4668 if (dst1)
4669 {
4670 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
4671 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4672 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4673 }
4674
4675 return rot;
4676 }
4677
4678 int
4679 spu_split_load (rtx * ops)
4680 {
4681 machine_mode mode = GET_MODE (ops[0]);
4682 rtx addr, load, rot;
4683 int rot_amt;
4684
4685 if (GET_MODE_SIZE (mode) >= 16)
4686 return 0;
4687
4688 addr = XEXP (ops[1], 0);
4689 gcc_assert (GET_CODE (addr) != AND);
4690
4691 if (!address_needs_split (ops[1]))
4692 {
4693 ops[1] = change_address (ops[1], TImode, addr);
4694 load = gen_reg_rtx (TImode);
4695 emit_insn (gen__movti (load, ops[1]));
4696 spu_convert_move (ops[0], load);
4697 return 1;
4698 }
4699
4700 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4701
4702 load = gen_reg_rtx (TImode);
4703 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4704
4705 if (rot)
4706 emit_insn (gen_rotqby_ti (load, load, rot));
4707
4708 spu_convert_move (ops[0], load);
4709 return 1;
4710 }
4711
4712 int
4713 spu_split_store (rtx * ops)
4714 {
4715 machine_mode mode = GET_MODE (ops[0]);
4716 rtx reg;
4717 rtx addr, p0, p1, p1_lo, smem;
4718 int aform;
4719 int scalar;
4720
4721 if (GET_MODE_SIZE (mode) >= 16)
4722 return 0;
4723
4724 addr = XEXP (ops[0], 0);
4725 gcc_assert (GET_CODE (addr) != AND);
4726
4727 if (!address_needs_split (ops[0]))
4728 {
4729 reg = gen_reg_rtx (TImode);
4730 emit_insn (gen_spu_convert (reg, ops[1]));
4731 ops[0] = change_address (ops[0], TImode, addr);
4732 emit_move_insn (ops[0], reg);
4733 return 1;
4734 }
4735
4736 if (GET_CODE (addr) == PLUS)
4737 {
4738 /* 8 cases:
4739 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4740 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4741 aligned reg + aligned const => lqd, c?d, shuf, stqx
4742 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4743 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4744 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4745 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4746 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4747 */
4748 aform = 0;
4749 p0 = XEXP (addr, 0);
4750 p1 = p1_lo = XEXP (addr, 1);
4751 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4752 {
4753 p1_lo = GEN_INT (INTVAL (p1) & 15);
4754 if (reg_aligned_for_addr (p0))
4755 {
4756 p1 = GEN_INT (INTVAL (p1) & -16);
4757 if (p1 == const0_rtx)
4758 addr = p0;
4759 else
4760 addr = gen_rtx_PLUS (SImode, p0, p1);
4761 }
4762 else
4763 {
4764 rtx x = gen_reg_rtx (SImode);
4765 emit_move_insn (x, p1);
4766 addr = gen_rtx_PLUS (SImode, p0, x);
4767 }
4768 }
4769 }
4770 else if (REG_P (addr))
4771 {
4772 aform = 0;
4773 p0 = addr;
4774 p1 = p1_lo = const0_rtx;
4775 }
4776 else
4777 {
4778 aform = 1;
4779 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4780 p1 = 0; /* aform doesn't use p1 */
4781 p1_lo = addr;
4782 if (ALIGNED_SYMBOL_REF_P (addr))
4783 p1_lo = const0_rtx;
4784 else if (GET_CODE (addr) == CONST
4785 && GET_CODE (XEXP (addr, 0)) == PLUS
4786 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4787 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4788 {
4789 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4790 if ((v & -16) != 0)
4791 addr = gen_rtx_CONST (Pmode,
4792 gen_rtx_PLUS (Pmode,
4793 XEXP (XEXP (addr, 0), 0),
4794 GEN_INT (v & -16)));
4795 else
4796 addr = XEXP (XEXP (addr, 0), 0);
4797 p1_lo = GEN_INT (v & 15);
4798 }
4799 else if (GET_CODE (addr) == CONST_INT)
4800 {
4801 p1_lo = GEN_INT (INTVAL (addr) & 15);
4802 addr = GEN_INT (INTVAL (addr) & -16);
4803 }
4804 else
4805 {
4806 p1_lo = gen_reg_rtx (SImode);
4807 emit_move_insn (p1_lo, addr);
4808 }
4809 }
4810
4811 gcc_assert (aform == 0 || aform == 1);
4812 reg = gen_reg_rtx (TImode);
4813
4814 scalar = store_with_one_insn_p (ops[0]);
4815 if (!scalar)
4816 {
4817 /* We could copy the flags from the ops[0] MEM to mem here,
4818 We don't because we want this load to be optimized away if
4819 possible, and copying the flags will prevent that in certain
4820 cases, e.g. consider the volatile flag. */
4821
4822 rtx pat = gen_reg_rtx (TImode);
4823 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4824 set_mem_alias_set (lmem, 0);
4825 emit_insn (gen_movti (reg, lmem));
4826
4827 if (!p0 || reg_aligned_for_addr (p0))
4828 p0 = stack_pointer_rtx;
4829 if (!p1_lo)
4830 p1_lo = const0_rtx;
4831
4832 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4833 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4834 }
4835 else
4836 {
4837 if (GET_CODE (ops[1]) == REG)
4838 emit_insn (gen_spu_convert (reg, ops[1]));
4839 else if (GET_CODE (ops[1]) == SUBREG)
4840 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4841 else
4842 abort ();
4843 }
4844
4845 if (GET_MODE_SIZE (mode) < 4 && scalar)
4846 emit_insn (gen_ashlti3
4847 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4848
4849 smem = change_address (ops[0], TImode, copy_rtx (addr));
4850 /* We can't use the previous alias set because the memory has changed
4851 size and can potentially overlap objects of other types. */
4852 set_mem_alias_set (smem, 0);
4853
4854 emit_insn (gen_movti (smem, reg));
4855 return 1;
4856 }
4857
4858 /* Return TRUE if X is MEM which is a struct member reference
4859 and the member can safely be loaded and stored with a single
4860 instruction because it is padded. */
4861 static int
4862 mem_is_padded_component_ref (rtx x)
4863 {
4864 tree t = MEM_EXPR (x);
4865 tree r;
4866 if (!t || TREE_CODE (t) != COMPONENT_REF)
4867 return 0;
4868 t = TREE_OPERAND (t, 1);
4869 if (!t || TREE_CODE (t) != FIELD_DECL
4870 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4871 return 0;
4872 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4873 r = DECL_FIELD_CONTEXT (t);
4874 if (!r || TREE_CODE (r) != RECORD_TYPE)
4875 return 0;
4876 /* Make sure they are the same mode */
4877 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4878 return 0;
4879 /* If there are no following fields then the field alignment assures
4880 the structure is padded to the alignment which means this field is
4881 padded too. */
4882 if (TREE_CHAIN (t) == 0)
4883 return 1;
4884 /* If the following field is also aligned then this field will be
4885 padded. */
4886 t = TREE_CHAIN (t);
4887 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4888 return 1;
4889 return 0;
4890 }
4891
4892 /* Parse the -mfixed-range= option string. */
4893 static void
4894 fix_range (const char *const_str)
4895 {
4896 int i, first, last;
4897 char *str, *dash, *comma;
4898
4899 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
4900 REG2 are either register names or register numbers. The effect
4901 of this option is to mark the registers in the range from REG1 to
4902 REG2 as ``fixed'' so they won't be used by the compiler. */
4903
4904 i = strlen (const_str);
4905 str = (char *) alloca (i + 1);
4906 memcpy (str, const_str, i + 1);
4907
4908 while (1)
4909 {
4910 dash = strchr (str, '-');
4911 if (!dash)
4912 {
4913 warning (0, "value of -mfixed-range must have form REG1-REG2");
4914 return;
4915 }
4916 *dash = '\0';
4917 comma = strchr (dash + 1, ',');
4918 if (comma)
4919 *comma = '\0';
4920
4921 first = decode_reg_name (str);
4922 if (first < 0)
4923 {
4924 warning (0, "unknown register name: %s", str);
4925 return;
4926 }
4927
4928 last = decode_reg_name (dash + 1);
4929 if (last < 0)
4930 {
4931 warning (0, "unknown register name: %s", dash + 1);
4932 return;
4933 }
4934
4935 *dash = '-';
4936
4937 if (first > last)
4938 {
4939 warning (0, "%s-%s is an empty range", str, dash + 1);
4940 return;
4941 }
4942
4943 for (i = first; i <= last; ++i)
4944 fixed_regs[i] = call_used_regs[i] = 1;
4945
4946 if (!comma)
4947 break;
4948
4949 *comma = ',';
4950 str = comma + 1;
4951 }
4952 }
4953
4954 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4955 can be generated using the fsmbi instruction. */
4956 int
4957 fsmbi_const_p (rtx x)
4958 {
4959 if (CONSTANT_P (x))
4960 {
4961 /* We can always choose TImode for CONST_INT because the high bits
4962 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4963 enum immediate_class c = classify_immediate (x, TImode);
4964 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4965 }
4966 return 0;
4967 }
4968
4969 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4970 can be generated using the cbd, chd, cwd or cdd instruction. */
4971 int
4972 cpat_const_p (rtx x, machine_mode mode)
4973 {
4974 if (CONSTANT_P (x))
4975 {
4976 enum immediate_class c = classify_immediate (x, mode);
4977 return c == IC_CPAT;
4978 }
4979 return 0;
4980 }
4981
4982 rtx
4983 gen_cpat_const (rtx * ops)
4984 {
4985 unsigned char dst[16];
4986 int i, offset, shift, isize;
4987 if (GET_CODE (ops[3]) != CONST_INT
4988 || GET_CODE (ops[2]) != CONST_INT
4989 || (GET_CODE (ops[1]) != CONST_INT
4990 && GET_CODE (ops[1]) != REG))
4991 return 0;
4992 if (GET_CODE (ops[1]) == REG
4993 && (!REG_POINTER (ops[1])
4994 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4995 return 0;
4996
4997 for (i = 0; i < 16; i++)
4998 dst[i] = i + 16;
4999 isize = INTVAL (ops[3]);
5000 if (isize == 1)
5001 shift = 3;
5002 else if (isize == 2)
5003 shift = 2;
5004 else
5005 shift = 0;
5006 offset = (INTVAL (ops[2]) +
5007 (GET_CODE (ops[1]) ==
5008 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5009 for (i = 0; i < isize; i++)
5010 dst[offset + i] = i + shift;
5011 return array_to_constant (TImode, dst);
5012 }
5013
5014 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5015 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5016 than 16 bytes, the value is repeated across the rest of the array. */
5017 void
5018 constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
5019 {
5020 HOST_WIDE_INT val;
5021 int i, j, first;
5022
5023 memset (arr, 0, 16);
5024 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5025 if (GET_CODE (x) == CONST_INT
5026 || (GET_CODE (x) == CONST_DOUBLE
5027 && (mode == SFmode || mode == DFmode)))
5028 {
5029 gcc_assert (mode != VOIDmode && mode != BLKmode);
5030
5031 if (GET_CODE (x) == CONST_DOUBLE)
5032 val = const_double_to_hwint (x);
5033 else
5034 val = INTVAL (x);
5035 first = GET_MODE_SIZE (mode) - 1;
5036 for (i = first; i >= 0; i--)
5037 {
5038 arr[i] = val & 0xff;
5039 val >>= 8;
5040 }
5041 /* Splat the constant across the whole array. */
5042 for (j = 0, i = first + 1; i < 16; i++)
5043 {
5044 arr[i] = arr[j];
5045 j = (j == first) ? 0 : j + 1;
5046 }
5047 }
5048 else if (GET_CODE (x) == CONST_DOUBLE)
5049 {
5050 val = CONST_DOUBLE_LOW (x);
5051 for (i = 15; i >= 8; i--)
5052 {
5053 arr[i] = val & 0xff;
5054 val >>= 8;
5055 }
5056 val = CONST_DOUBLE_HIGH (x);
5057 for (i = 7; i >= 0; i--)
5058 {
5059 arr[i] = val & 0xff;
5060 val >>= 8;
5061 }
5062 }
5063 else if (GET_CODE (x) == CONST_VECTOR)
5064 {
5065 int units;
5066 rtx elt;
5067 mode = GET_MODE_INNER (mode);
5068 units = CONST_VECTOR_NUNITS (x);
5069 for (i = 0; i < units; i++)
5070 {
5071 elt = CONST_VECTOR_ELT (x, i);
5072 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5073 {
5074 if (GET_CODE (elt) == CONST_DOUBLE)
5075 val = const_double_to_hwint (elt);
5076 else
5077 val = INTVAL (elt);
5078 first = GET_MODE_SIZE (mode) - 1;
5079 if (first + i * GET_MODE_SIZE (mode) > 16)
5080 abort ();
5081 for (j = first; j >= 0; j--)
5082 {
5083 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5084 val >>= 8;
5085 }
5086 }
5087 }
5088 }
5089 else
5090 gcc_unreachable();
5091 }
5092
5093 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5094 smaller than 16 bytes, use the bytes that would represent that value
5095 in a register, e.g., for QImode return the value of arr[3]. */
5096 rtx
5097 array_to_constant (machine_mode mode, const unsigned char arr[16])
5098 {
5099 machine_mode inner_mode;
5100 rtvec v;
5101 int units, size, i, j, k;
5102 HOST_WIDE_INT val;
5103
5104 if (GET_MODE_CLASS (mode) == MODE_INT
5105 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5106 {
5107 j = GET_MODE_SIZE (mode);
5108 i = j < 4 ? 4 - j : 0;
5109 for (val = 0; i < j; i++)
5110 val = (val << 8) | arr[i];
5111 val = trunc_int_for_mode (val, mode);
5112 return GEN_INT (val);
5113 }
5114
5115 if (mode == TImode)
5116 {
5117 HOST_WIDE_INT high;
5118 for (i = high = 0; i < 8; i++)
5119 high = (high << 8) | arr[i];
5120 for (i = 8, val = 0; i < 16; i++)
5121 val = (val << 8) | arr[i];
5122 return immed_double_const (val, high, TImode);
5123 }
5124 if (mode == SFmode)
5125 {
5126 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5127 val = trunc_int_for_mode (val, SImode);
5128 return hwint_to_const_double (SFmode, val);
5129 }
5130 if (mode == DFmode)
5131 {
5132 for (i = 0, val = 0; i < 8; i++)
5133 val = (val << 8) | arr[i];
5134 return hwint_to_const_double (DFmode, val);
5135 }
5136
5137 if (!VECTOR_MODE_P (mode))
5138 abort ();
5139
5140 units = GET_MODE_NUNITS (mode);
5141 size = GET_MODE_UNIT_SIZE (mode);
5142 inner_mode = GET_MODE_INNER (mode);
5143 v = rtvec_alloc (units);
5144
5145 for (k = i = 0; i < units; ++i)
5146 {
5147 val = 0;
5148 for (j = 0; j < size; j++, k++)
5149 val = (val << 8) | arr[k];
5150
5151 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5152 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5153 else
5154 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5155 }
5156 if (k > 16)
5157 abort ();
5158
5159 return gen_rtx_CONST_VECTOR (mode, v);
5160 }
5161
5162 static void
5163 reloc_diagnostic (rtx x)
5164 {
5165 tree decl = 0;
5166 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5167 return;
5168
5169 if (GET_CODE (x) == SYMBOL_REF)
5170 decl = SYMBOL_REF_DECL (x);
5171 else if (GET_CODE (x) == CONST
5172 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5173 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5174
5175 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5176 if (decl && !DECL_P (decl))
5177 decl = 0;
5178
5179 /* The decl could be a string constant. */
5180 if (decl && DECL_P (decl))
5181 {
5182 location_t loc;
5183 /* We use last_assemble_variable_decl to get line information. It's
5184 not always going to be right and might not even be close, but will
5185 be right for the more common cases. */
5186 if (!last_assemble_variable_decl || in_section == ctors_section)
5187 loc = DECL_SOURCE_LOCATION (decl);
5188 else
5189 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5190
5191 if (TARGET_WARN_RELOC)
5192 warning_at (loc, 0,
5193 "creating run-time relocation for %qD", decl);
5194 else
5195 error_at (loc,
5196 "creating run-time relocation for %qD", decl);
5197 }
5198 else
5199 {
5200 if (TARGET_WARN_RELOC)
5201 warning_at (input_location, 0, "creating run-time relocation");
5202 else
5203 error_at (input_location, "creating run-time relocation");
5204 }
5205 }
5206
5207 /* Hook into assemble_integer so we can generate an error for run-time
5208 relocations. The SPU ABI disallows them. */
5209 static bool
5210 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5211 {
5212 /* By default run-time relocations aren't supported, but we allow them
5213 in case users support it in their own run-time loader. And we provide
5214 a warning for those users that don't. */
5215 if ((GET_CODE (x) == SYMBOL_REF)
5216 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5217 reloc_diagnostic (x);
5218
5219 return default_assemble_integer (x, size, aligned_p);
5220 }
5221
5222 static void
5223 spu_asm_globalize_label (FILE * file, const char *name)
5224 {
5225 fputs ("\t.global\t", file);
5226 assemble_name (file, name);
5227 fputs ("\n", file);
5228 }
5229
5230 static bool
5231 spu_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
5232 int opno ATTRIBUTE_UNUSED, int *total,
5233 bool speed ATTRIBUTE_UNUSED)
5234 {
5235 int code = GET_CODE (x);
5236 int cost = COSTS_N_INSNS (2);
5237
5238 /* Folding to a CONST_VECTOR will use extra space but there might
5239 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5240 only if it allows us to fold away multiple insns. Changing the cost
5241 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5242 because this cost will only be compared against a single insn.
5243 if (code == CONST_VECTOR)
5244 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5245 */
5246
5247 /* Use defaults for float operations. Not accurate but good enough. */
5248 if (mode == DFmode)
5249 {
5250 *total = COSTS_N_INSNS (13);
5251 return true;
5252 }
5253 if (mode == SFmode)
5254 {
5255 *total = COSTS_N_INSNS (6);
5256 return true;
5257 }
5258 switch (code)
5259 {
5260 case CONST_INT:
5261 if (satisfies_constraint_K (x))
5262 *total = 0;
5263 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5264 *total = COSTS_N_INSNS (1);
5265 else
5266 *total = COSTS_N_INSNS (3);
5267 return true;
5268
5269 case CONST:
5270 *total = COSTS_N_INSNS (3);
5271 return true;
5272
5273 case LABEL_REF:
5274 case SYMBOL_REF:
5275 *total = COSTS_N_INSNS (0);
5276 return true;
5277
5278 case CONST_DOUBLE:
5279 *total = COSTS_N_INSNS (5);
5280 return true;
5281
5282 case FLOAT_EXTEND:
5283 case FLOAT_TRUNCATE:
5284 case FLOAT:
5285 case UNSIGNED_FLOAT:
5286 case FIX:
5287 case UNSIGNED_FIX:
5288 *total = COSTS_N_INSNS (7);
5289 return true;
5290
5291 case PLUS:
5292 if (mode == TImode)
5293 {
5294 *total = COSTS_N_INSNS (9);
5295 return true;
5296 }
5297 break;
5298
5299 case MULT:
5300 cost =
5301 GET_CODE (XEXP (x, 0)) ==
5302 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5303 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5304 {
5305 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5306 {
5307 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5308 cost = COSTS_N_INSNS (14);
5309 if ((val & 0xffff) == 0)
5310 cost = COSTS_N_INSNS (9);
5311 else if (val > 0 && val < 0x10000)
5312 cost = COSTS_N_INSNS (11);
5313 }
5314 }
5315 *total = cost;
5316 return true;
5317 case DIV:
5318 case UDIV:
5319 case MOD:
5320 case UMOD:
5321 *total = COSTS_N_INSNS (20);
5322 return true;
5323 case ROTATE:
5324 case ROTATERT:
5325 case ASHIFT:
5326 case ASHIFTRT:
5327 case LSHIFTRT:
5328 *total = COSTS_N_INSNS (4);
5329 return true;
5330 case UNSPEC:
5331 if (XINT (x, 1) == UNSPEC_CONVERT)
5332 *total = COSTS_N_INSNS (0);
5333 else
5334 *total = COSTS_N_INSNS (4);
5335 return true;
5336 }
5337 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5338 if (GET_MODE_CLASS (mode) == MODE_INT
5339 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5340 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5341 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5342 *total = cost;
5343 return true;
5344 }
5345
5346 static scalar_int_mode
5347 spu_unwind_word_mode (void)
5348 {
5349 return SImode;
5350 }
5351
5352 /* Decide whether we can make a sibling call to a function. DECL is the
5353 declaration of the function being targeted by the call and EXP is the
5354 CALL_EXPR representing the call. */
5355 static bool
5356 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5357 {
5358 return decl && !TARGET_LARGE_MEM;
5359 }
5360
5361 /* We need to correctly update the back chain pointer and the Available
5362 Stack Size (which is in the second slot of the sp register.) */
5363 void
5364 spu_allocate_stack (rtx op0, rtx op1)
5365 {
5366 HOST_WIDE_INT v;
5367 rtx chain = gen_reg_rtx (V4SImode);
5368 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5369 rtx sp = gen_reg_rtx (V4SImode);
5370 rtx splatted = gen_reg_rtx (V4SImode);
5371 rtx pat = gen_reg_rtx (TImode);
5372
5373 /* copy the back chain so we can save it back again. */
5374 emit_move_insn (chain, stack_bot);
5375
5376 op1 = force_reg (SImode, op1);
5377
5378 v = 0x1020300010203ll;
5379 emit_move_insn (pat, immed_double_const (v, v, TImode));
5380 emit_insn (gen_shufb (splatted, op1, op1, pat));
5381
5382 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5383 emit_insn (gen_subv4si3 (sp, sp, splatted));
5384
5385 if (flag_stack_check || flag_stack_clash_protection)
5386 {
5387 rtx avail = gen_reg_rtx(SImode);
5388 rtx result = gen_reg_rtx(SImode);
5389 emit_insn (gen_vec_extractv4sisi (avail, sp, GEN_INT (1)));
5390 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5391 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5392 }
5393
5394 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5395
5396 emit_move_insn (stack_bot, chain);
5397
5398 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5399 }
5400
5401 void
5402 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5403 {
5404 static unsigned char arr[16] =
5405 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5406 rtx temp = gen_reg_rtx (SImode);
5407 rtx temp2 = gen_reg_rtx (SImode);
5408 rtx temp3 = gen_reg_rtx (V4SImode);
5409 rtx temp4 = gen_reg_rtx (V4SImode);
5410 rtx pat = gen_reg_rtx (TImode);
5411 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5412
5413 /* Restore the backchain from the first word, sp from the second. */
5414 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5415 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5416
5417 emit_move_insn (pat, array_to_constant (TImode, arr));
5418
5419 /* Compute Available Stack Size for sp */
5420 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5421 emit_insn (gen_shufb (temp3, temp, temp, pat));
5422
5423 /* Compute Available Stack Size for back chain */
5424 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5425 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5426 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5427
5428 emit_insn (gen_addv4si3 (sp, sp, temp3));
5429 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5430 }
5431
5432 static void
5433 spu_init_libfuncs (void)
5434 {
5435 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5436 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5437 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5438 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5439 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5440 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5441 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5442 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5443 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5444 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5445 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5446 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5447
5448 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5449 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5450
5451 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5452 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5453 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5454 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5455 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5456 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5457 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5458 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5459 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5460 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5461 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5462 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5463
5464 set_optab_libfunc (smul_optab, TImode, "__multi3");
5465 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5466 set_optab_libfunc (smod_optab, TImode, "__modti3");
5467 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5468 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5469 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5470 }
5471
5472 /* Make a subreg, stripping any existing subreg. We could possibly just
5473 call simplify_subreg, but in this case we know what we want. */
5474 rtx
5475 spu_gen_subreg (machine_mode mode, rtx x)
5476 {
5477 if (GET_CODE (x) == SUBREG)
5478 x = SUBREG_REG (x);
5479 if (GET_MODE (x) == mode)
5480 return x;
5481 return gen_rtx_SUBREG (mode, x, 0);
5482 }
5483
5484 static bool
5485 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5486 {
5487 return (TYPE_MODE (type) == BLKmode
5488 && ((type) == 0
5489 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5490 || int_size_in_bytes (type) >
5491 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5492 }
5493 \f
5494 /* Create the built-in types and functions */
5495
5496 enum spu_function_code
5497 {
5498 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5499 #include "spu-builtins.def"
5500 #undef DEF_BUILTIN
5501 NUM_SPU_BUILTINS
5502 };
5503
5504 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5505
5506 struct spu_builtin_description spu_builtins[] = {
5507 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5508 {fcode, icode, name, type, params},
5509 #include "spu-builtins.def"
5510 #undef DEF_BUILTIN
5511 };
5512
5513 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5514
5515 /* Returns the spu builtin decl for CODE. */
5516
5517 static tree
5518 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5519 {
5520 if (code >= NUM_SPU_BUILTINS)
5521 return error_mark_node;
5522
5523 return spu_builtin_decls[code];
5524 }
5525
5526
5527 static void
5528 spu_init_builtins (void)
5529 {
5530 struct spu_builtin_description *d;
5531 unsigned int i;
5532
5533 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5534 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5535 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5536 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5537 V4SF_type_node = build_vector_type (float_type_node, 4);
5538 V2DF_type_node = build_vector_type (double_type_node, 2);
5539
5540 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5541 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5542 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5543 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5544
5545 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5546
5547 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5548 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5549 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5550 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5551 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5552 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5553 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5554 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5555 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5556 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5557 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5558 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5559
5560 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5561 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5562 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5563 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5564 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5565 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5566 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5567 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5568
5569 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5570 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5571
5572 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5573
5574 spu_builtin_types[SPU_BTI_PTR] =
5575 build_pointer_type (build_qualified_type
5576 (void_type_node,
5577 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5578
5579 /* For each builtin we build a new prototype. The tree code will make
5580 sure nodes are shared. */
5581 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5582 {
5583 tree p;
5584 char name[64]; /* build_function will make a copy. */
5585 int parm;
5586
5587 if (d->name == 0)
5588 continue;
5589
5590 /* Find last parm. */
5591 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5592 ;
5593
5594 p = void_list_node;
5595 while (parm > 1)
5596 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5597
5598 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5599
5600 sprintf (name, "__builtin_%s", d->name);
5601 spu_builtin_decls[i] =
5602 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5603 if (d->fcode == SPU_MASK_FOR_LOAD)
5604 TREE_READONLY (spu_builtin_decls[i]) = 1;
5605
5606 /* These builtins don't throw. */
5607 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5608 }
5609 }
5610
5611 void
5612 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5613 {
5614 static unsigned char arr[16] =
5615 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5616
5617 rtx temp = gen_reg_rtx (Pmode);
5618 rtx temp2 = gen_reg_rtx (V4SImode);
5619 rtx temp3 = gen_reg_rtx (V4SImode);
5620 rtx pat = gen_reg_rtx (TImode);
5621 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5622
5623 emit_move_insn (pat, array_to_constant (TImode, arr));
5624
5625 /* Restore the sp. */
5626 emit_move_insn (temp, op1);
5627 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5628
5629 /* Compute available stack size for sp. */
5630 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5631 emit_insn (gen_shufb (temp3, temp, temp, pat));
5632
5633 emit_insn (gen_addv4si3 (sp, sp, temp3));
5634 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5635 }
5636
5637 int
5638 spu_safe_dma (HOST_WIDE_INT channel)
5639 {
5640 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5641 }
5642
5643 void
5644 spu_builtin_splats (rtx ops[])
5645 {
5646 machine_mode mode = GET_MODE (ops[0]);
5647 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5648 {
5649 unsigned char arr[16];
5650 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5651 emit_move_insn (ops[0], array_to_constant (mode, arr));
5652 }
5653 else
5654 {
5655 rtx reg = gen_reg_rtx (TImode);
5656 rtx shuf;
5657 if (GET_CODE (ops[1]) != REG
5658 && GET_CODE (ops[1]) != SUBREG)
5659 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5660 switch (mode)
5661 {
5662 case E_V2DImode:
5663 case E_V2DFmode:
5664 shuf =
5665 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5666 TImode);
5667 break;
5668 case E_V4SImode:
5669 case E_V4SFmode:
5670 shuf =
5671 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5672 TImode);
5673 break;
5674 case E_V8HImode:
5675 shuf =
5676 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5677 TImode);
5678 break;
5679 case E_V16QImode:
5680 shuf =
5681 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5682 TImode);
5683 break;
5684 default:
5685 abort ();
5686 }
5687 emit_move_insn (reg, shuf);
5688 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5689 }
5690 }
5691
5692 void
5693 spu_builtin_extract (rtx ops[])
5694 {
5695 machine_mode mode;
5696 rtx rot, from, tmp;
5697
5698 mode = GET_MODE (ops[1]);
5699
5700 if (GET_CODE (ops[2]) == CONST_INT)
5701 {
5702 switch (mode)
5703 {
5704 case E_V16QImode:
5705 emit_insn (gen_vec_extractv16qiqi (ops[0], ops[1], ops[2]));
5706 break;
5707 case E_V8HImode:
5708 emit_insn (gen_vec_extractv8hihi (ops[0], ops[1], ops[2]));
5709 break;
5710 case E_V4SFmode:
5711 emit_insn (gen_vec_extractv4sfsf (ops[0], ops[1], ops[2]));
5712 break;
5713 case E_V4SImode:
5714 emit_insn (gen_vec_extractv4sisi (ops[0], ops[1], ops[2]));
5715 break;
5716 case E_V2DImode:
5717 emit_insn (gen_vec_extractv2didi (ops[0], ops[1], ops[2]));
5718 break;
5719 case E_V2DFmode:
5720 emit_insn (gen_vec_extractv2dfdf (ops[0], ops[1], ops[2]));
5721 break;
5722 default:
5723 abort ();
5724 }
5725 return;
5726 }
5727
5728 from = spu_gen_subreg (TImode, ops[1]);
5729 rot = gen_reg_rtx (TImode);
5730 tmp = gen_reg_rtx (SImode);
5731
5732 switch (mode)
5733 {
5734 case E_V16QImode:
5735 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5736 break;
5737 case E_V8HImode:
5738 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5739 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5740 break;
5741 case E_V4SFmode:
5742 case E_V4SImode:
5743 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5744 break;
5745 case E_V2DImode:
5746 case E_V2DFmode:
5747 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5748 break;
5749 default:
5750 abort ();
5751 }
5752 emit_insn (gen_rotqby_ti (rot, from, tmp));
5753
5754 emit_insn (gen_spu_convert (ops[0], rot));
5755 }
5756
5757 void
5758 spu_builtin_insert (rtx ops[])
5759 {
5760 machine_mode mode = GET_MODE (ops[0]);
5761 machine_mode imode = GET_MODE_INNER (mode);
5762 rtx mask = gen_reg_rtx (TImode);
5763 rtx offset;
5764
5765 if (GET_CODE (ops[3]) == CONST_INT)
5766 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5767 else
5768 {
5769 offset = gen_reg_rtx (SImode);
5770 emit_insn (gen_mulsi3
5771 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5772 }
5773 emit_insn (gen_cpat
5774 (mask, stack_pointer_rtx, offset,
5775 GEN_INT (GET_MODE_SIZE (imode))));
5776 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5777 }
5778
5779 void
5780 spu_builtin_promote (rtx ops[])
5781 {
5782 machine_mode mode, imode;
5783 rtx rot, from, offset;
5784 HOST_WIDE_INT pos;
5785
5786 mode = GET_MODE (ops[0]);
5787 imode = GET_MODE_INNER (mode);
5788
5789 from = gen_reg_rtx (TImode);
5790 rot = spu_gen_subreg (TImode, ops[0]);
5791
5792 emit_insn (gen_spu_convert (from, ops[1]));
5793
5794 if (GET_CODE (ops[2]) == CONST_INT)
5795 {
5796 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5797 if (GET_MODE_SIZE (imode) < 4)
5798 pos += 4 - GET_MODE_SIZE (imode);
5799 offset = GEN_INT (pos & 15);
5800 }
5801 else
5802 {
5803 offset = gen_reg_rtx (SImode);
5804 switch (mode)
5805 {
5806 case E_V16QImode:
5807 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5808 break;
5809 case E_V8HImode:
5810 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5811 emit_insn (gen_addsi3 (offset, offset, offset));
5812 break;
5813 case E_V4SFmode:
5814 case E_V4SImode:
5815 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5816 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5817 break;
5818 case E_V2DImode:
5819 case E_V2DFmode:
5820 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5821 break;
5822 default:
5823 abort ();
5824 }
5825 }
5826 emit_insn (gen_rotqby_ti (rot, from, offset));
5827 }
5828
5829 static void
5830 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5831 {
5832 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5833 rtx shuf = gen_reg_rtx (V4SImode);
5834 rtx insn = gen_reg_rtx (V4SImode);
5835 rtx shufc;
5836 rtx insnc;
5837 rtx mem;
5838
5839 fnaddr = force_reg (SImode, fnaddr);
5840 cxt = force_reg (SImode, cxt);
5841
5842 if (TARGET_LARGE_MEM)
5843 {
5844 rtx rotl = gen_reg_rtx (V4SImode);
5845 rtx mask = gen_reg_rtx (V4SImode);
5846 rtx bi = gen_reg_rtx (SImode);
5847 static unsigned char const shufa[16] = {
5848 2, 3, 0, 1, 18, 19, 16, 17,
5849 0, 1, 2, 3, 16, 17, 18, 19
5850 };
5851 static unsigned char const insna[16] = {
5852 0x41, 0, 0, 79,
5853 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5854 0x60, 0x80, 0, 79,
5855 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5856 };
5857
5858 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5859 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5860
5861 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5862 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5863 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5864 emit_insn (gen_selb (insn, insnc, rotl, mask));
5865
5866 mem = adjust_address (m_tramp, V4SImode, 0);
5867 emit_move_insn (mem, insn);
5868
5869 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5870 mem = adjust_address (m_tramp, Pmode, 16);
5871 emit_move_insn (mem, bi);
5872 }
5873 else
5874 {
5875 rtx scxt = gen_reg_rtx (SImode);
5876 rtx sfnaddr = gen_reg_rtx (SImode);
5877 static unsigned char const insna[16] = {
5878 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5879 0x30, 0, 0, 0,
5880 0, 0, 0, 0,
5881 0, 0, 0, 0
5882 };
5883
5884 shufc = gen_reg_rtx (TImode);
5885 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5886
5887 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5888 fits 18 bits and the last 4 are zeros. This will be true if
5889 the stack pointer is initialized to 0x3fff0 at program start,
5890 otherwise the ila instruction will be garbage. */
5891
5892 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5893 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5894 emit_insn (gen_cpat
5895 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5896 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5897 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5898
5899 mem = adjust_address (m_tramp, V4SImode, 0);
5900 emit_move_insn (mem, insn);
5901 }
5902 emit_insn (gen_sync ());
5903 }
5904
5905 static bool
5906 spu_warn_func_return (tree decl)
5907 {
5908 /* Naked functions are implemented entirely in assembly, including the
5909 return sequence, so suppress warnings about this. */
5910 return !spu_naked_function_p (decl);
5911 }
5912
5913 void
5914 spu_expand_sign_extend (rtx ops[])
5915 {
5916 unsigned char arr[16];
5917 rtx pat = gen_reg_rtx (TImode);
5918 rtx sign, c;
5919 int i, last;
5920 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5921 if (GET_MODE (ops[1]) == QImode)
5922 {
5923 sign = gen_reg_rtx (HImode);
5924 emit_insn (gen_extendqihi2 (sign, ops[1]));
5925 for (i = 0; i < 16; i++)
5926 arr[i] = 0x12;
5927 arr[last] = 0x13;
5928 }
5929 else
5930 {
5931 for (i = 0; i < 16; i++)
5932 arr[i] = 0x10;
5933 switch (GET_MODE (ops[1]))
5934 {
5935 case E_HImode:
5936 sign = gen_reg_rtx (SImode);
5937 emit_insn (gen_extendhisi2 (sign, ops[1]));
5938 arr[last] = 0x03;
5939 arr[last - 1] = 0x02;
5940 break;
5941 case E_SImode:
5942 sign = gen_reg_rtx (SImode);
5943 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5944 for (i = 0; i < 4; i++)
5945 arr[last - i] = 3 - i;
5946 break;
5947 case E_DImode:
5948 sign = gen_reg_rtx (SImode);
5949 c = gen_reg_rtx (SImode);
5950 emit_insn (gen_spu_convert (c, ops[1]));
5951 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5952 for (i = 0; i < 8; i++)
5953 arr[last - i] = 7 - i;
5954 break;
5955 default:
5956 abort ();
5957 }
5958 }
5959 emit_move_insn (pat, array_to_constant (TImode, arr));
5960 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5961 }
5962
5963 /* expand vector initialization. If there are any constant parts,
5964 load constant parts first. Then load any non-constant parts. */
5965 void
5966 spu_expand_vector_init (rtx target, rtx vals)
5967 {
5968 machine_mode mode = GET_MODE (target);
5969 int n_elts = GET_MODE_NUNITS (mode);
5970 int n_var = 0;
5971 bool all_same = true;
5972 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5973 int i;
5974
5975 first = XVECEXP (vals, 0, 0);
5976 for (i = 0; i < n_elts; ++i)
5977 {
5978 x = XVECEXP (vals, 0, i);
5979 if (!(CONST_INT_P (x)
5980 || GET_CODE (x) == CONST_DOUBLE
5981 || GET_CODE (x) == CONST_FIXED))
5982 ++n_var;
5983 else
5984 {
5985 if (first_constant == NULL_RTX)
5986 first_constant = x;
5987 }
5988 if (i > 0 && !rtx_equal_p (x, first))
5989 all_same = false;
5990 }
5991
5992 /* if all elements are the same, use splats to repeat elements */
5993 if (all_same)
5994 {
5995 if (!CONSTANT_P (first)
5996 && !register_operand (first, GET_MODE (x)))
5997 first = force_reg (GET_MODE (first), first);
5998 emit_insn (gen_spu_splats (target, first));
5999 return;
6000 }
6001
6002 /* load constant parts */
6003 if (n_var != n_elts)
6004 {
6005 if (n_var == 0)
6006 {
6007 emit_move_insn (target,
6008 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6009 }
6010 else
6011 {
6012 rtx constant_parts_rtx = copy_rtx (vals);
6013
6014 gcc_assert (first_constant != NULL_RTX);
6015 /* fill empty slots with the first constant, this increases
6016 our chance of using splats in the recursive call below. */
6017 for (i = 0; i < n_elts; ++i)
6018 {
6019 x = XVECEXP (constant_parts_rtx, 0, i);
6020 if (!(CONST_INT_P (x)
6021 || GET_CODE (x) == CONST_DOUBLE
6022 || GET_CODE (x) == CONST_FIXED))
6023 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6024 }
6025
6026 spu_expand_vector_init (target, constant_parts_rtx);
6027 }
6028 }
6029
6030 /* load variable parts */
6031 if (n_var != 0)
6032 {
6033 rtx insert_operands[4];
6034
6035 insert_operands[0] = target;
6036 insert_operands[2] = target;
6037 for (i = 0; i < n_elts; ++i)
6038 {
6039 x = XVECEXP (vals, 0, i);
6040 if (!(CONST_INT_P (x)
6041 || GET_CODE (x) == CONST_DOUBLE
6042 || GET_CODE (x) == CONST_FIXED))
6043 {
6044 if (!register_operand (x, GET_MODE (x)))
6045 x = force_reg (GET_MODE (x), x);
6046 insert_operands[1] = x;
6047 insert_operands[3] = GEN_INT (i);
6048 spu_builtin_insert (insert_operands);
6049 }
6050 }
6051 }
6052 }
6053
6054 /* Return insn index for the vector compare instruction for given CODE,
6055 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
6056
6057 static int
6058 get_vec_cmp_insn (enum rtx_code code,
6059 machine_mode dest_mode,
6060 machine_mode op_mode)
6061
6062 {
6063 switch (code)
6064 {
6065 case EQ:
6066 if (dest_mode == V16QImode && op_mode == V16QImode)
6067 return CODE_FOR_ceq_v16qi;
6068 if (dest_mode == V8HImode && op_mode == V8HImode)
6069 return CODE_FOR_ceq_v8hi;
6070 if (dest_mode == V4SImode && op_mode == V4SImode)
6071 return CODE_FOR_ceq_v4si;
6072 if (dest_mode == V4SImode && op_mode == V4SFmode)
6073 return CODE_FOR_ceq_v4sf;
6074 if (dest_mode == V2DImode && op_mode == V2DFmode)
6075 return CODE_FOR_ceq_v2df;
6076 break;
6077 case GT:
6078 if (dest_mode == V16QImode && op_mode == V16QImode)
6079 return CODE_FOR_cgt_v16qi;
6080 if (dest_mode == V8HImode && op_mode == V8HImode)
6081 return CODE_FOR_cgt_v8hi;
6082 if (dest_mode == V4SImode && op_mode == V4SImode)
6083 return CODE_FOR_cgt_v4si;
6084 if (dest_mode == V4SImode && op_mode == V4SFmode)
6085 return CODE_FOR_cgt_v4sf;
6086 if (dest_mode == V2DImode && op_mode == V2DFmode)
6087 return CODE_FOR_cgt_v2df;
6088 break;
6089 case GTU:
6090 if (dest_mode == V16QImode && op_mode == V16QImode)
6091 return CODE_FOR_clgt_v16qi;
6092 if (dest_mode == V8HImode && op_mode == V8HImode)
6093 return CODE_FOR_clgt_v8hi;
6094 if (dest_mode == V4SImode && op_mode == V4SImode)
6095 return CODE_FOR_clgt_v4si;
6096 break;
6097 default:
6098 break;
6099 }
6100 return -1;
6101 }
6102
6103 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6104 DMODE is expected destination mode. This is a recursive function. */
6105
6106 static rtx
6107 spu_emit_vector_compare (enum rtx_code rcode,
6108 rtx op0, rtx op1,
6109 machine_mode dmode)
6110 {
6111 int vec_cmp_insn;
6112 rtx mask;
6113 machine_mode dest_mode;
6114 machine_mode op_mode = GET_MODE (op1);
6115
6116 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6117
6118 /* Floating point vector compare instructions uses destination V4SImode.
6119 Double floating point vector compare instructions uses destination V2DImode.
6120 Move destination to appropriate mode later. */
6121 if (dmode == V4SFmode)
6122 dest_mode = V4SImode;
6123 else if (dmode == V2DFmode)
6124 dest_mode = V2DImode;
6125 else
6126 dest_mode = dmode;
6127
6128 mask = gen_reg_rtx (dest_mode);
6129 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6130
6131 if (vec_cmp_insn == -1)
6132 {
6133 bool swap_operands = false;
6134 bool try_again = false;
6135 switch (rcode)
6136 {
6137 case LT:
6138 rcode = GT;
6139 swap_operands = true;
6140 try_again = true;
6141 break;
6142 case LTU:
6143 rcode = GTU;
6144 swap_operands = true;
6145 try_again = true;
6146 break;
6147 case NE:
6148 case UNEQ:
6149 case UNLE:
6150 case UNLT:
6151 case UNGE:
6152 case UNGT:
6153 case UNORDERED:
6154 /* Treat A != B as ~(A==B). */
6155 {
6156 enum rtx_code rev_code;
6157 enum insn_code nor_code;
6158 rtx rev_mask;
6159
6160 rev_code = reverse_condition_maybe_unordered (rcode);
6161 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6162
6163 nor_code = optab_handler (one_cmpl_optab, dest_mode);
6164 gcc_assert (nor_code != CODE_FOR_nothing);
6165 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
6166 if (dmode != dest_mode)
6167 {
6168 rtx temp = gen_reg_rtx (dest_mode);
6169 convert_move (temp, mask, 0);
6170 return temp;
6171 }
6172 return mask;
6173 }
6174 break;
6175 case GE:
6176 case GEU:
6177 case LE:
6178 case LEU:
6179 /* Try GT/GTU/LT/LTU OR EQ */
6180 {
6181 rtx c_rtx, eq_rtx;
6182 enum insn_code ior_code;
6183 enum rtx_code new_code;
6184
6185 switch (rcode)
6186 {
6187 case GE: new_code = GT; break;
6188 case GEU: new_code = GTU; break;
6189 case LE: new_code = LT; break;
6190 case LEU: new_code = LTU; break;
6191 default:
6192 gcc_unreachable ();
6193 }
6194
6195 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6196 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6197
6198 ior_code = optab_handler (ior_optab, dest_mode);
6199 gcc_assert (ior_code != CODE_FOR_nothing);
6200 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6201 if (dmode != dest_mode)
6202 {
6203 rtx temp = gen_reg_rtx (dest_mode);
6204 convert_move (temp, mask, 0);
6205 return temp;
6206 }
6207 return mask;
6208 }
6209 break;
6210 case LTGT:
6211 /* Try LT OR GT */
6212 {
6213 rtx lt_rtx, gt_rtx;
6214 enum insn_code ior_code;
6215
6216 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6217 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6218
6219 ior_code = optab_handler (ior_optab, dest_mode);
6220 gcc_assert (ior_code != CODE_FOR_nothing);
6221 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6222 if (dmode != dest_mode)
6223 {
6224 rtx temp = gen_reg_rtx (dest_mode);
6225 convert_move (temp, mask, 0);
6226 return temp;
6227 }
6228 return mask;
6229 }
6230 break;
6231 case ORDERED:
6232 /* Implement as (A==A) & (B==B) */
6233 {
6234 rtx a_rtx, b_rtx;
6235 enum insn_code and_code;
6236
6237 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6238 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6239
6240 and_code = optab_handler (and_optab, dest_mode);
6241 gcc_assert (and_code != CODE_FOR_nothing);
6242 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6243 if (dmode != dest_mode)
6244 {
6245 rtx temp = gen_reg_rtx (dest_mode);
6246 convert_move (temp, mask, 0);
6247 return temp;
6248 }
6249 return mask;
6250 }
6251 break;
6252 default:
6253 gcc_unreachable ();
6254 }
6255
6256 /* You only get two chances. */
6257 if (try_again)
6258 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6259
6260 gcc_assert (vec_cmp_insn != -1);
6261
6262 if (swap_operands)
6263 {
6264 rtx tmp;
6265 tmp = op0;
6266 op0 = op1;
6267 op1 = tmp;
6268 }
6269 }
6270
6271 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6272 if (dmode != dest_mode)
6273 {
6274 rtx temp = gen_reg_rtx (dest_mode);
6275 convert_move (temp, mask, 0);
6276 return temp;
6277 }
6278 return mask;
6279 }
6280
6281
6282 /* Emit vector conditional expression.
6283 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6284 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6285
6286 int
6287 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6288 rtx cond, rtx cc_op0, rtx cc_op1)
6289 {
6290 machine_mode dest_mode = GET_MODE (dest);
6291 enum rtx_code rcode = GET_CODE (cond);
6292 rtx mask;
6293
6294 /* Get the vector mask for the given relational operations. */
6295 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6296
6297 emit_insn(gen_selb (dest, op2, op1, mask));
6298
6299 return 1;
6300 }
6301
6302 static rtx
6303 spu_force_reg (machine_mode mode, rtx op)
6304 {
6305 rtx x, r;
6306 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6307 {
6308 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6309 || GET_MODE (op) == BLKmode)
6310 return force_reg (mode, convert_to_mode (mode, op, 0));
6311 abort ();
6312 }
6313
6314 r = force_reg (GET_MODE (op), op);
6315 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6316 {
6317 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6318 if (x)
6319 return x;
6320 }
6321
6322 x = gen_reg_rtx (mode);
6323 emit_insn (gen_spu_convert (x, r));
6324 return x;
6325 }
6326
6327 static void
6328 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6329 {
6330 HOST_WIDE_INT v = 0;
6331 int lsbits;
6332 /* Check the range of immediate operands. */
6333 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6334 {
6335 int range = p - SPU_BTI_7;
6336
6337 if (!CONSTANT_P (op))
6338 error ("%s expects an integer literal in the range [%d, %d]",
6339 d->name,
6340 spu_builtin_range[range].low, spu_builtin_range[range].high);
6341
6342 if (GET_CODE (op) == CONST
6343 && (GET_CODE (XEXP (op, 0)) == PLUS
6344 || GET_CODE (XEXP (op, 0)) == MINUS))
6345 {
6346 v = INTVAL (XEXP (XEXP (op, 0), 1));
6347 op = XEXP (XEXP (op, 0), 0);
6348 }
6349 else if (GET_CODE (op) == CONST_INT)
6350 v = INTVAL (op);
6351 else if (GET_CODE (op) == CONST_VECTOR
6352 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6353 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6354
6355 /* The default for v is 0 which is valid in every range. */
6356 if (v < spu_builtin_range[range].low
6357 || v > spu_builtin_range[range].high)
6358 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6359 d->name,
6360 spu_builtin_range[range].low, spu_builtin_range[range].high,
6361 v);
6362
6363 switch (p)
6364 {
6365 case SPU_BTI_S10_4:
6366 lsbits = 4;
6367 break;
6368 case SPU_BTI_U16_2:
6369 /* This is only used in lqa, and stqa. Even though the insns
6370 encode 16 bits of the address (all but the 2 least
6371 significant), only 14 bits are used because it is masked to
6372 be 16 byte aligned. */
6373 lsbits = 4;
6374 break;
6375 case SPU_BTI_S16_2:
6376 /* This is used for lqr and stqr. */
6377 lsbits = 2;
6378 break;
6379 default:
6380 lsbits = 0;
6381 }
6382
6383 if (GET_CODE (op) == LABEL_REF
6384 || (GET_CODE (op) == SYMBOL_REF
6385 && SYMBOL_REF_FUNCTION_P (op))
6386 || (v & ((1 << lsbits) - 1)) != 0)
6387 warning (0, "%d least significant bits of %s are ignored", lsbits,
6388 d->name);
6389 }
6390 }
6391
6392
6393 static int
6394 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6395 rtx target, rtx ops[])
6396 {
6397 enum insn_code icode = (enum insn_code) d->icode;
6398 int i = 0, a;
6399
6400 /* Expand the arguments into rtl. */
6401
6402 if (d->parm[0] != SPU_BTI_VOID)
6403 ops[i++] = target;
6404
6405 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6406 {
6407 tree arg = CALL_EXPR_ARG (exp, a);
6408 if (arg == 0)
6409 abort ();
6410 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6411 }
6412
6413 gcc_assert (i == insn_data[icode].n_generator_args);
6414 return i;
6415 }
6416
6417 static rtx
6418 spu_expand_builtin_1 (struct spu_builtin_description *d,
6419 tree exp, rtx target)
6420 {
6421 rtx pat;
6422 rtx ops[8];
6423 enum insn_code icode = (enum insn_code) d->icode;
6424 machine_mode mode, tmode;
6425 int i, p;
6426 int n_operands;
6427 tree return_type;
6428
6429 /* Set up ops[] with values from arglist. */
6430 n_operands = expand_builtin_args (d, exp, target, ops);
6431
6432 /* Handle the target operand which must be operand 0. */
6433 i = 0;
6434 if (d->parm[0] != SPU_BTI_VOID)
6435 {
6436
6437 /* We prefer the mode specified for the match_operand otherwise
6438 use the mode from the builtin function prototype. */
6439 tmode = insn_data[d->icode].operand[0].mode;
6440 if (tmode == VOIDmode)
6441 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6442
6443 /* Try to use target because not using it can lead to extra copies
6444 and when we are using all of the registers extra copies leads
6445 to extra spills. */
6446 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6447 ops[0] = target;
6448 else
6449 target = ops[0] = gen_reg_rtx (tmode);
6450
6451 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6452 abort ();
6453
6454 i++;
6455 }
6456
6457 if (d->fcode == SPU_MASK_FOR_LOAD)
6458 {
6459 machine_mode mode = insn_data[icode].operand[1].mode;
6460 tree arg;
6461 rtx addr, op, pat;
6462
6463 /* get addr */
6464 arg = CALL_EXPR_ARG (exp, 0);
6465 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6466 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6467 addr = memory_address (mode, op);
6468
6469 /* negate addr */
6470 op = gen_reg_rtx (GET_MODE (addr));
6471 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
6472 op = gen_rtx_MEM (mode, op);
6473
6474 pat = GEN_FCN (icode) (target, op);
6475 if (!pat)
6476 return 0;
6477 emit_insn (pat);
6478 return target;
6479 }
6480
6481 /* Ignore align_hint, but still expand it's args in case they have
6482 side effects. */
6483 if (icode == CODE_FOR_spu_align_hint)
6484 return 0;
6485
6486 /* Handle the rest of the operands. */
6487 for (p = 1; i < n_operands; i++, p++)
6488 {
6489 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6490 mode = insn_data[d->icode].operand[i].mode;
6491 else
6492 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6493
6494 /* mode can be VOIDmode here for labels */
6495
6496 /* For specific intrinsics with an immediate operand, e.g.,
6497 si_ai(), we sometimes need to convert the scalar argument to a
6498 vector argument by splatting the scalar. */
6499 if (VECTOR_MODE_P (mode)
6500 && (GET_CODE (ops[i]) == CONST_INT
6501 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6502 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6503 {
6504 if (GET_CODE (ops[i]) == CONST_INT)
6505 ops[i] = spu_const (mode, INTVAL (ops[i]));
6506 else
6507 {
6508 rtx reg = gen_reg_rtx (mode);
6509 machine_mode imode = GET_MODE_INNER (mode);
6510 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6511 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6512 if (imode != GET_MODE (ops[i]))
6513 ops[i] = convert_to_mode (imode, ops[i],
6514 TYPE_UNSIGNED (spu_builtin_types
6515 [d->parm[i]]));
6516 emit_insn (gen_spu_splats (reg, ops[i]));
6517 ops[i] = reg;
6518 }
6519 }
6520
6521 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6522
6523 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6524 ops[i] = spu_force_reg (mode, ops[i]);
6525 }
6526
6527 switch (n_operands)
6528 {
6529 case 0:
6530 pat = GEN_FCN (icode) (0);
6531 break;
6532 case 1:
6533 pat = GEN_FCN (icode) (ops[0]);
6534 break;
6535 case 2:
6536 pat = GEN_FCN (icode) (ops[0], ops[1]);
6537 break;
6538 case 3:
6539 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6540 break;
6541 case 4:
6542 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6543 break;
6544 case 5:
6545 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6546 break;
6547 case 6:
6548 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6549 break;
6550 default:
6551 abort ();
6552 }
6553
6554 if (!pat)
6555 abort ();
6556
6557 if (d->type == B_CALL || d->type == B_BISLED)
6558 emit_call_insn (pat);
6559 else if (d->type == B_JUMP)
6560 {
6561 emit_jump_insn (pat);
6562 emit_barrier ();
6563 }
6564 else
6565 emit_insn (pat);
6566
6567 return_type = spu_builtin_types[d->parm[0]];
6568 if (d->parm[0] != SPU_BTI_VOID
6569 && GET_MODE (target) != TYPE_MODE (return_type))
6570 {
6571 /* target is the return value. It should always be the mode of
6572 the builtin function prototype. */
6573 target = spu_force_reg (TYPE_MODE (return_type), target);
6574 }
6575
6576 return target;
6577 }
6578
6579 rtx
6580 spu_expand_builtin (tree exp,
6581 rtx target,
6582 rtx subtarget ATTRIBUTE_UNUSED,
6583 machine_mode mode ATTRIBUTE_UNUSED,
6584 int ignore ATTRIBUTE_UNUSED)
6585 {
6586 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6587 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6588 struct spu_builtin_description *d;
6589
6590 if (fcode < NUM_SPU_BUILTINS)
6591 {
6592 d = &spu_builtins[fcode];
6593
6594 return spu_expand_builtin_1 (d, exp, target);
6595 }
6596 abort ();
6597 }
6598
6599 /* Implement targetm.vectorize.builtin_mask_for_load. */
6600 static tree
6601 spu_builtin_mask_for_load (void)
6602 {
6603 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6604 }
6605
6606 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6607 static int
6608 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6609 tree vectype,
6610 int misalign ATTRIBUTE_UNUSED)
6611 {
6612 unsigned elements;
6613
6614 switch (type_of_cost)
6615 {
6616 case scalar_stmt:
6617 case vector_stmt:
6618 case vector_load:
6619 case vector_store:
6620 case vec_to_scalar:
6621 case scalar_to_vec:
6622 case cond_branch_not_taken:
6623 case vec_perm:
6624 case vec_promote_demote:
6625 return 1;
6626
6627 case scalar_store:
6628 return 10;
6629
6630 case scalar_load:
6631 /* Load + rotate. */
6632 return 2;
6633
6634 case unaligned_load:
6635 case vector_gather_load:
6636 case vector_scatter_store:
6637 return 2;
6638
6639 case cond_branch_taken:
6640 return 6;
6641
6642 case vec_construct:
6643 elements = TYPE_VECTOR_SUBPARTS (vectype);
6644 return elements / 2 + 1;
6645
6646 default:
6647 gcc_unreachable ();
6648 }
6649 }
6650
6651 /* Implement targetm.vectorize.init_cost. */
6652
6653 static void *
6654 spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6655 {
6656 unsigned *cost = XNEWVEC (unsigned, 3);
6657 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
6658 return cost;
6659 }
6660
6661 /* Implement targetm.vectorize.add_stmt_cost. */
6662
6663 static unsigned
6664 spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6665 struct _stmt_vec_info *stmt_info, int misalign,
6666 enum vect_cost_model_location where)
6667 {
6668 unsigned *cost = (unsigned *) data;
6669 unsigned retval = 0;
6670
6671 if (flag_vect_cost_model)
6672 {
6673 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6674 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6675
6676 /* Statements in an inner loop relative to the loop being
6677 vectorized are weighted more heavily. The value here is
6678 arbitrary and could potentially be improved with analysis. */
6679 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6680 count *= 50; /* FIXME. */
6681
6682 retval = (unsigned) (count * stmt_cost);
6683 cost[where] += retval;
6684 }
6685
6686 return retval;
6687 }
6688
6689 /* Implement targetm.vectorize.finish_cost. */
6690
6691 static void
6692 spu_finish_cost (void *data, unsigned *prologue_cost,
6693 unsigned *body_cost, unsigned *epilogue_cost)
6694 {
6695 unsigned *cost = (unsigned *) data;
6696 *prologue_cost = cost[vect_prologue];
6697 *body_cost = cost[vect_body];
6698 *epilogue_cost = cost[vect_epilogue];
6699 }
6700
6701 /* Implement targetm.vectorize.destroy_cost_data. */
6702
6703 static void
6704 spu_destroy_cost_data (void *data)
6705 {
6706 free (data);
6707 }
6708
6709 /* Return true iff, data reference of TYPE can reach vector alignment (16)
6710 after applying N number of iterations. This routine does not determine
6711 how may iterations are required to reach desired alignment. */
6712
6713 static bool
6714 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6715 {
6716 if (is_packed)
6717 return false;
6718
6719 /* All other types are naturally aligned. */
6720 return true;
6721 }
6722
6723 /* Return the appropriate mode for a named address pointer. */
6724 static scalar_int_mode
6725 spu_addr_space_pointer_mode (addr_space_t addrspace)
6726 {
6727 switch (addrspace)
6728 {
6729 case ADDR_SPACE_GENERIC:
6730 return ptr_mode;
6731 case ADDR_SPACE_EA:
6732 return EAmode;
6733 default:
6734 gcc_unreachable ();
6735 }
6736 }
6737
6738 /* Return the appropriate mode for a named address address. */
6739 static scalar_int_mode
6740 spu_addr_space_address_mode (addr_space_t addrspace)
6741 {
6742 switch (addrspace)
6743 {
6744 case ADDR_SPACE_GENERIC:
6745 return Pmode;
6746 case ADDR_SPACE_EA:
6747 return EAmode;
6748 default:
6749 gcc_unreachable ();
6750 }
6751 }
6752
6753 /* Determine if one named address space is a subset of another. */
6754
6755 static bool
6756 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6757 {
6758 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6759 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6760
6761 if (subset == superset)
6762 return true;
6763
6764 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6765 being subsets but instead as disjoint address spaces. */
6766 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6767 return false;
6768
6769 else
6770 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6771 }
6772
6773 /* Convert from one address space to another. */
6774 static rtx
6775 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6776 {
6777 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6778 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6779
6780 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6781 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6782
6783 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6784 {
6785 rtx result, ls;
6786
6787 ls = gen_const_mem (DImode,
6788 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6789 set_mem_align (ls, 128);
6790
6791 result = gen_reg_rtx (Pmode);
6792 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6793 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6794 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6795 ls, const0_rtx, Pmode, 1);
6796
6797 emit_insn (gen_subsi3 (result, op, ls));
6798
6799 return result;
6800 }
6801
6802 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6803 {
6804 rtx result, ls;
6805
6806 ls = gen_const_mem (DImode,
6807 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6808 set_mem_align (ls, 128);
6809
6810 result = gen_reg_rtx (EAmode);
6811 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6812 op = force_reg (Pmode, op);
6813 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6814 ls, const0_rtx, EAmode, 1);
6815 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6816
6817 if (EAmode == SImode)
6818 emit_insn (gen_addsi3 (result, op, ls));
6819 else
6820 emit_insn (gen_adddi3 (result, op, ls));
6821
6822 return result;
6823 }
6824
6825 else
6826 gcc_unreachable ();
6827 }
6828
6829
6830 /* Count the total number of instructions in each pipe and return the
6831 maximum, which is used as the Minimum Iteration Interval (MII)
6832 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
6833 -2 are instructions that can go in pipe0 or pipe1. */
6834 static int
6835 spu_sms_res_mii (struct ddg *g)
6836 {
6837 int i;
6838 unsigned t[4] = {0, 0, 0, 0};
6839
6840 for (i = 0; i < g->num_nodes; i++)
6841 {
6842 rtx_insn *insn = g->nodes[i].insn;
6843 int p = get_pipe (insn) + 2;
6844
6845 gcc_assert (p >= 0);
6846 gcc_assert (p < 4);
6847
6848 t[p]++;
6849 if (dump_file && INSN_P (insn))
6850 fprintf (dump_file, "i%d %s %d %d\n",
6851 INSN_UID (insn),
6852 insn_data[INSN_CODE(insn)].name,
6853 p, t[p]);
6854 }
6855 if (dump_file)
6856 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6857
6858 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6859 }
6860
6861
6862 void
6863 spu_init_expanders (void)
6864 {
6865 if (cfun)
6866 {
6867 rtx r0, r1;
6868 /* HARD_FRAME_REGISTER is only 128 bit aligned when
6869 frame_pointer_needed is true. We don't know that until we're
6870 expanding the prologue. */
6871 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6872
6873 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6874 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6875 to be treated as aligned, so generate them here. */
6876 r0 = gen_reg_rtx (SImode);
6877 r1 = gen_reg_rtx (SImode);
6878 mark_reg_pointer (r0, 128);
6879 mark_reg_pointer (r1, 128);
6880 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6881 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6882 }
6883 }
6884
6885 static scalar_int_mode
6886 spu_libgcc_cmp_return_mode (void)
6887 {
6888
6889 /* For SPU word mode is TI mode so it is better to use SImode
6890 for compare returns. */
6891 return SImode;
6892 }
6893
6894 static scalar_int_mode
6895 spu_libgcc_shift_count_mode (void)
6896 {
6897 /* For SPU word mode is TI mode so it is better to use SImode
6898 for shift counts. */
6899 return SImode;
6900 }
6901
6902 /* Implement targetm.section_type_flags. */
6903 static unsigned int
6904 spu_section_type_flags (tree decl, const char *name, int reloc)
6905 {
6906 /* .toe needs to have type @nobits. */
6907 if (strcmp (name, ".toe") == 0)
6908 return SECTION_BSS;
6909 /* Don't load _ea into the current address space. */
6910 if (strcmp (name, "._ea") == 0)
6911 return SECTION_WRITE | SECTION_DEBUG;
6912 return default_section_type_flags (decl, name, reloc);
6913 }
6914
6915 /* Implement targetm.select_section. */
6916 static section *
6917 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6918 {
6919 /* Variables and constants defined in the __ea address space
6920 go into a special section named "._ea". */
6921 if (TREE_TYPE (decl) != error_mark_node
6922 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6923 {
6924 /* We might get called with string constants, but get_named_section
6925 doesn't like them as they are not DECLs. Also, we need to set
6926 flags in that case. */
6927 if (!DECL_P (decl))
6928 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6929
6930 return get_named_section (decl, "._ea", reloc);
6931 }
6932
6933 return default_elf_select_section (decl, reloc, align);
6934 }
6935
6936 /* Implement targetm.unique_section. */
6937 static void
6938 spu_unique_section (tree decl, int reloc)
6939 {
6940 /* We don't support unique section names in the __ea address
6941 space for now. */
6942 if (TREE_TYPE (decl) != error_mark_node
6943 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6944 return;
6945
6946 default_unique_section (decl, reloc);
6947 }
6948
6949 /* Generate a constant or register which contains 2^SCALE. We assume
6950 the result is valid for MODE. Currently, MODE must be V4SFmode and
6951 SCALE must be SImode. */
6952 rtx
6953 spu_gen_exp2 (machine_mode mode, rtx scale)
6954 {
6955 gcc_assert (mode == V4SFmode);
6956 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6957 if (GET_CODE (scale) != CONST_INT)
6958 {
6959 /* unsigned int exp = (127 + scale) << 23;
6960 __vector float m = (__vector float) spu_splats (exp); */
6961 rtx reg = force_reg (SImode, scale);
6962 rtx exp = gen_reg_rtx (SImode);
6963 rtx mul = gen_reg_rtx (mode);
6964 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6965 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6966 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6967 return mul;
6968 }
6969 else
6970 {
6971 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6972 unsigned char arr[16];
6973 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6974 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6975 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6976 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6977 return array_to_constant (mode, arr);
6978 }
6979 }
6980
6981 /* After reload, just change the convert into a move instruction
6982 or a dead instruction. */
6983 void
6984 spu_split_convert (rtx ops[])
6985 {
6986 if (REGNO (ops[0]) == REGNO (ops[1]))
6987 emit_note (NOTE_INSN_DELETED);
6988 else
6989 {
6990 /* Use TImode always as this might help hard reg copyprop. */
6991 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6992 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6993 emit_insn (gen_move_insn (op0, op1));
6994 }
6995 }
6996
6997 void
6998 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
6999 {
7000 fprintf (file, "# profile\n");
7001 fprintf (file, "brsl $75, _mcount\n");
7002 }
7003
7004 /* Implement targetm.ref_may_alias_errno. */
7005 static bool
7006 spu_ref_may_alias_errno (ao_ref *ref)
7007 {
7008 tree base = ao_ref_base (ref);
7009
7010 /* With SPU newlib, errno is defined as something like
7011 _impure_data._errno
7012 The default implementation of this target macro does not
7013 recognize such expressions, so special-code for it here. */
7014
7015 if (TREE_CODE (base) == VAR_DECL
7016 && !TREE_STATIC (base)
7017 && DECL_EXTERNAL (base)
7018 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7019 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7020 "_impure_data") == 0
7021 /* _errno is the first member of _impure_data. */
7022 && ref->offset == 0)
7023 return true;
7024
7025 return default_ref_may_alias_errno (ref);
7026 }
7027
7028 /* Output thunk to FILE that implements a C++ virtual function call (with
7029 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7030 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7031 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7032 relative to the resulting this pointer. */
7033
7034 static void
7035 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7036 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7037 tree function)
7038 {
7039 rtx op[8];
7040
7041 /* Make sure unwind info is emitted for the thunk if needed. */
7042 final_start_function (emit_barrier (), file, 1);
7043
7044 /* Operand 0 is the target function. */
7045 op[0] = XEXP (DECL_RTL (function), 0);
7046
7047 /* Operand 1 is the 'this' pointer. */
7048 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7049 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7050 else
7051 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7052
7053 /* Operands 2/3 are the low/high halfwords of delta. */
7054 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7055 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7056
7057 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7058 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7059 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7060
7061 /* Operands 6/7 are temporary registers. */
7062 op[6] = gen_rtx_REG (Pmode, 79);
7063 op[7] = gen_rtx_REG (Pmode, 78);
7064
7065 /* Add DELTA to this pointer. */
7066 if (delta)
7067 {
7068 if (delta >= -0x200 && delta < 0x200)
7069 output_asm_insn ("ai\t%1,%1,%2", op);
7070 else if (delta >= -0x8000 && delta < 0x8000)
7071 {
7072 output_asm_insn ("il\t%6,%2", op);
7073 output_asm_insn ("a\t%1,%1,%6", op);
7074 }
7075 else
7076 {
7077 output_asm_insn ("ilhu\t%6,%3", op);
7078 output_asm_insn ("iohl\t%6,%2", op);
7079 output_asm_insn ("a\t%1,%1,%6", op);
7080 }
7081 }
7082
7083 /* Perform vcall adjustment. */
7084 if (vcall_offset)
7085 {
7086 output_asm_insn ("lqd\t%7,0(%1)", op);
7087 output_asm_insn ("rotqby\t%7,%7,%1", op);
7088
7089 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7090 output_asm_insn ("ai\t%7,%7,%4", op);
7091 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7092 {
7093 output_asm_insn ("il\t%6,%4", op);
7094 output_asm_insn ("a\t%7,%7,%6", op);
7095 }
7096 else
7097 {
7098 output_asm_insn ("ilhu\t%6,%5", op);
7099 output_asm_insn ("iohl\t%6,%4", op);
7100 output_asm_insn ("a\t%7,%7,%6", op);
7101 }
7102
7103 output_asm_insn ("lqd\t%6,0(%7)", op);
7104 output_asm_insn ("rotqby\t%6,%6,%7", op);
7105 output_asm_insn ("a\t%1,%1,%6", op);
7106 }
7107
7108 /* Jump to target. */
7109 output_asm_insn ("br\t%0", op);
7110
7111 final_end_function ();
7112 }
7113
7114 /* Canonicalize a comparison from one we don't have to one we do have. */
7115 static void
7116 spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7117 bool op0_preserve_value)
7118 {
7119 if (!op0_preserve_value
7120 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7121 {
7122 rtx tem = *op0;
7123 *op0 = *op1;
7124 *op1 = tem;
7125 *code = (int)swap_condition ((enum rtx_code)*code);
7126 }
7127 }
7128
7129 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
7130 to perform. MEM is the memory on which to operate. VAL is the second
7131 operand of the binary operator. BEFORE and AFTER are optional locations to
7132 return the value of MEM either before of after the operation. */
7133 void
7134 spu_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
7135 rtx orig_before, rtx orig_after)
7136 {
7137 machine_mode mode = GET_MODE (mem);
7138 rtx before = orig_before, after = orig_after;
7139
7140 if (before == NULL_RTX)
7141 before = gen_reg_rtx (mode);
7142
7143 emit_move_insn (before, mem);
7144
7145 if (code == MULT) /* NAND operation */
7146 {
7147 rtx x = expand_simple_binop (mode, AND, before, val,
7148 NULL_RTX, 1, OPTAB_LIB_WIDEN);
7149 after = expand_simple_unop (mode, NOT, x, after, 1);
7150 }
7151 else
7152 {
7153 after = expand_simple_binop (mode, code, before, val,
7154 after, 1, OPTAB_LIB_WIDEN);
7155 }
7156
7157 emit_move_insn (mem, after);
7158
7159 if (orig_after && after != orig_after)
7160 emit_move_insn (orig_after, after);
7161 }
7162
7163 /* Implement TARGET_MODES_TIEABLE_P. */
7164
7165 static bool
7166 spu_modes_tieable_p (machine_mode mode1, machine_mode mode2)
7167 {
7168 return (GET_MODE_BITSIZE (mode1) <= MAX_FIXED_MODE_SIZE
7169 && GET_MODE_BITSIZE (mode2) <= MAX_FIXED_MODE_SIZE);
7170 }
7171
7172 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. GCC assumes that modes are
7173 in the lowpart of a register, which is only true for SPU. */
7174
7175 static bool
7176 spu_can_change_mode_class (machine_mode from, machine_mode to, reg_class_t)
7177 {
7178 return (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
7179 || (GET_MODE_SIZE (from) <= 4 && GET_MODE_SIZE (to) <= 4)
7180 || (GET_MODE_SIZE (from) >= 16 && GET_MODE_SIZE (to) >= 16));
7181 }
7182
7183 /* Implement TARGET_TRULY_NOOP_TRUNCATION. */
7184
7185 static bool
7186 spu_truly_noop_truncation (unsigned int outprec, unsigned int inprec)
7187 {
7188 return inprec <= 32 && outprec <= inprec;
7189 }
7190
7191 /* Implement TARGET_STATIC_RTX_ALIGNMENT.
7192
7193 Make all static objects 16-byte aligned. This allows us to assume
7194 they are also padded to 16 bytes, which means we can use a single
7195 load or store instruction to access them. */
7196
7197 static HOST_WIDE_INT
7198 spu_static_rtx_alignment (machine_mode mode)
7199 {
7200 return MAX (GET_MODE_ALIGNMENT (mode), 128);
7201 }
7202
7203 /* Implement TARGET_CONSTANT_ALIGNMENT.
7204
7205 Make all static objects 16-byte aligned. This allows us to assume
7206 they are also padded to 16 bytes, which means we can use a single
7207 load or store instruction to access them. */
7208
7209 static HOST_WIDE_INT
7210 spu_constant_alignment (const_tree, HOST_WIDE_INT align)
7211 {
7212 return MAX (align, 128);
7213 }
7214 \f
7215 /* Table of machine attributes. */
7216 static const struct attribute_spec spu_attribute_table[] =
7217 {
7218 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7219 affects_type_identity, exclusions } */
7220 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
7221 false, NULL },
7222 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
7223 false, NULL },
7224 { NULL, 0, 0, false, false, false, NULL, false, NULL }
7225 };
7226
7227 /* TARGET overrides. */
7228
7229 #undef TARGET_LRA_P
7230 #define TARGET_LRA_P hook_bool_void_false
7231
7232 #undef TARGET_ADDR_SPACE_POINTER_MODE
7233 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7234
7235 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7236 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7237
7238 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7239 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7240 spu_addr_space_legitimate_address_p
7241
7242 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7243 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7244
7245 #undef TARGET_ADDR_SPACE_SUBSET_P
7246 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7247
7248 #undef TARGET_ADDR_SPACE_CONVERT
7249 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7250
7251 #undef TARGET_INIT_BUILTINS
7252 #define TARGET_INIT_BUILTINS spu_init_builtins
7253 #undef TARGET_BUILTIN_DECL
7254 #define TARGET_BUILTIN_DECL spu_builtin_decl
7255
7256 #undef TARGET_EXPAND_BUILTIN
7257 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
7258
7259 #undef TARGET_UNWIND_WORD_MODE
7260 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7261
7262 #undef TARGET_LEGITIMIZE_ADDRESS
7263 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7264
7265 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7266 and .quad for the debugger. When it is known that the assembler is fixed,
7267 these can be removed. */
7268 #undef TARGET_ASM_UNALIGNED_SI_OP
7269 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7270
7271 #undef TARGET_ASM_ALIGNED_DI_OP
7272 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7273
7274 /* The .8byte directive doesn't seem to work well for a 32 bit
7275 architecture. */
7276 #undef TARGET_ASM_UNALIGNED_DI_OP
7277 #define TARGET_ASM_UNALIGNED_DI_OP NULL
7278
7279 #undef TARGET_RTX_COSTS
7280 #define TARGET_RTX_COSTS spu_rtx_costs
7281
7282 #undef TARGET_ADDRESS_COST
7283 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
7284
7285 #undef TARGET_SCHED_ISSUE_RATE
7286 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7287
7288 #undef TARGET_SCHED_INIT_GLOBAL
7289 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7290
7291 #undef TARGET_SCHED_INIT
7292 #define TARGET_SCHED_INIT spu_sched_init
7293
7294 #undef TARGET_SCHED_VARIABLE_ISSUE
7295 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7296
7297 #undef TARGET_SCHED_REORDER
7298 #define TARGET_SCHED_REORDER spu_sched_reorder
7299
7300 #undef TARGET_SCHED_REORDER2
7301 #define TARGET_SCHED_REORDER2 spu_sched_reorder
7302
7303 #undef TARGET_SCHED_ADJUST_COST
7304 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7305
7306 #undef TARGET_ATTRIBUTE_TABLE
7307 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7308
7309 #undef TARGET_ASM_INTEGER
7310 #define TARGET_ASM_INTEGER spu_assemble_integer
7311
7312 #undef TARGET_SCALAR_MODE_SUPPORTED_P
7313 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7314
7315 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7316 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7317
7318 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7319 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7320
7321 #undef TARGET_ASM_GLOBALIZE_LABEL
7322 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7323
7324 #undef TARGET_PASS_BY_REFERENCE
7325 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7326
7327 #undef TARGET_FUNCTION_ARG
7328 #define TARGET_FUNCTION_ARG spu_function_arg
7329
7330 #undef TARGET_FUNCTION_ARG_ADVANCE
7331 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7332
7333 #undef TARGET_FUNCTION_ARG_OFFSET
7334 #define TARGET_FUNCTION_ARG_OFFSET spu_function_arg_offset
7335
7336 #undef TARGET_FUNCTION_ARG_PADDING
7337 #define TARGET_FUNCTION_ARG_PADDING spu_function_arg_padding
7338
7339 #undef TARGET_MUST_PASS_IN_STACK
7340 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7341
7342 #undef TARGET_BUILD_BUILTIN_VA_LIST
7343 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7344
7345 #undef TARGET_EXPAND_BUILTIN_VA_START
7346 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7347
7348 #undef TARGET_SETUP_INCOMING_VARARGS
7349 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7350
7351 #undef TARGET_MACHINE_DEPENDENT_REORG
7352 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7353
7354 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7355 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7356
7357 #undef TARGET_INIT_LIBFUNCS
7358 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7359
7360 #undef TARGET_RETURN_IN_MEMORY
7361 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7362
7363 #undef TARGET_ENCODE_SECTION_INFO
7364 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7365
7366 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7367 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7368
7369 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7370 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7371
7372 #undef TARGET_VECTORIZE_INIT_COST
7373 #define TARGET_VECTORIZE_INIT_COST spu_init_cost
7374
7375 #undef TARGET_VECTORIZE_ADD_STMT_COST
7376 #define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7377
7378 #undef TARGET_VECTORIZE_FINISH_COST
7379 #define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7380
7381 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
7382 #define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7383
7384 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7385 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7386
7387 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7388 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7389
7390 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7391 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7392
7393 #undef TARGET_SCHED_SMS_RES_MII
7394 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7395
7396 #undef TARGET_SECTION_TYPE_FLAGS
7397 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7398
7399 #undef TARGET_ASM_SELECT_SECTION
7400 #define TARGET_ASM_SELECT_SECTION spu_select_section
7401
7402 #undef TARGET_ASM_UNIQUE_SECTION
7403 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7404
7405 #undef TARGET_LEGITIMATE_ADDRESS_P
7406 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7407
7408 #undef TARGET_LEGITIMATE_CONSTANT_P
7409 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7410
7411 #undef TARGET_TRAMPOLINE_INIT
7412 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7413
7414 #undef TARGET_WARN_FUNC_RETURN
7415 #define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7416
7417 #undef TARGET_OPTION_OVERRIDE
7418 #define TARGET_OPTION_OVERRIDE spu_option_override
7419
7420 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7421 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7422
7423 #undef TARGET_REF_MAY_ALIAS_ERRNO
7424 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7425
7426 #undef TARGET_ASM_OUTPUT_MI_THUNK
7427 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7428 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7429 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7430
7431 /* Variable tracking should be run after all optimizations which
7432 change order of insns. It also needs a valid CFG. */
7433 #undef TARGET_DELAY_VARTRACK
7434 #define TARGET_DELAY_VARTRACK true
7435
7436 #undef TARGET_CANONICALIZE_COMPARISON
7437 #define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7438
7439 #undef TARGET_CAN_USE_DOLOOP_P
7440 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7441
7442 #undef TARGET_MODES_TIEABLE_P
7443 #define TARGET_MODES_TIEABLE_P spu_modes_tieable_p
7444
7445 #undef TARGET_HARD_REGNO_NREGS
7446 #define TARGET_HARD_REGNO_NREGS spu_hard_regno_nregs
7447
7448 #undef TARGET_CAN_CHANGE_MODE_CLASS
7449 #define TARGET_CAN_CHANGE_MODE_CLASS spu_can_change_mode_class
7450
7451 #undef TARGET_TRULY_NOOP_TRUNCATION
7452 #define TARGET_TRULY_NOOP_TRUNCATION spu_truly_noop_truncation
7453
7454 #undef TARGET_STATIC_RTX_ALIGNMENT
7455 #define TARGET_STATIC_RTX_ALIGNMENT spu_static_rtx_alignment
7456 #undef TARGET_CONSTANT_ALIGNMENT
7457 #define TARGET_CONSTANT_ALIGNMENT spu_constant_alignment
7458
7459 struct gcc_target targetm = TARGET_INITIALIZER;
7460
7461 #include "gt-spu.h"