/* Target Code for TI C6X
   Copyright (C) 2010, 2011, 2012 Free Software Foundation, Inc.
   Contributed by Andrew Jenner <andrew@codesourcery.com>
   Contributed by Bernd Schmidt <bernds@codesourcery.com>

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "insn-codes.h"
#include "expr.h"
#include "regs.h"
#include "optabs.h"
#include "recog.h"
#include "ggc.h"
#include "sched-int.h"
#include "timevar.h"
#include "tm_p.h"
#include "tm-preds.h"
#include "tm-constrs.h"
#include "df.h"
#include "function.h"
#include "diagnostic-core.h"
#include "cgraph.h"
#include "langhooks.h"
#include "target.h"
#include "target-def.h"
#include "sel-sched.h"
#include "debug.h"
#include "opts.h"
#include "hw-doloop.h"
#include "regrename.h"
#include "dumpfile.h"

/* Table of supported architecture variants.  */
typedef struct
{
  const char *arch;
  enum c6x_cpu_type type;
  unsigned short features;
} c6x_arch_table;

/* A list of all ISAs, mapping each one to a representative device.
   Used for -march selection.  */
static const c6x_arch_table all_isas[] =
{
#define C6X_ISA(NAME,DEVICE,FLAGS) \
  { NAME, DEVICE, FLAGS },
#include "c6x-isas.def"
#undef C6X_ISA
  { NULL, C6X_CPU_C62X, 0 }
};
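
/* For illustration only: the real entries live in c6x-isas.def, but a
   hypothetical line such as
     C6X_ISA ("c64x+", C6X_CPU_C64XP, C6X_INSNS_C64XP)
   expands through the macro above into the initializer
     { "c64x+", C6X_CPU_C64XP, C6X_INSNS_C64XP },
   so each -march name is paired with a CPU type and a feature mask.  */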

/* This is the parsed result of the "-march=" option, if given.  */
enum c6x_cpu_type c6x_arch = C6X_DEFAULT_ARCH;

/* A mask of insn types that are allowed by the architecture selected by
   the -march option.  */
unsigned long c6x_insn_mask = C6X_DEFAULT_INSN_MASK;

/* The instruction that is being output (as obtained from FINAL_PRESCAN_INSN).
 */
static rtx c6x_current_insn = NULL_RTX;

/* A decl we build to access __c6xabi_DSBT_base.  */
static GTY(()) tree dsbt_decl;
\f
/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int c6x_flag_schedule_insns2;

/* Determines whether we run variable tracking in machine dependent
   reorganization.  */
static int c6x_flag_var_tracking;

/* Determines whether we use modulo scheduling.  */
static int c6x_flag_modulo_sched;

/* Record the state of flag_pic before we set it to 1 for DSBT.  */
int c6x_initial_flag_pic;
\f
typedef struct
{
  /* We record the clock cycle for every insn during scheduling.  */
  int clock;
  /* After scheduling, we run assign_reservations to choose unit
     reservations for all insns.  These are recorded here.  */
  int reservation;
  /* Records the new condition for insns which must be made
     conditional after scheduling.  An entry of NULL_RTX means no such
     change is necessary.  */
  rtx new_cond;
  /* True for the first insn that was scheduled in an ebb.  */
  bool ebb_start;
  /* The scheduler state after the insn, transformed into a mask of UNIT_QID
     bits rather than storing the state.  Meaningful only for the last
     insn in a cycle.  */
  unsigned int unit_mask;
} c6x_sched_insn_info;


/* Record a c6x_sched_insn_info structure for every insn in the function.  */
static vec<c6x_sched_insn_info> insn_info;

#define INSN_INFO_LENGTH (insn_info).length ()
#define INSN_INFO_ENTRY(N) (insn_info[(N)])

static bool done_cfi_sections;

#define RESERVATION_FLAG_D 1
#define RESERVATION_FLAG_L 2
#define RESERVATION_FLAG_S 4
#define RESERVATION_FLAG_M 8
#define RESERVATION_FLAG_DL (RESERVATION_FLAG_D | RESERVATION_FLAG_L)
#define RESERVATION_FLAG_DS (RESERVATION_FLAG_D | RESERVATION_FLAG_S)
#define RESERVATION_FLAG_LS (RESERVATION_FLAG_L | RESERVATION_FLAG_S)
#define RESERVATION_FLAG_DLS (RESERVATION_FLAG_D | RESERVATION_FLAG_LS)

/* The DFA names of the units.  */
static const char *const c6x_unit_names[] =
{
  "d1", "l1", "s1", "m1", "fps1", "fpl1", "adddps1", "adddpl1",
  "d2", "l2", "s2", "m2", "fps2", "fpl2", "adddps2", "adddpl2"
};

/* The DFA unit number for each unit in c6x_unit_names[].  */
static int c6x_unit_codes[ARRAY_SIZE (c6x_unit_names)];

/* Unit query IDs.  */
#define UNIT_QID_D1 0
#define UNIT_QID_L1 1
#define UNIT_QID_S1 2
#define UNIT_QID_M1 3
#define UNIT_QID_FPS1 4
#define UNIT_QID_FPL1 5
#define UNIT_QID_ADDDPS1 6
#define UNIT_QID_ADDDPL1 7
#define UNIT_QID_SIDE_OFFSET 8

#define RESERVATION_S1 2
#define RESERVATION_S2 10

/* An enum for the unit requirements we count in the UNIT_REQS table.  */
enum unitreqs
{
  UNIT_REQ_D,
  UNIT_REQ_L,
  UNIT_REQ_S,
  UNIT_REQ_M,
  UNIT_REQ_DL,
  UNIT_REQ_DS,
  UNIT_REQ_LS,
  UNIT_REQ_DLS,
  UNIT_REQ_T,
  UNIT_REQ_X,
  UNIT_REQ_MAX
};

/* A table used to count unit requirements.  Used when computing minimum
   iteration intervals.  */
typedef int unit_req_table[2][UNIT_REQ_MAX];
static unit_req_table unit_reqs;
\f
/* Register map for debugging.  */
int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  /* A0 - A15.  */
  37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,    /* A16 - A32.  */
  50, 51, 52,
  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,    /* B0 - B15.  */
  29, 30, 31,
  53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,    /* B16 - B32.  */
  66, 67, 68,
  -1, -1, -1						  /* FP, ARGP, ILC.  */
};
\f
/* Allocate a new, cleared machine_function structure.  */

static struct machine_function *
c6x_init_machine_status (void)
{
  return ggc_alloc_cleared_machine_function ();
}

/* Implement TARGET_OPTION_OVERRIDE.  */

static void
c6x_option_override (void)
{
  unsigned i;

  if (global_options_set.x_c6x_arch_option)
    {
      c6x_arch = all_isas[c6x_arch_option].type;
      c6x_insn_mask &= ~C6X_INSNS_ALL_CPU_BITS;
      c6x_insn_mask |= all_isas[c6x_arch_option].features;
    }

  c6x_flag_schedule_insns2 = flag_schedule_insns_after_reload;
  flag_schedule_insns_after_reload = 0;

  c6x_flag_modulo_sched = flag_modulo_sched;
  flag_modulo_sched = 0;

  init_machine_status = c6x_init_machine_status;

  for (i = 0; i < ARRAY_SIZE (c6x_unit_names); i++)
    c6x_unit_codes[i] = get_cpu_unit_code (c6x_unit_names[i]);

  if (flag_pic && !TARGET_DSBT)
    {
      error ("-fpic and -fPIC not supported without -mdsbt on this target");
      flag_pic = 0;
    }
  c6x_initial_flag_pic = flag_pic;
  if (TARGET_DSBT && !flag_pic)
    flag_pic = 1;
}


/* Implement the TARGET_CONDITIONAL_REGISTER_USAGE hook.  */

static void
c6x_conditional_register_usage (void)
{
  int i;
  if (c6x_arch == C6X_CPU_C62X || c6x_arch == C6X_CPU_C67X)
    for (i = 16; i < 32; i++)
      {
	fixed_regs[i] = 1;
	fixed_regs[32 + i] = 1;
      }
  if (TARGET_INSNS_64)
    {
      SET_HARD_REG_BIT (reg_class_contents[(int)PREDICATE_A_REGS],
			REG_A0);
      SET_HARD_REG_BIT (reg_class_contents[(int)PREDICATE_REGS],
			REG_A0);
      CLEAR_HARD_REG_BIT (reg_class_contents[(int)NONPREDICATE_A_REGS],
			  REG_A0);
      CLEAR_HARD_REG_BIT (reg_class_contents[(int)NONPREDICATE_REGS],
			  REG_A0);
    }
}
\f
static GTY(()) rtx eqdf_libfunc;
static GTY(()) rtx nedf_libfunc;
static GTY(()) rtx ledf_libfunc;
static GTY(()) rtx ltdf_libfunc;
static GTY(()) rtx gedf_libfunc;
static GTY(()) rtx gtdf_libfunc;
static GTY(()) rtx eqsf_libfunc;
static GTY(()) rtx nesf_libfunc;
static GTY(()) rtx lesf_libfunc;
static GTY(()) rtx ltsf_libfunc;
static GTY(()) rtx gesf_libfunc;
static GTY(()) rtx gtsf_libfunc;
static GTY(()) rtx strasgi_libfunc;
static GTY(()) rtx strasgi64p_libfunc;

/* Implement the TARGET_INIT_LIBFUNCS macro.  We use this to rename library
   functions to match the C6x ABI.  */

static void
c6x_init_libfuncs (void)
{
  /* Double-precision floating-point arithmetic.  */
  set_optab_libfunc (add_optab, DFmode, "__c6xabi_addd");
  set_optab_libfunc (sdiv_optab, DFmode, "__c6xabi_divd");
  set_optab_libfunc (smul_optab, DFmode, "__c6xabi_mpyd");
  set_optab_libfunc (neg_optab, DFmode, "__c6xabi_negd");
  set_optab_libfunc (sub_optab, DFmode, "__c6xabi_subd");

  /* Single-precision floating-point arithmetic.  */
  set_optab_libfunc (add_optab, SFmode, "__c6xabi_addf");
  set_optab_libfunc (sdiv_optab, SFmode, "__c6xabi_divf");
  set_optab_libfunc (smul_optab, SFmode, "__c6xabi_mpyf");
  set_optab_libfunc (neg_optab, SFmode, "__c6xabi_negf");
  set_optab_libfunc (sub_optab, SFmode, "__c6xabi_subf");

  /* Floating-point comparisons.  */
  eqsf_libfunc = init_one_libfunc ("__c6xabi_eqf");
  nesf_libfunc = init_one_libfunc ("__c6xabi_neqf");
  lesf_libfunc = init_one_libfunc ("__c6xabi_lef");
  ltsf_libfunc = init_one_libfunc ("__c6xabi_ltf");
  gesf_libfunc = init_one_libfunc ("__c6xabi_gef");
  gtsf_libfunc = init_one_libfunc ("__c6xabi_gtf");
  eqdf_libfunc = init_one_libfunc ("__c6xabi_eqd");
  nedf_libfunc = init_one_libfunc ("__c6xabi_neqd");
  ledf_libfunc = init_one_libfunc ("__c6xabi_led");
  ltdf_libfunc = init_one_libfunc ("__c6xabi_ltd");
  gedf_libfunc = init_one_libfunc ("__c6xabi_ged");
  gtdf_libfunc = init_one_libfunc ("__c6xabi_gtd");

  set_optab_libfunc (eq_optab, SFmode, NULL);
  set_optab_libfunc (ne_optab, SFmode, "__c6xabi_neqf");
  set_optab_libfunc (gt_optab, SFmode, NULL);
  set_optab_libfunc (ge_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, NULL);
  set_optab_libfunc (le_optab, SFmode, NULL);
  set_optab_libfunc (unord_optab, SFmode, "__c6xabi_unordf");
  set_optab_libfunc (eq_optab, DFmode, NULL);
  set_optab_libfunc (ne_optab, DFmode, "__c6xabi_neqd");
  set_optab_libfunc (gt_optab, DFmode, NULL);
  set_optab_libfunc (ge_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, NULL);
  set_optab_libfunc (le_optab, DFmode, NULL);
  set_optab_libfunc (unord_optab, DFmode, "__c6xabi_unordd");

  /* Floating-point to integer conversions.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__c6xabi_fixdi");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__c6xabi_fixdu");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__c6xabi_fixdlli");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__c6xabi_fixdull");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__c6xabi_fixfi");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__c6xabi_fixfu");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__c6xabi_fixflli");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__c6xabi_fixfull");

  /* Conversions between floating types.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__c6xabi_cvtdf");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__c6xabi_cvtfd");

  /* Integer to floating-point conversions.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__c6xabi_fltid");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__c6xabi_fltud");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__c6xabi_fltllid");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__c6xabi_fltulld");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__c6xabi_fltif");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__c6xabi_fltuf");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__c6xabi_fltllif");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__c6xabi_fltullf");

  /* Long long.  */
  set_optab_libfunc (smul_optab, DImode, "__c6xabi_mpyll");
  set_optab_libfunc (ashl_optab, DImode, "__c6xabi_llshl");
  set_optab_libfunc (lshr_optab, DImode, "__c6xabi_llshru");
  set_optab_libfunc (ashr_optab, DImode, "__c6xabi_llshr");

  set_optab_libfunc (sdiv_optab, SImode, "__c6xabi_divi");
  set_optab_libfunc (udiv_optab, SImode, "__c6xabi_divu");
  set_optab_libfunc (smod_optab, SImode, "__c6xabi_remi");
  set_optab_libfunc (umod_optab, SImode, "__c6xabi_remu");
  set_optab_libfunc (sdivmod_optab, SImode, "__c6xabi_divremi");
  set_optab_libfunc (udivmod_optab, SImode, "__c6xabi_divremu");
  set_optab_libfunc (sdiv_optab, DImode, "__c6xabi_divlli");
  set_optab_libfunc (udiv_optab, DImode, "__c6xabi_divull");
  set_optab_libfunc (smod_optab, DImode, "__c6xabi_remlli");
  set_optab_libfunc (umod_optab, DImode, "__c6xabi_remull");
  set_optab_libfunc (udivmod_optab, DImode, "__c6xabi_divremull");

  /* Block move.  */
  strasgi_libfunc = init_one_libfunc ("__c6xabi_strasgi");
  strasgi64p_libfunc = init_one_libfunc ("__c6xabi_strasgi_64plus");
}

/* Begin the assembly file.  */

static void
c6x_file_start (void)
{
  /* Variable tracking should be run after all optimizations which change order
     of insns.  It also needs a valid CFG.  This can't be done in
     c6x_override_options, because flag_var_tracking is finalized after
     that.  */
  c6x_flag_var_tracking = flag_var_tracking;
  flag_var_tracking = 0;

  done_cfi_sections = false;
  default_file_start ();

  /* Arrays are aligned to 8-byte boundaries.  */
  asm_fprintf (asm_out_file,
	       "\t.c6xabi_attribute Tag_ABI_array_object_alignment, 0\n");
  asm_fprintf (asm_out_file,
	       "\t.c6xabi_attribute Tag_ABI_array_object_align_expected, 0\n");

  /* Stack alignment is 8 bytes.  */
  asm_fprintf (asm_out_file,
	       "\t.c6xabi_attribute Tag_ABI_stack_align_needed, 0\n");
  asm_fprintf (asm_out_file,
	       "\t.c6xabi_attribute Tag_ABI_stack_align_preserved, 0\n");

#if 0 /* FIXME: Reenable when TI's tools are fixed.  */
  /* ??? Ideally we'd check flag_short_wchar somehow.  */
  asm_fprintf (asm_out_file, "\t.c6xabi_attribute Tag_ABI_wchar_t, %d\n", 2);
#endif

  /* We conform to version 1.0 of the ABI.  */
  asm_fprintf (asm_out_file,
	       "\t.c6xabi_attribute Tag_ABI_conformance, \"1.0\"\n");

}

/* The LTO frontend only enables exceptions when it sees a function that
   uses it.  This changes the return value of dwarf2out_do_frame, so we
   have to check before every function.  */

void
c6x_output_file_unwind (FILE * f)
{
  if (done_cfi_sections)
    return;

  /* Output a .cfi_sections directive.  */
  if (dwarf2out_do_frame ())
    {
      if (flag_unwind_tables || flag_exceptions)
	{
	  if (write_symbols == DWARF2_DEBUG
	      || write_symbols == VMS_AND_DWARF2_DEBUG)
	    asm_fprintf (f, "\t.cfi_sections .debug_frame, .c6xabi.exidx\n");
	  else
	    asm_fprintf (f, "\t.cfi_sections .c6xabi.exidx\n");
	}
      else
	asm_fprintf (f, "\t.cfi_sections .debug_frame\n");
      done_cfi_sections = true;
    }
}

/* Output unwind directives at the end of a function.  */

static void
c6x_output_fn_unwind (FILE * f)
{
  /* Return immediately if we are not generating unwinding tables.  */
  if (! (flag_unwind_tables || flag_exceptions))
    return;

  /* If this function will never be unwound, then mark it as such.  */
  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    fputs("\t.cantunwind\n", f);

  fputs ("\t.endp\n", f);
}

\f
/* Stack and Calling.  */

int argument_registers[10] =
{
  REG_A4, REG_B4,
  REG_A6, REG_B6,
  REG_A8, REG_B8,
  REG_A10, REG_B10,
  REG_A12, REG_B12
};

/* Implements the macro INIT_CUMULATIVE_ARGS defined in c6x.h.  */

void
c6x_init_cumulative_args (CUMULATIVE_ARGS *cum, const_tree fntype, rtx libname,
			  int n_named_args ATTRIBUTE_UNUSED)
{
  cum->count = 0;
  cum->nregs = 10;
  if (!libname && fntype)
    {
      /* We need to find out the number of named arguments.  Unfortunately,
	 for incoming arguments, N_NAMED_ARGS is set to -1.  */
      if (stdarg_p (fntype))
	cum->nregs = type_num_arguments (fntype) - 1;
      if (cum->nregs > 10)
	cum->nregs = 10;
    }
}

/* Implements the macro FUNCTION_ARG defined in c6x.h.  */

static rtx
c6x_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
		  const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  if (cum->count >= cum->nregs)
    return NULL_RTX;
  if (type)
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (TARGET_BIG_ENDIAN && AGGREGATE_TYPE_P (type))
	{
	  if (size > 4)
	    {
	      rtx reg1 = gen_rtx_REG (SImode, argument_registers[cum->count] + 1);
	      rtx reg2 = gen_rtx_REG (SImode, argument_registers[cum->count]);
	      rtvec vec = gen_rtvec (2, gen_rtx_EXPR_LIST (VOIDmode, reg1, const0_rtx),
				     gen_rtx_EXPR_LIST (VOIDmode, reg2, GEN_INT (4)));
	      return gen_rtx_PARALLEL (mode, vec);
	    }
	}
    }
  return gen_rtx_REG (mode, argument_registers[cum->count]);
}

static void
c6x_function_arg_advance (cumulative_args_t cum_v,
			  enum machine_mode mode ATTRIBUTE_UNUSED,
			  const_tree type ATTRIBUTE_UNUSED,
			  bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  cum->count++;
}


/* Return true if BLOCK_REG_PADDING (MODE, TYPE, FIRST) should return
   upward rather than downward.  */

bool
c6x_block_reg_pad_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
			  const_tree type, bool first)
{
  HOST_WIDE_INT size;

  if (!TARGET_BIG_ENDIAN)
    return true;
  if (!first)
    return true;
  if (!type)
    return true;
  size = int_size_in_bytes (type);
  return size == 3;
}

/* Implement TARGET_FUNCTION_ARG_BOUNDARY.  */

static unsigned int
c6x_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int boundary = type ? TYPE_ALIGN (type) : GET_MODE_BITSIZE (mode);

  if (boundary > BITS_PER_WORD)
    return 2 * BITS_PER_WORD;

  if (mode == BLKmode)
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size > 4)
	return 2 * BITS_PER_WORD;
      if (boundary < BITS_PER_WORD)
	{
	  if (size >= 3)
	    return BITS_PER_WORD;
	  if (size >= 2)
	    return 2 * BITS_PER_UNIT;
	}
    }
  return boundary;
}
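
/* Worked example (added commentary, assuming BITS_PER_WORD is 32 on this
   target): a 6-byte BLKmode argument gets a 64-bit boundary (size > 4),
   while a packed 3-byte BLKmode argument with byte alignment is promoted
   to a 32-bit boundary, and a 2-byte one to 16 bits.  */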

/* Implement TARGET_FUNCTION_ARG_ROUND_BOUNDARY.  */
static unsigned int
c6x_function_arg_round_boundary (enum machine_mode mode, const_tree type)
{
  return c6x_function_arg_boundary (mode, type);
}

/* TARGET_FUNCTION_VALUE implementation.  Returns an RTX representing the place
   where function FUNC returns or receives a value of data type TYPE.  */

static rtx
c6x_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED,
		    bool outgoing ATTRIBUTE_UNUSED)
{
  /* Functions return values in register A4.  When returning aggregates, we may
     have to adjust for endianness.  */
  if (TARGET_BIG_ENDIAN && type && AGGREGATE_TYPE_P (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size > 4)
	{

	  rtx reg1 = gen_rtx_REG (SImode, REG_A4 + 1);
	  rtx reg2 = gen_rtx_REG (SImode, REG_A4);
	  rtvec vec = gen_rtvec (2, gen_rtx_EXPR_LIST (VOIDmode, reg1, const0_rtx),
				 gen_rtx_EXPR_LIST (VOIDmode, reg2, GEN_INT (4)));
	  return gen_rtx_PARALLEL (TYPE_MODE (type), vec);
	}
    }
  return gen_rtx_REG (TYPE_MODE (type), REG_A4);
}

/* Implement TARGET_LIBCALL_VALUE.  */

static rtx
c6x_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (mode, REG_A4);
}

/* TARGET_STRUCT_VALUE_RTX implementation.  */

static rtx
c6x_struct_value_rtx (tree type ATTRIBUTE_UNUSED, int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, REG_A3);
}

/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
c6x_function_value_regno_p (const unsigned int regno)
{
  return regno == REG_A4;
}

/* Types larger than 64 bit, and variable sized types, are passed by
   reference.  The callee must copy them; see c6x_callee_copies.  */

static bool
c6x_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
		       enum machine_mode mode, const_tree type,
		       bool named ATTRIBUTE_UNUSED)
{
  int size = -1;
  if (type)
    size = int_size_in_bytes (type);
  else if (mode != VOIDmode)
    size = GET_MODE_SIZE (mode);
  return size > 2 * UNITS_PER_WORD || size == -1;
}

/* Decide whether a type should be returned in memory (true)
   or in a register (false).  This is called by the macro
   TARGET_RETURN_IN_MEMORY.  */

static bool
c6x_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  int size = int_size_in_bytes (type);
  return size > 2 * UNITS_PER_WORD || size == -1;
}

/* Values which must be returned in the most-significant end of the return
   register.  */

static bool
c6x_return_in_msb (const_tree valtype)
{
  HOST_WIDE_INT size = int_size_in_bytes (valtype);
  return TARGET_BIG_ENDIAN && AGGREGATE_TYPE_P (valtype) && size == 3;
}

/* Implement TARGET_CALLEE_COPIES.  */

static bool
c6x_callee_copies (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
		   enum machine_mode mode ATTRIBUTE_UNUSED,
		   const_tree type ATTRIBUTE_UNUSED,
		   bool named ATTRIBUTE_UNUSED)
{
  return true;
}

/* Return the type to use as __builtin_va_list.  */
static tree
c6x_build_builtin_va_list (void)
{
  return build_pointer_type (char_type_node);
}
\f
static void
c6x_asm_trampoline_template (FILE *f)
{
  fprintf (f, "\t.long\t0x0000002b\n"); /* mvkl .s2 fnlow,B0 */
  fprintf (f, "\t.long\t0x01000028\n"); /* || mvkl .s1 sclow,A2 */
  fprintf (f, "\t.long\t0x0000006b\n"); /* mvkh .s2 fnhigh,B0 */
  fprintf (f, "\t.long\t0x01000068\n"); /* || mvkh .s1 schigh,A2 */
  fprintf (f, "\t.long\t0x00000362\n"); /* b .s2 B0 */
  fprintf (f, "\t.long\t0x00008000\n"); /* nop 5 */
  fprintf (f, "\t.long\t0x00000000\n"); /* nop */
  fprintf (f, "\t.long\t0x00000000\n"); /* nop */
}
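
/* Note (added commentary): the template above is eight 32-bit words.
   c6x_initialize_trampoline below patches the 16-bit constant fields of
   the first four words: words 0 and 1 receive the low halves of the
   function address and static chain (the mvkl pair), words 2 and 3 the
   high halves (the mvkh pair); the branch and the nops are left
   unchanged.  */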

/* Emit RTL insns to initialize the variable parts of a trampoline at
   TRAMP.  FNADDR is an RTX for the address of the function's pure
   code.  CXT is an RTX for the static chain value for the function.  */

static void
c6x_initialize_trampoline (rtx tramp, tree fndecl, rtx cxt)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx t1 = copy_to_reg (fnaddr);
  rtx t2 = copy_to_reg (cxt);
  rtx mask = gen_reg_rtx (SImode);
  int i;

  emit_block_move (tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  emit_move_insn (mask, GEN_INT (0xffff << 7));

  for (i = 0; i < 4; i++)
    {
      rtx mem = adjust_address (tramp, SImode, i * 4);
      rtx t = (i & 1) ? t2 : t1;
      rtx v1 = gen_reg_rtx (SImode);
      rtx v2 = gen_reg_rtx (SImode);
      emit_move_insn (v1, mem);
      if (i < 2)
	emit_insn (gen_ashlsi3 (v2, t, GEN_INT (7)));
      else
	emit_insn (gen_lshrsi3 (v2, t, GEN_INT (9)));
      emit_insn (gen_andsi3 (v2, v2, mask));
      emit_insn (gen_iorsi3 (v2, v2, v1));
      emit_move_insn (mem, v2);
    }
#ifdef CLEAR_INSN_CACHE
  tramp = XEXP (tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__gnu_clear_cache"),
		     LCT_NORMAL, VOIDmode, 2, tramp, Pmode,
		     plus_constant (Pmode, tramp, TRAMPOLINE_SIZE),
		     Pmode);
#endif
}
\f
/* Determine whether c6x_output_mi_thunk can succeed.  */

static bool
c6x_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
			 const_tree function ATTRIBUTE_UNUSED)
{
  return !TARGET_LONG_CALLS;
}

/* Output the assembler code for a thunk function.  THUNK is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
c6x_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx xops[5];
  /* The this parameter is passed as the first argument.  */
  rtx this_rtx = gen_rtx_REG (Pmode, REG_A4);

  c6x_current_insn = NULL_RTX;

  xops[4] = XEXP (DECL_RTL (function), 0);
  if (!vcall_offset)
    {
      output_asm_insn ("b .s2 \t%4", xops);
      if (!delta)
	output_asm_insn ("nop 5", xops);
    }

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_rtx;
      if (delta >= -16 && delta <= 15)
	{
	  output_asm_insn ("add .s1 %0, %1, %1", xops);
	  if (!vcall_offset)
	    output_asm_insn ("nop 4", xops);
	}
      else if (delta >= 16 && delta < 32)
	{
	  output_asm_insn ("add .d1 %0, %1, %1", xops);
	  if (!vcall_offset)
	    output_asm_insn ("nop 4", xops);
	}
      else if (delta >= -32768 && delta < 32768)
	{
	  output_asm_insn ("mvk .s1 %0, A0", xops);
	  output_asm_insn ("add .d1 %1, A0, %1", xops);
	  if (!vcall_offset)
	    output_asm_insn ("nop 3", xops);
	}
      else
	{
	  output_asm_insn ("mvkl .s1 %0, A0", xops);
	  output_asm_insn ("mvkh .s1 %0, A0", xops);
	  output_asm_insn ("add .d1 %1, A0, %1", xops);
	  if (!vcall_offset)
	    output_asm_insn ("nop 3", xops);
	}
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx a0tmp = gen_rtx_REG (Pmode, REG_A0);
      rtx a3tmp = gen_rtx_REG (Pmode, REG_A3);

      xops[1] = a3tmp;
      xops[2] = a0tmp;
      xops[3] = gen_rtx_MEM (Pmode, a0tmp);
      output_asm_insn ("mv .s1 a4, %2", xops);
      output_asm_insn ("ldw .d1t1 %3, %2", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (Pmode, a0tmp,
						   vcall_offset));
      if (!memory_operand (xops[0], Pmode))
	{
	  rtx tmp2 = gen_rtx_REG (Pmode, REG_A1);
	  xops[0] = GEN_INT (vcall_offset);
	  xops[1] = tmp2;
	  output_asm_insn ("mvkl .s1 %0, %1", xops);
	  output_asm_insn ("mvkh .s1 %0, %1", xops);
	  output_asm_insn ("nop 2", xops);
	  output_asm_insn ("add .d1 %2, %1, %2", xops);
	  xops[0] = gen_rtx_MEM (Pmode, a0tmp);
	}
      else
	output_asm_insn ("nop 4", xops);
      xops[2] = this_rtx;
      output_asm_insn ("ldw .d1t1 %0, %1", xops);
      output_asm_insn ("|| b .s2 \t%4", xops);
      output_asm_insn ("nop 4", xops);
      output_asm_insn ("add .d1 %2, %1, %2", xops);
    }
}
\f
/* Return true if EXP goes in small data/bss.  */

static bool
c6x_in_small_data_p (const_tree exp)
{
  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (exp) == STRING_CST)
    return false;

  /* Functions are never small data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_WEAK (exp))
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));

      if (strcmp (section, ".neardata") == 0
	  || strncmp (section, ".neardata.", 10) == 0
	  || strncmp (section, ".gnu.linkonce.s.", 16) == 0
	  || strcmp (section, ".bss") == 0
	  || strncmp (section, ".bss.", 5) == 0
	  || strncmp (section, ".gnu.linkonce.sb.", 17) == 0
	  || strcmp (section, ".rodata") == 0
	  || strncmp (section, ".rodata.", 8) == 0
	  || strncmp (section, ".gnu.linkonce.s2.", 17) == 0)
	return true;
    }
  else
    return PLACE_IN_SDATA_P (exp);

  return false;
}

/* Return a section for X.  The only special thing we do here is to
   honor small data.  We don't have a tree type, so we can't use the
   PLACE_IN_SDATA_P macro we use everywhere else; we choose to place
   everything sized 8 bytes or smaller into small data.  */

static section *
c6x_select_rtx_section (enum machine_mode mode, rtx x,
			unsigned HOST_WIDE_INT align)
{
  if (c6x_sdata_mode == C6X_SDATA_ALL
      || (c6x_sdata_mode != C6X_SDATA_NONE && GET_MODE_SIZE (mode) <= 8))
    /* ??? Consider using mergeable sdata sections.  */
    return sdata_section;
  else
    return default_elf_select_rtx_section (mode, x, align);
}

static section *
c6x_elf_select_section (tree decl, int reloc,
			unsigned HOST_WIDE_INT align)
{
  const char *sname = NULL;
  unsigned int flags = SECTION_WRITE;
  if (c6x_in_small_data_p (decl))
    {
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_SRODATA:
	  sname = ".rodata";
	  flags = 0;
	  break;
	case SECCAT_SDATA:
	  sname = ".neardata";
	  break;
	case SECCAT_SBSS:
	  sname = ".bss";
	  flags |= SECTION_BSS;
	default:
	  break;
	}
    }
  else
    {
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".fardata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".fardata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".fardata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".fardata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".fardata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".far";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	  sname = ".const";
	  flags = 0;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	default:
	  break;
	}
    }
  if (sname)
    {
      /* We might get called with string constants, but get_named_section
	 doesn't like them as they are not DECLs.  Also, we need to set
	 flags in that case.  */
      if (!DECL_P (decl))
	return get_section (sname, flags, NULL);
      return get_named_section (decl, sname, reloc);
    }

  return default_elf_select_section (decl, reloc, align);
}

/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
c6x_elf_unique_section (tree decl, int reloc)
{
  const char *prefix = NULL;
  /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
  bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

  if (c6x_in_small_data_p (decl))
    {
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_SDATA:
	  prefix = one_only ? ".s" : ".neardata";
	  break;
	case SECCAT_SBSS:
	  prefix = one_only ? ".sb" : ".bss";
	  break;
	case SECCAT_SRODATA:
	  prefix = one_only ? ".s2" : ".rodata";
	  break;
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	case SECCAT_RODATA:
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  gcc_unreachable ();
	default:
	  /* Everything else we place into default sections and hope for the
	     best.  */
	  break;
	}
    }
  else
    {
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".fd" : ".fardata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".fb" : ".far";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".fr" : ".const";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	default:
	  break;
	}
    }

  if (prefix)
    {
      const char *name, *linkonce;
      char *string;

      name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
      name = targetm.strip_name_encoding (name);

      /* If we're using one_only, then there needs to be a .gnu.linkonce
	 prefix to the section name.  */
      linkonce = one_only ? ".gnu.linkonce" : "";

      string = ACONCAT ((linkonce, prefix, ".", name, NULL));

      DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
      return;
    }
  default_unique_section (decl, reloc);
}

static unsigned int
c6x_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = 0;

  if (strcmp (name, ".far") == 0
      || strncmp (name, ".far.", 5) == 0)
    flags |= SECTION_BSS;

  flags |= default_section_type_flags (decl, name, reloc);

  return flags;
}
\f
/* Checks whether the given CALL_EXPR would use a caller saved
   register.  This is used to decide whether sibling call optimization
   could be performed on the respective function call.  */

static bool
c6x_call_saved_register_used (tree call_expr)
{
  CUMULATIVE_ARGS cum_v;
  cumulative_args_t cum;
  HARD_REG_SET call_saved_regset;
  tree parameter;
  enum machine_mode mode;
  tree type;
  rtx parm_rtx;
  int i;

  INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
  cum = pack_cumulative_args (&cum_v);

  COMPL_HARD_REG_SET (call_saved_regset, call_used_reg_set);
  for (i = 0; i < call_expr_nargs (call_expr); i++)
    {
      parameter = CALL_EXPR_ARG (call_expr, i);
      gcc_assert (parameter);

      /* For an undeclared variable passed as parameter we will get
	 an ERROR_MARK node here.  */
      if (TREE_CODE (parameter) == ERROR_MARK)
	return true;

      type = TREE_TYPE (parameter);
      gcc_assert (type);

      mode = TYPE_MODE (type);
      gcc_assert (mode);

      if (pass_by_reference (&cum_v, mode, type, true))
	{
	  mode = Pmode;
	  type = build_pointer_type (type);
	}

      parm_rtx = c6x_function_arg (cum, mode, type, 0);

      c6x_function_arg_advance (cum, mode, type, 0);

      if (!parm_rtx)
	continue;

      if (REG_P (parm_rtx)
	  && overlaps_hard_reg_set_p (call_saved_regset, GET_MODE (parm_rtx),
				      REGNO (parm_rtx)))
	return true;
      if (GET_CODE (parm_rtx) == PARALLEL)
	{
	  int n = XVECLEN (parm_rtx, 0);
	  while (n-- > 0)
	    {
	      rtx x = XEXP (XVECEXP (parm_rtx, 0, n), 0);
	      if (REG_P (x)
		  && overlaps_hard_reg_set_p (call_saved_regset,
					      GET_MODE (x), REGNO (x)))
		return true;
	    }
	}
    }
  return false;
}

/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
c6x_function_ok_for_sibcall (tree decl, tree exp)
{
  /* Registers A10, A12, B10 and B12 are available as arguments
     register but unfortunately caller saved.  This makes functions
     needing these registers for arguments not suitable for
     sibcalls.  */
  if (c6x_call_saved_register_used (exp))
    return false;

  if (!flag_pic)
    return true;

  if (TARGET_DSBT)
    {
      /* When compiling for DSBT, the calling function must be local,
	 so that when we reload B14 in the sibcall epilogue, it will
	 not change its value.  */
      struct cgraph_local_info *this_func;

      if (!decl)
	/* Not enough information.  */
	return false;

      this_func = cgraph_local_info (current_function_decl);
      return this_func->local;
    }

  return true;
}

/* Return true if DECL is known to be linked into section SECTION.  */

static bool
c6x_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about functions defined in the same
     compilation unit.  */
  if (!TREE_STATIC (decl))
    return false;

  /* Make sure that SYMBOL always binds to the definition in this
     compilation unit.  */
  if (!targetm.binds_local_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_ONE_ONLY (decl))
	return false;
    }

  return function_section (decl) == section;
}

/* Return true if a call to OP, which is a SYMBOL_REF, must be expanded
   as a long call.  */
bool
c6x_long_call_p (rtx op)
{
  tree decl;

  if (!TARGET_LONG_CALLS)
    return false;

  decl = SYMBOL_REF_DECL (op);

  /* Try to determine whether the symbol is in the same section as the current
     function.  Be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (decl != NULL_TREE
      && !flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && c6x_function_in_section_p (decl, current_function_section ()))
    return false;

  return true;
}

/* Emit the sequence for a call.  */
void
c6x_expand_call (rtx retval, rtx address, bool sibcall)
{
  rtx callee = XEXP (address, 0);
  rtx call_insn;

  if (!c6x_call_operand (callee, Pmode))
    {
      callee = force_reg (Pmode, callee);
      address = change_address (address, Pmode, callee);
    }
  call_insn = gen_rtx_CALL (VOIDmode, address, const0_rtx);
  if (sibcall)
    {
      call_insn = emit_call_insn (call_insn);
      use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
	       gen_rtx_REG (Pmode, REG_B3));
    }
  else
    {
      if (retval == NULL_RTX)
	call_insn = emit_call_insn (call_insn);
      else
	call_insn = emit_call_insn (gen_rtx_SET (GET_MODE (retval), retval,
						 call_insn));
    }
  if (flag_pic)
    use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
}

/* Legitimize PIC addresses.  If the address is already position-independent,
   we return ORIG.  Newly generated position-independent addresses go into a
   reg.  This is REG if nonzero, otherwise we allocate register(s) as
   necessary.  PICREG is the register holding the pointer to the PIC offset
   table.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg, rtx picreg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

  if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == LABEL_REF)
    {
      int unspec = UNSPEC_LOAD_GOT;
      rtx tmp;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}
      if (flag_pic == 2)
	{
	  if (can_create_pseudo_p ())
	    tmp = gen_reg_rtx (Pmode);
	  else
	    tmp = reg;
	  emit_insn (gen_movsi_gotoff_high (tmp, addr));
	  emit_insn (gen_movsi_gotoff_lo_sum (tmp, tmp, addr));
	  emit_insn (gen_load_got_gotoff (reg, picreg, tmp));
	}
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), unspec);
	  new_rtx = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, picreg, tmp));

	  emit_move_insn (reg, new_rtx);
	}
      if (picreg == pic_offset_table_rtx)
	crtl->uses_pic_offset_table = 1;
      return reg;
    }

  else if (GET_CODE (addr) == CONST || GET_CODE (addr) == PLUS)
    {
      rtx base;

      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);
	  gcc_assert (GET_CODE (addr) == PLUS);
	}

      if (XEXP (addr, 0) == picreg)
	return orig;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      base = legitimize_pic_address (XEXP (addr, 0), reg, picreg);
      addr = legitimize_pic_address (XEXP (addr, 1),
				     base == reg ? NULL_RTX : reg,
				     picreg);

      if (GET_CODE (addr) == CONST_INT)
	{
	  gcc_assert (! reload_in_progress && ! reload_completed);
	  addr = force_reg (Pmode, addr);
	}

      if (GET_CODE (addr) == PLUS && CONSTANT_P (XEXP (addr, 1)))
	{
	  base = gen_rtx_PLUS (Pmode, base, XEXP (addr, 0));
	  addr = XEXP (addr, 1);
	}

      return gen_rtx_PLUS (Pmode, base, addr);
    }

  return new_rtx;
}

/* Expand a move operation in mode MODE.  The operands are in OPERANDS.
   Returns true if no further code must be generated, false if the caller
   should generate an insn to move OPERANDS[1] to OPERANDS[0].  */

bool
expand_move (rtx *operands, enum machine_mode mode)
{
  rtx dest = operands[0];
  rtx op = operands[1];

  if ((reload_in_progress | reload_completed) == 0
      && GET_CODE (dest) == MEM && GET_CODE (op) != REG)
    operands[1] = force_reg (mode, op);
  else if (mode == SImode && symbolic_operand (op, SImode))
    {
      if (flag_pic)
	{
	  if (sdata_symbolic_operand (op, SImode))
	    {
	      emit_insn (gen_load_sdata_pic (dest, pic_offset_table_rtx, op));
	      crtl->uses_pic_offset_table = 1;
	      return true;
	    }
	  else
	    {
	      rtx temp = (reload_completed || reload_in_progress
			  ? dest : gen_reg_rtx (Pmode));

	      operands[1] = legitimize_pic_address (op, temp,
						    pic_offset_table_rtx);
	    }
	}
      else if (reload_completed
	       && !sdata_symbolic_operand (op, SImode))
	{
	  emit_insn (gen_movsi_high (dest, op));
	  emit_insn (gen_movsi_lo_sum (dest, dest, op));
	  return true;
	}
    }
  return false;
}

/* This function is called when we're about to expand an integer compare
   operation which performs COMPARISON.  It examines the second operand,
   and if it is an integer constant that cannot be used directly on the
   current machine in a comparison insn, it returns true.  */
bool
c6x_force_op_for_comparison_p (enum rtx_code code, rtx op)
{
  if (!CONST_INT_P (op) || satisfies_constraint_Iu4 (op))
    return false;

  if ((code == EQ || code == LT || code == GT)
      && !satisfies_constraint_Is5 (op))
    return true;
  if ((code == GTU || code == LTU)
      && (!TARGET_INSNS_64 || !satisfies_constraint_Iu5 (op)))
    return true;

  return false;
}
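
/* For example (added commentary; the constraint ranges are assumed from
   their names: Iu4 unsigned 4-bit, Is5 signed 5-bit, Iu5 unsigned
   5-bit): a signed compare such as (eq reg 100) returns true here,
   since 100 fits neither the 4-bit unsigned nor the 5-bit signed
   immediate field, so callers force it into a register first.  */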

/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  Return the comparison
   that should be used in the jump insn.  */

rtx
c6x_expand_compare (rtx comparison, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (comparison);
  rtx op0 = XEXP (comparison, 0);
  rtx op1 = XEXP (comparison, 1);
  rtx cmp;
  enum rtx_code jump_code = code;
  enum machine_mode op_mode = GET_MODE (op0);

  if (op_mode == DImode && (code == NE || code == EQ) && op1 == const0_rtx)
    {
      rtx t = gen_reg_rtx (SImode);
      emit_insn (gen_iorsi3 (t, gen_lowpart (SImode, op0),
			     gen_highpart (SImode, op0)));
      op_mode = SImode;
      cmp = t;
    }
  else if (op_mode == DImode)
    {
      rtx lo[2], high[2];
      rtx cmp1, cmp2;

      if (code == NE || code == GEU || code == LEU || code == GE || code == LE)
	{
	  code = reverse_condition (code);
	  jump_code = EQ;
	}
      else
	jump_code = NE;

      split_di (&op0, 1, lo, high);
      split_di (&op1, 1, lo + 1, high + 1);

      if (c6x_force_op_for_comparison_p (code, high[1])
	  || c6x_force_op_for_comparison_p (EQ, high[1]))
	high[1] = force_reg (SImode, high[1]);

      cmp1 = gen_reg_rtx (SImode);
      cmp2 = gen_reg_rtx (SImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp1,
			      gen_rtx_fmt_ee (code, SImode, high[0], high[1])));
      if (code == EQ)
	{
	  if (c6x_force_op_for_comparison_p (code, lo[1]))
	    lo[1] = force_reg (SImode, lo[1]);
	  emit_insn (gen_rtx_SET (VOIDmode, cmp2,
				  gen_rtx_fmt_ee (code, SImode, lo[0], lo[1])));
	  emit_insn (gen_andsi3 (cmp1, cmp1, cmp2));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode, cmp2,
				  gen_rtx_EQ (SImode, high[0], high[1])));
	  if (code == GT)
	    code = GTU;
	  else if (code == LT)
	    code = LTU;
	  if (c6x_force_op_for_comparison_p (code, lo[1]))
	    lo[1] = force_reg (SImode, lo[1]);
	  emit_insn (gen_cmpsi_and (cmp2, gen_rtx_fmt_ee (code, SImode,
							  lo[0], lo[1]),
				    lo[0], lo[1], cmp2));
	  emit_insn (gen_iorsi3 (cmp1, cmp1, cmp2));
	}
      cmp = cmp1;
    }
  else if (TARGET_FP && !flag_finite_math_only
	   && (op_mode == DFmode || op_mode == SFmode)
	   && code != EQ && code != NE && code != LT && code != GT
	   && code != UNLE && code != UNGE)
    {
      enum rtx_code code1, code2, code3;
      rtx (*fn) (rtx, rtx, rtx, rtx, rtx);

      jump_code = NE;
      code3 = UNKNOWN;
      switch (code)
	{
	case UNLT:
	case UNGT:
	  jump_code = EQ;
	  /* fall through */
	case LE:
	case GE:
	  code1 = code == LE || code == UNGT ? LT : GT;
	  code2 = EQ;
	  break;

	case UNORDERED:
	  jump_code = EQ;
	  /* fall through */
	case ORDERED:
	  code3 = EQ;
	  /* fall through */
	case LTGT:
	  code1 = LT;
	  code2 = GT;
	  break;

	case UNEQ:
	  code1 = LT;
	  code2 = GT;
	  jump_code = EQ;
	  break;

	default:
	  gcc_unreachable ();
	}

      cmp = gen_reg_rtx (SImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (code1, SImode, op0, op1)));
      fn = op_mode == DFmode ? gen_cmpdf_ior : gen_cmpsf_ior;
      emit_insn (fn (cmp, gen_rtx_fmt_ee (code2, SImode, op0, op1),
		     op0, op1, cmp));
      if (code3 != UNKNOWN)
	emit_insn (fn (cmp, gen_rtx_fmt_ee (code3, SImode, op0, op1),
		       op0, op1, cmp));
    }
  else if (op_mode == SImode && (code == NE || code == EQ) && op1 == const0_rtx)
    cmp = op0;
  else
    {
      bool is_fp_libfunc;
      is_fp_libfunc = !TARGET_FP && (op_mode == DFmode || op_mode == SFmode);

      if ((code == NE || code == GEU || code == LEU || code == GE || code == LE)
	  && !is_fp_libfunc)
	{
	  code = reverse_condition (code);
	  jump_code = EQ;
	}
      else if (code == UNGE)
	{
	  code = LT;
	  jump_code = EQ;
	}
      else if (code == UNLE)
	{
	  code = GT;
	  jump_code = EQ;
	}
      else
	jump_code = NE;

      if (is_fp_libfunc)
	{
	  rtx insns;
	  rtx libfunc;
	  switch (code)
	    {
	    case EQ:
	      libfunc = op_mode == DFmode ? eqdf_libfunc : eqsf_libfunc;
	      break;
	    case NE:
	      libfunc = op_mode == DFmode ? nedf_libfunc : nesf_libfunc;
	      break;
	    case GT:
	      libfunc = op_mode == DFmode ? gtdf_libfunc : gtsf_libfunc;
	      break;
	    case GE:
	      libfunc = op_mode == DFmode ? gedf_libfunc : gesf_libfunc;
	      break;
	    case LT:
	      libfunc = op_mode == DFmode ? ltdf_libfunc : ltsf_libfunc;
	      break;
	    case LE:
	      libfunc = op_mode == DFmode ? ledf_libfunc : lesf_libfunc;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  start_sequence ();

	  cmp = emit_library_call_value (libfunc, 0, LCT_CONST, SImode, 2,
					 op0, op_mode, op1, op_mode);
	  insns = get_insns ();
	  end_sequence ();

	  emit_libcall_block (insns, cmp, cmp,
			      gen_rtx_fmt_ee (code, SImode, op0, op1));
	}
      else
	{
	  cmp = gen_reg_rtx (SImode);
	  if (c6x_force_op_for_comparison_p (code, op1))
	    op1 = force_reg (SImode, op1);
	  emit_insn (gen_rtx_SET (VOIDmode, cmp,
				  gen_rtx_fmt_ee (code, SImode, op0, op1)));
	}
    }

  return gen_rtx_fmt_ee (jump_code, mode, cmp, const0_rtx);
}

/* Return one word of double-word value OP.  HIGH_P is true to select the
   high part, false to select the low part.  When encountering auto-increment
   addressing, we make the assumption that the low part is going to be accessed
   first.  */

rtx
c6x_subword (rtx op, bool high_p)
{
  unsigned int byte;
  enum machine_mode mode;

  mode = GET_MODE (op);
  if (mode == VOIDmode)
    mode = DImode;

  if (TARGET_BIG_ENDIAN ? !high_p : high_p)
    byte = UNITS_PER_WORD;
  else
    byte = 0;

  if (MEM_P (op))
    {
      rtx addr = XEXP (op, 0);
      if (GET_CODE (addr) == PLUS || REG_P (addr))
	return adjust_address (op, word_mode, byte);
      /* FIXME: should really support autoincrement addressing for
	 multi-word modes.  */
      gcc_unreachable ();
    }

  return simplify_gen_subreg (word_mode, op, mode, byte);
}

/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
{
  while (num--)
    {
      rtx op = operands[num];

      lo_half[num] = c6x_subword (op, false);
      hi_half[num] = c6x_subword (op, true);
    }
}

/* Return true if VAL is a mask valid for a clr instruction.  */
bool
c6x_valid_mask_p (HOST_WIDE_INT val)
{
  int i;
  for (i = 0; i < 32; i++)
    if (!(val & ((unsigned HOST_WIDE_INT)1 << i)))
      break;
  for (; i < 32; i++)
    if (val & ((unsigned HOST_WIDE_INT)1 << i))
      break;
  for (; i < 32; i++)
    if (!(val & ((unsigned HOST_WIDE_INT)1 << i)))
      return false;
  return true;
}
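
/* For example (added commentary): 0xffff00ff is accepted, since its
   clear bits (8-15) form a single contiguous field, as is 0x000000ff,
   where the clear field runs up to bit 31; 0xff00ff00 is rejected
   because its clear bits fall into two separate groups.  */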

/* Expand a block move for a movmemM pattern.  */

bool
c6x_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
		   rtx expected_align_exp ATTRIBUTE_UNUSED,
		   rtx expected_size_exp ATTRIBUTE_UNUSED)
{
  unsigned HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT src_mem_align, dst_mem_align, min_mem_align;
  unsigned HOST_WIDE_INT count = 0, offset = 0;
  unsigned int biggest_move = TARGET_STDW ? 8 : 4;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);

  src_mem_align = MEM_ALIGN (src) / BITS_PER_UNIT;
  dst_mem_align = MEM_ALIGN (dst) / BITS_PER_UNIT;
  min_mem_align = MIN (src_mem_align, dst_mem_align);

  if (min_mem_align > align)
    align = min_mem_align / BITS_PER_UNIT;
  if (src_mem_align < align)
    src_mem_align = align;
  if (dst_mem_align < align)
    dst_mem_align = align;

  if (CONST_INT_P (count_exp))
    count = INTVAL (count_exp);
  else
    return false;

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  if (count >= 28 && (count & 3) == 0 && align >= 4)
    {
      tree dst_expr = MEM_EXPR (dst);
      tree src_expr = MEM_EXPR (src);
      rtx fn = TARGET_INSNS_64PLUS ? strasgi64p_libfunc : strasgi_libfunc;
      rtx srcreg = force_reg (Pmode, XEXP (src, 0));
      rtx dstreg = force_reg (Pmode, XEXP (dst, 0));

      if (src_expr)
	mark_addressable (src_expr);
      if (dst_expr)
	mark_addressable (dst_expr);
      emit_library_call (fn, LCT_NORMAL, VOIDmode, 3,
			 dstreg, Pmode, srcreg, Pmode, count_exp, SImode);
      return true;
    }

  if (biggest_move > align && !TARGET_INSNS_64)
    biggest_move = align;

  if (count / biggest_move > 7)
    return false;

  while (count > 0)
    {
      rtx reg, reg_lowpart;
      enum machine_mode srcmode, dstmode;
      unsigned HOST_WIDE_INT src_size, dst_size, src_left;
      int shift;
      rtx srcmem, dstmem;

      while (biggest_move > count)
	biggest_move /= 2;

      src_size = dst_size = biggest_move;
      if (src_size > src_mem_align && src_size == 2)
	src_size = 1;
      if (dst_size > dst_mem_align && dst_size == 2)
	dst_size = 1;

      if (dst_size > src_size)
	dst_size = src_size;

      srcmode = mode_for_size (src_size * BITS_PER_UNIT, MODE_INT, 0);
      dstmode = mode_for_size (dst_size * BITS_PER_UNIT, MODE_INT, 0);
      if (src_size >= 4)
	reg_lowpart = reg = gen_reg_rtx (srcmode);
      else
	{
	  reg = gen_reg_rtx (SImode);
	  reg_lowpart = gen_lowpart (srcmode, reg);
	}

      srcmem = adjust_address (copy_rtx (src), srcmode, offset);

      if (src_size > src_mem_align)
	{
	  enum insn_code icode = (srcmode == SImode ? CODE_FOR_movmisalignsi
				  : CODE_FOR_movmisaligndi);
	  emit_insn (GEN_FCN (icode) (reg_lowpart, srcmem));
	}
      else
	emit_move_insn (reg_lowpart, srcmem);

      src_left = src_size;
      shift = TARGET_BIG_ENDIAN ? (src_size - dst_size) * BITS_PER_UNIT : 0;
      while (src_left > 0)
	{
	  rtx dstreg = reg_lowpart;

	  if (src_size > dst_size)
	    {
	      rtx srcword = reg;
	      int shift_amount = shift & (BITS_PER_WORD - 1);
	      if (src_size > 4)
		srcword = operand_subword_force (srcword, src_left >= 4 ? 0 : 4,
						 SImode);
	      if (shift_amount > 0)
		{
		  dstreg = gen_reg_rtx (SImode);
		  emit_insn (gen_lshrsi3 (dstreg, srcword,
					  GEN_INT (shift_amount)));
		}
	      else
		dstreg = srcword;
	      dstreg = gen_lowpart (dstmode, dstreg);
	    }

	  dstmem = adjust_address (copy_rtx (dst), dstmode, offset);
	  if (dst_size > dst_mem_align)
	    {
	      enum insn_code icode = (dstmode == SImode ? CODE_FOR_movmisalignsi
				      : CODE_FOR_movmisaligndi);
	      emit_insn (GEN_FCN (icode) (dstmem, dstreg));
	    }
	  else
	    emit_move_insn (dstmem, dstreg);

	  if (TARGET_BIG_ENDIAN)
	    shift -= dst_size * BITS_PER_UNIT;
	  else
	    shift += dst_size * BITS_PER_UNIT;
	  offset += dst_size;
	  src_left -= dst_size;
	}
      count -= src_size;
    }
  return true;
}
\f
/* Subroutine of print_address_operand, print a single address offset OFF for
   a memory access of mode MEM_MODE, choosing between normal form and scaled
   form depending on the type of the insn.  Misaligned memory references must
   use the scaled form.  */

static void
print_address_offset (FILE *file, rtx off, enum machine_mode mem_mode)
{
  rtx pat;

  if (c6x_current_insn != NULL_RTX)
    {
      pat = PATTERN (c6x_current_insn);
      if (GET_CODE (pat) == COND_EXEC)
	pat = COND_EXEC_CODE (pat);
      if (GET_CODE (pat) == PARALLEL)
	pat = XVECEXP (pat, 0, 0);

      if (GET_CODE (pat) == SET
	  && GET_CODE (SET_SRC (pat)) == UNSPEC
	  && XINT (SET_SRC (pat), 1) == UNSPEC_MISALIGNED_ACCESS)
	{
	  gcc_assert (CONST_INT_P (off)
		      && (INTVAL (off) & (GET_MODE_SIZE (mem_mode) - 1)) == 0);
	  fprintf (file, "[" HOST_WIDE_INT_PRINT_DEC "]",
		   INTVAL (off) / GET_MODE_SIZE (mem_mode));
	  return;
	}
    }
  fputs ("(", file);
  output_address (off);
  fputs (")", file);
}

static bool
c6x_print_operand_punct_valid_p (unsigned char c)
{
  return c == '$' || c == '.' || c == '|';
}

static void c6x_print_operand (FILE *, rtx, int);

/* Subroutine of c6x_print_operand; used to print a memory reference X to FILE.  */

static void
c6x_print_address_operand (FILE *file, rtx x, enum machine_mode mem_mode)
{
  rtx off;
  switch (GET_CODE (x))
    {
    case PRE_MODIFY:
    case POST_MODIFY:
      if (GET_CODE (x) == POST_MODIFY)
	output_address (XEXP (x, 0));
      off = XEXP (XEXP (x, 1), 1);
      if (XEXP (x, 0) == stack_pointer_rtx)
	{
	  if (GET_CODE (x) == PRE_MODIFY)
	    gcc_assert (INTVAL (off) > 0);
	  else
	    gcc_assert (INTVAL (off) < 0);
	}
      if (CONST_INT_P (off) && INTVAL (off) < 0)
	{
	  fprintf (file, "--");
	  off = GEN_INT (-INTVAL (off));
	}
      else
	fprintf (file, "++");
      if (GET_CODE (x) == PRE_MODIFY)
	output_address (XEXP (x, 0));
      print_address_offset (file, off, mem_mode);
      break;

    case PLUS:
      off = XEXP (x, 1);
      if (CONST_INT_P (off) && INTVAL (off) < 0)
	{
	  fprintf (file, "-");
	  off = GEN_INT (-INTVAL (off));
	}
      else
	fprintf (file, "+");
      output_address (XEXP (x, 0));
      print_address_offset (file, off, mem_mode);
      break;

    case PRE_DEC:
      gcc_assert (XEXP (x, 0) != stack_pointer_rtx);
      fprintf (file, "--");
      output_address (XEXP (x, 0));
      fprintf (file, "[1]");
      break;
    case PRE_INC:
      fprintf (file, "++");
      output_address (XEXP (x, 0));
      fprintf (file, "[1]");
      break;
    case POST_INC:
      gcc_assert (XEXP (x, 0) != stack_pointer_rtx);
      output_address (XEXP (x, 0));
      fprintf (file, "++[1]");
      break;
    case POST_DEC:
      output_address (XEXP (x, 0));
      fprintf (file, "--[1]");
      break;

    case SYMBOL_REF:
    case CONST:
    case LABEL_REF:
      gcc_assert (sdata_symbolic_operand (x, Pmode));
      fprintf (file, "+B14(");
      output_addr_const (file, x);
      fprintf (file, ")");
      break;

    case UNSPEC:
      switch (XINT (x, 1))
	{
	case UNSPEC_LOAD_GOT:
	  fputs ("$GOT(", file);
	  output_addr_const (file, XVECEXP (x, 0, 0));
	  fputs (")", file);
	  break;
	case UNSPEC_LOAD_SDATA:
	  output_addr_const (file, XVECEXP (x, 0, 0));
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_assert (GET_CODE (x) != MEM);
      c6x_print_operand (file, x, 0);
      break;
    }
}

/* Return a single character, which is either 'l', 's', 'd' or 'm', which
   specifies the functional unit used by INSN.  */

char
c6x_get_unit_specifier (rtx insn)
{
  enum attr_units units;

  if (insn_info.exists ())
    {
      int unit = INSN_INFO_ENTRY (INSN_UID (insn)).reservation;
      return c6x_unit_names[unit][0];
    }

  units = get_attr_units (insn);
  switch (units)
    {
    case UNITS_D:
    case UNITS_DL:
    case UNITS_DS:
    case UNITS_DLS:
    case UNITS_D_ADDR:
      return 'd';
      break;
    case UNITS_L:
    case UNITS_LS:
      return 'l';
      break;
    case UNITS_S:
      return 's';
      break;
    case UNITS_M:
      return 'm';
      break;
    default:
      gcc_unreachable ();
    }
}

/* Prints the unit specifier field.  */
static void
c6x_print_unit_specifier_field (FILE *file, rtx insn)
{
  enum attr_units units = get_attr_units (insn);
  enum attr_cross cross = get_attr_cross (insn);
  enum attr_dest_regfile rf = get_attr_dest_regfile (insn);
  int half;
  char unitspec;

  if (units == UNITS_D_ADDR)
    {
      enum attr_addr_regfile arf = get_attr_addr_regfile (insn);
      int t_half;
      gcc_assert (arf != ADDR_REGFILE_UNKNOWN);
      half = arf == ADDR_REGFILE_A ? 1 : 2;
      t_half = rf == DEST_REGFILE_A ? 1 : 2;
      fprintf (file, ".d%dt%d", half, t_half);
      return;
    }

  if (insn_info.exists ())
    {
      int unit = INSN_INFO_ENTRY (INSN_UID (insn)).reservation;
      fputs (".", file);
      fputs (c6x_unit_names[unit], file);
      if (cross == CROSS_Y)
	fputs ("x", file);
      return;
    }

  gcc_assert (rf != DEST_REGFILE_UNKNOWN);
2035 unitspec = c6x_get_unit_specifier (insn);
2036 half = rf == DEST_REGFILE_A ? 1 : 2;
2037 fprintf (file, ".%c%d%s", unitspec, half, cross == CROSS_Y ? "x" : "");
2038}
2039
2040/* Output assembly language for the address ADDR to FILE.  */
2041static void
2042c6x_print_operand_address (FILE *file, rtx addr)
2043{
2044 c6x_print_address_operand (file, addr, VOIDmode);
2045}
2046
2047/* Print an operand, X, to FILE, with an optional modifier in CODE.
2048
2049 Meaning of CODE:
2050 $ -- print the unit specifier field for the instruction.
2051    . -- print the predicate for the instruction or an empty string for an
2052 unconditional one.
2053 | -- print "||" if the insn should be issued in parallel with the previous
2054 one.
2055
2056    C -- print an opcode suffix for a reversed condition
    c -- like C, but without reversing the condition
2057 d -- H, W or D as a suffix for ADDA, based on the factor given by the
2058 operand
2059 D -- print either B, H, W or D as a suffix for ADDA, based on the size of
2060 the operand
2061 J -- print a predicate
2062 j -- like J, but use reverse predicate
2063 k -- treat a CONST_INT as a register number and print it as a register
2064    K -- like k, but print out a doubleword register
2065 n -- print an integer operand, negated
2066 p -- print the low part of a DImode register
2067 P -- print the high part of a DImode register
2068 r -- print the absolute value of an integer operand, shifted right by 1
2069 R -- print the absolute value of an integer operand, shifted right by 2
2070 f -- the first clear bit in an integer operand assumed to be a mask for
2071 a clr instruction
2072 F -- the last clear bit in such a mask
2073 s -- the first set bit in an integer operand assumed to be a mask for
2074 a set instruction
2075 S -- the last set bit in such a mask
2076 U -- print either 1 or 2, depending on the side of the machine used by
2077 the operand */
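/* A worked example of the mask modifiers (illustrative values): for the
   constant 0x0ff0, '%s' prints 4 (the first set bit) and '%S' prints 11
   (the last set bit of that run), which is the form expected by the C6X SET
   instruction; '%f' and '%F' give the analogous bounds of the first run of
   clear bits, for CLR.  */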
2078
2079static void
2080c6x_print_operand (FILE *file, rtx x, int code)
2081{
2082 int i;
2083 HOST_WIDE_INT v;
2084 tree t;
2085 enum machine_mode mode;
2086
2087 if (code == '|')
2088 {
2089 if (GET_MODE (c6x_current_insn) != TImode)
2090 fputs ("||", file);
2091 return;
2092 }
2093 if (code == '$')
2094 {
2095 c6x_print_unit_specifier_field (file, c6x_current_insn);
2096 return;
2097 }
2098
2099 if (code == '.')
2100 {
2101 x = current_insn_predicate;
2102 if (x)
2103 {
2104 unsigned int regno = REGNO (XEXP (x, 0));
2105 fputs ("[", file);
2106 if (GET_CODE (x) == EQ)
2107 fputs ("!", file);
2108 fputs (reg_names [regno], file);
2109 fputs ("]", file);
2110 }
2111 return;
2112 }
2113
2114 mode = GET_MODE (x);
2115
2116 switch (code)
2117 {
2118 case 'C':
2119 case 'c':
2120 {
2121 enum rtx_code c = GET_CODE (x);
2122 if (code == 'C')
2123 c = swap_condition (c);
2124 fputs (GET_RTX_NAME (c), file);
2125 }
2126 return;
2127
2128 case 'J':
2129 case 'j':
2130 {
2131 unsigned int regno = REGNO (XEXP (x, 0));
2132 if ((GET_CODE (x) == EQ) == (code == 'J'))
2133 fputs ("!", file);
2134 fputs (reg_names [regno], file);
2135 }
2136 return;
2137
2138 case 'k':
2139 gcc_assert (GET_CODE (x) == CONST_INT);
2140 v = INTVAL (x);
2141 fprintf (file, "%s", reg_names[v]);
2142 return;
2143 case 'K':
2144 gcc_assert (GET_CODE (x) == CONST_INT);
2145 v = INTVAL (x);
2146 gcc_assert ((v & 1) == 0);
2147 fprintf (file, "%s:%s", reg_names[v + 1], reg_names[v]);
2148 return;
2149
2150 case 's':
2151 case 'S':
2152 case 'f':
2153 case 'F':
2154 gcc_assert (GET_CODE (x) == CONST_INT);
2155 v = INTVAL (x);
2156 for (i = 0; i < 32; i++)
2157 {
2158 HOST_WIDE_INT tst = v & 1;
2159 if (((code == 'f' || code == 'F') && !tst)
2160 || ((code == 's' || code == 'S') && tst))
2161 break;
2162 v >>= 1;
2163 }
2164 if (code == 'f' || code == 's')
2165 {
2166 fprintf (file, "%d", i);
2167 return;
2168 }
2169 for (;i < 32; i++)
2170 {
2171 HOST_WIDE_INT tst = v & 1;
2172 if ((code == 'F' && tst) || (code == 'S' && !tst))
2173 break;
2174 v >>= 1;
2175 }
2176 fprintf (file, "%d", i - 1);
2177 return;
2178
2179 case 'n':
2180 gcc_assert (GET_CODE (x) == CONST_INT);
2181 output_addr_const (file, GEN_INT (-INTVAL (x)));
2182 return;
2183
2184 case 'r':
2185 gcc_assert (GET_CODE (x) == CONST_INT);
2186 v = INTVAL (x);
2187 if (v < 0)
2188 v = -v;
2189 output_addr_const (file, GEN_INT (v >> 1));
2190 return;
2191
2192 case 'R':
2193 gcc_assert (GET_CODE (x) == CONST_INT);
2194 v = INTVAL (x);
2195 if (v < 0)
2196 v = -v;
2197 output_addr_const (file, GEN_INT (v >> 2));
2198 return;
2199
2200 case 'd':
2201 gcc_assert (GET_CODE (x) == CONST_INT);
2202 v = INTVAL (x);
2203 fputs (v == 2 ? "h" : v == 4 ? "w" : "d", file);
2204 return;
2205
2206 case 'p':
2207 case 'P':
2208 gcc_assert (GET_CODE (x) == REG);
2209 v = REGNO (x);
2210 if (code == 'P')
2211 v++;
2212 fputs (reg_names[v], file);
2213 return;
2214
2215 case 'D':
2216 v = 0;
2217 if (GET_CODE (x) == CONST)
2218 {
2219 x = XEXP (x, 0);
2220 gcc_assert (GET_CODE (x) == PLUS);
2221 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
2222 v = INTVAL (XEXP (x, 1));
2223 x = XEXP (x, 0);
2224
2225 }
2226 gcc_assert (GET_CODE (x) == SYMBOL_REF);
2227
2228 t = SYMBOL_REF_DECL (x);
2229 if (DECL_P (t))
2230 v |= DECL_ALIGN_UNIT (t);
2231 else
2232 v |= TYPE_ALIGN_UNIT (TREE_TYPE (t));
2233 if (v & 1)
2234 fputs ("b", file);
2235 else if (v & 2)
2236 fputs ("h", file);
2237 else
2238 fputs ("w", file);
2239 return;
2240
2241 case 'U':
2242 if (MEM_P (x))
2243 {
2244 x = XEXP (x, 0);
2245 if (GET_CODE (x) == PLUS
2246 || GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC)
2247 x = XEXP (x, 0);
2248 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
2249 {
2250 gcc_assert (sdata_symbolic_operand (x, Pmode));
2251 fputs ("2", file);
2252 return;
2253 }
2254 }
2255 gcc_assert (REG_P (x));
2256 if (A_REGNO_P (REGNO (x)))
2257 fputs ("1", file);
2258 if (B_REGNO_P (REGNO (x)))
2259 fputs ("2", file);
2260 return;
2261
2262 default:
2263 switch (GET_CODE (x))
2264 {
2265 case REG:
2266 if (GET_MODE_SIZE (mode) == 8)
2267 fprintf (file, "%s:%s", reg_names[REGNO (x) + 1],
2268 reg_names[REGNO (x)]);
2269 else
2270 fprintf (file, "%s", reg_names[REGNO (x)]);
2271 break;
2272
2273 case MEM:
2274 fputc ('*', file);
2275 gcc_assert (XEXP (x, 0) != stack_pointer_rtx);
2276 c6x_print_address_operand (file, XEXP (x, 0), GET_MODE (x));
2277 break;
2278
2279 case SYMBOL_REF:
2280 fputc ('(', file);
2281 output_addr_const (file, x);
2282 fputc (')', file);
2283 break;
2284
2285 case CONST_INT:
2286 output_addr_const (file, x);
2287 break;
2288
2289 case CONST_DOUBLE:
2290 output_operand_lossage ("invalid const_double operand");
2291 break;
2292
2293 default:
2294 output_addr_const (file, x);
2295 }
2296 }
2297}
2298\f
2299/* Return TRUE if OP is a valid memory reference with a base register of
2300 class C. If SMALL_OFFSET is true, we disallow memory references which would
2301 require a long offset with B14/B15. */
2302
2303bool
2304c6x_mem_operand (rtx op, enum reg_class c, bool small_offset)
2305{
2306 enum machine_mode mode = GET_MODE (op);
2307 rtx base = XEXP (op, 0);
2308 switch (GET_CODE (base))
2309 {
2310 case REG:
2311 break;
2312 case PLUS:
2313 if (small_offset
2314 && (XEXP (base, 0) == stack_pointer_rtx
2315 || XEXP (base, 0) == pic_offset_table_rtx))
2316 {
2317 if (!c6x_legitimate_address_p_1 (mode, base, true, true))
2318 return false;
2319 }
2320
2321 /* fall through */
2322 case PRE_INC:
2323 case PRE_DEC:
2324 case PRE_MODIFY:
2325 case POST_INC:
2326 case POST_DEC:
2327 case POST_MODIFY:
2328 base = XEXP (base, 0);
2329 break;
2330
2331 case CONST:
2332 case LABEL_REF:
2333 case SYMBOL_REF:
2334 gcc_assert (sdata_symbolic_operand (base, Pmode));
2335 return !small_offset && c == B_REGS;
2336
2337 default:
2338 return false;
2339 }
2340 return TEST_HARD_REG_BIT (reg_class_contents[ (int) (c)], REGNO (base));
2341}
2342
2343/* Returns true if X is a valid address for use in a memory reference
2344 of mode MODE. If STRICT is true, we do not allow pseudo registers
2345 in the address. NO_LARGE_OFFSET is true if we are examining an
2346 address for use in a load or store misaligned instruction, or
2347 recursively examining an operand inside a PRE/POST_MODIFY. */
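/* As an illustration of the ranges accepted below, assuming an SImode
   (4-byte) access: a base+offset address is valid for word-aligned offsets
   in [-124, 124]; relative to the stack pointer, word-aligned offsets up to
   just under 128 KiB are also accepted when NO_LARGE_OFFSET is false.  */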
2348
2349bool
2350c6x_legitimate_address_p_1 (enum machine_mode mode, rtx x, bool strict,
2351 bool no_large_offset)
2352{
2353 int size, size1;
2354 HOST_WIDE_INT off;
2355 enum rtx_code code = GET_CODE (x);
2356
2357 switch (code)
2358 {
2359 case PRE_MODIFY:
2360 case POST_MODIFY:
2361 /* We can't split these into word-sized pieces yet. */
2362 if (!TARGET_STDW && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
2363 return false;
2364 if (GET_CODE (XEXP (x, 1)) != PLUS)
2365 return false;
2366 if (!c6x_legitimate_address_p_1 (mode, XEXP (x, 1), strict, true))
2367 return false;
2368 if (!rtx_equal_p (XEXP (x, 0), XEXP (XEXP (x, 1), 0)))
2369 return false;
2370
2371 /* fall through */
2372 case PRE_INC:
2373 case PRE_DEC:
2374 case POST_INC:
2375 case POST_DEC:
2376 /* We can't split these into word-sized pieces yet. */
2377 if (!TARGET_STDW && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
2378 return false;
2379 x = XEXP (x, 0);
2380 if (!REG_P (x))
2381 return false;
2382
2383 /* fall through */
2384 case REG:
2385 if (strict)
2386 return REGNO_OK_FOR_BASE_STRICT_P (REGNO (x));
2387 else
2388 return REGNO_OK_FOR_BASE_NONSTRICT_P (REGNO (x));
2389
2390 case PLUS:
2391 if (!REG_P (XEXP (x, 0))
2392 || !c6x_legitimate_address_p_1 (mode, XEXP (x, 0), strict, false))
2393 return false;
2394 /* We cannot ensure currently that both registers end up in the
2395 same register file. */
2396 if (REG_P (XEXP (x, 1)))
2397 return false;
2398
2399 if (mode == BLKmode)
2400 size = 4;
2401 else if (mode == VOIDmode)
2402 /* ??? This can happen during ivopts. */
2403 size = 1;
2404 else
2405 size = GET_MODE_SIZE (mode);
2406
2407 if (flag_pic
2408 && GET_CODE (XEXP (x, 1)) == UNSPEC
2409 && XINT (XEXP (x, 1), 1) == UNSPEC_LOAD_SDATA
2410 && XEXP (x, 0) == pic_offset_table_rtx
2411 && sdata_symbolic_operand (XVECEXP (XEXP (x, 1), 0, 0), SImode))
2412 return !no_large_offset && size <= 4;
2413 if (flag_pic == 1
2414 && mode == Pmode
2415 && GET_CODE (XEXP (x, 1)) == UNSPEC
2416 && XINT (XEXP (x, 1), 1) == UNSPEC_LOAD_GOT
2417 && XEXP (x, 0) == pic_offset_table_rtx
2418 && (GET_CODE (XVECEXP (XEXP (x, 1), 0, 0)) == SYMBOL_REF
2419 || GET_CODE (XVECEXP (XEXP (x, 1), 0, 0)) == LABEL_REF))
2420 return !no_large_offset;
2421 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2422 return false;
2423
2424 off = INTVAL (XEXP (x, 1));
2425
2426 /* If the machine does not have doubleword load/stores, we'll use
2427 word size accesses. */
2428 size1 = size;
2429 if (size == 2 * UNITS_PER_WORD && !TARGET_STDW)
2430 size = UNITS_PER_WORD;
2431
2432 if (((HOST_WIDE_INT)size1 - 1) & off)
2433 return false;
2434 off /= size;
2435 if (off > -32 && off < (size1 == size ? 32 : 28))
2436 return true;
2437 if (no_large_offset || code != PLUS || XEXP (x, 0) != stack_pointer_rtx
2438 || size1 > UNITS_PER_WORD)
2439 return false;
2440 return off >= 0 && off < 32768;
2441
2442 case CONST:
2443 case SYMBOL_REF:
2444 case LABEL_REF:
2445 return (!no_large_offset
2446 /* With -fpic, we must wrap it in an unspec to show the B14
2447 dependency. */
2448 && !flag_pic
2449 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
2450 && sdata_symbolic_operand (x, Pmode));
2451
2452 default:
2453 return false;
2454 }
2455}
2456
2457static bool
2458c6x_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
2459{
2460 return c6x_legitimate_address_p_1 (mode, x, strict, false);
2461}
2462
2463static bool
2464c6x_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
2465 rtx x ATTRIBUTE_UNUSED)
2466{
2467 return true;
2468}
2469\f
2470/* Implements TARGET_PREFERRED_RENAME_CLASS. */
2471static reg_class_t
2472c6x_preferred_rename_class (reg_class_t cl)
2473{
2474 if (cl == A_REGS)
2475 return NONPREDICATE_A_REGS;
2476 if (cl == B_REGS)
2477 return NONPREDICATE_B_REGS;
2478 if (cl == ALL_REGS || cl == GENERAL_REGS)
2479 return NONPREDICATE_REGS;
2480 return NO_REGS;
2481}
2482\f
2483/* Implements FINAL_PRESCAN_INSN. */
2484void
2485c6x_final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
2486 int noperands ATTRIBUTE_UNUSED)
2487{
2488 c6x_current_insn = insn;
2489}
2490\f
2491/* A structure to describe the stack layout of a function. The layout is
2492 as follows:
2493
2494 [saved frame pointer (or possibly padding0)]
2495 --> incoming stack pointer, new hard frame pointer
2496 [saved call-used regs]
2497 [optional padding1]
2498 --> soft frame pointer
2499 [frame]
2500 [outgoing arguments]
2501 [optional padding2]
2502
2503 The structure members are laid out in this order. */
2504
2505struct c6x_frame
2506{
2507 int padding0;
2508 /* Number of registers to save. */
2509 int nregs;
2510 int padding1;
2511 HOST_WIDE_INT frame;
2512 int outgoing_arguments_size;
2513 int padding2;
2514
2515 HOST_WIDE_INT to_allocate;
2516 /* The offsets relative to the incoming stack pointer (which
2517 becomes HARD_FRAME_POINTER). */
2518 HOST_WIDE_INT frame_pointer_offset;
2519 HOST_WIDE_INT b3_offset;
2520
2521 /* True if we should call push_rts/pop_rts to save and restore
2522 registers. */
2523 bool push_rts;
2524};
2525
2526/* Return true if we need to save and modify the PIC register in the
2527 prologue. */
2528
2529static bool
2530must_reload_pic_reg_p (void)
2531{
2532 struct cgraph_local_info *i = NULL;
2533
2534 if (!TARGET_DSBT)
2535 return false;
2536
2537 i = cgraph_local_info (current_function_decl);
2538
d5bf7b64 2539 if ((crtl->uses_pic_offset_table || !crtl->is_leaf) && !i->local)
5aa04b01 2540 return true;
2541 return false;
2542}
2543
2544/* Return 1 if we need to save REGNO. */
2545static int
2546c6x_save_reg (unsigned int regno)
2547{
2548 return ((df_regs_ever_live_p (regno)
2549 && !call_used_regs[regno]
2550 && !fixed_regs[regno])
2551 || (regno == RETURN_ADDR_REGNO
2552 && (df_regs_ever_live_p (regno)
d5bf7b64 2553 || !crtl->is_leaf))
5aa04b01 2554 || (regno == PIC_OFFSET_TABLE_REGNUM && must_reload_pic_reg_p ()));
2555}
2556
2557/* Examine the number of regs NREGS we've determined we must save.
2558 Return true if we should use __c6xabi_push_rts/__c6xabi_pop_rts for
2559 prologue and epilogue. */
2560
2561static bool
2562use_push_rts_p (int nregs)
2563{
2564 if (TARGET_INSNS_64PLUS && optimize_function_for_size_p (cfun)
2565 && !cfun->machine->contains_sibcall
2566 && !cfun->returns_struct
2567 && !TARGET_LONG_CALLS
2568 && nregs >= 6 && !frame_pointer_needed)
2569 return true;
2570 return false;
2571}
2572
2573/* Return the number of saved general-purpose registers.  */
2574
2575int
2576c6x_nsaved_regs (void)
2577{
2578 int nregs = 0;
2579 int regno;
2580
2581 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2582 if (c6x_save_reg (regno))
2583 nregs++;
2584 return nregs;
2585}
2586
2587/* The safe debug order mandated by the ABI. */
2588static unsigned reg_save_order[] =
2589{
2590 REG_A10, REG_A11, REG_A12, REG_A13,
2591 REG_A14, REG_B3,
2592 REG_B10, REG_B11, REG_B12, REG_B13,
2593 REG_B14, REG_A15
2594};
2595
2596#define N_SAVE_ORDER (sizeof reg_save_order / sizeof *reg_save_order)
2597
2598/* Compute the layout of the stack frame and store it in FRAME. */
2599
2600static void
2601c6x_compute_frame_layout (struct c6x_frame *frame)
2602{
2603 HOST_WIDE_INT size = get_frame_size ();
2604 HOST_WIDE_INT offset;
2605 int nregs;
2606
2607 /* We use the four bytes which are technically inside the caller's frame,
2608 usually to save the frame pointer. */
2609 offset = -4;
2610 frame->padding0 = 0;
2611 nregs = c6x_nsaved_regs ();
2612 frame->push_rts = false;
2613 frame->b3_offset = 0;
2614 if (use_push_rts_p (nregs))
2615 {
2616 frame->push_rts = true;
2617 frame->b3_offset = (TARGET_BIG_ENDIAN ? -12 : -13) * 4;
2618 nregs = 14;
2619 }
2620 else if (c6x_save_reg (REG_B3))
2621 {
2622 int idx;
2623 for (idx = N_SAVE_ORDER - 1; reg_save_order[idx] != REG_B3; idx--)
2624 {
2625 if (c6x_save_reg (reg_save_order[idx]))
2626 frame->b3_offset -= 4;
2627 }
2628 }
2629 frame->nregs = nregs;
2630
2631 if (size == 0 && nregs == 0)
2632 {
2633 frame->padding0 = 4;
2634 frame->padding1 = frame->padding2 = 0;
2635 frame->frame_pointer_offset = frame->to_allocate = 0;
2636 frame->outgoing_arguments_size = 0;
2637 return;
2638 }
2639
2640 if (!frame->push_rts)
2641 offset += frame->nregs * 4;
2642
2643 if (offset == 0 && size == 0 && crtl->outgoing_args_size == 0
d5bf7b64 2644 && !crtl->is_leaf)
5aa04b01 2645 /* Don't use the bottom of the caller's frame if we have no
2646 allocation of our own and call other functions. */
2647 frame->padding0 = frame->padding1 = 4;
2648 else if (offset & 4)
2649 frame->padding1 = 4;
2650 else
2651 frame->padding1 = 0;
2652
2653 offset += frame->padding0 + frame->padding1;
2654 frame->frame_pointer_offset = offset;
2655 offset += size;
2656
2657 frame->outgoing_arguments_size = crtl->outgoing_args_size;
2658 offset += frame->outgoing_arguments_size;
2659
2660 if ((offset & 4) == 0)
2661 frame->padding2 = 8;
2662 else
2663 frame->padding2 = 4;
2664 frame->to_allocate = offset + frame->padding2;
2665}
2666
2667/* Return the offset between two registers, one to be eliminated, and the other
2668 its replacement, at the start of a routine. */
2669
2670HOST_WIDE_INT
2671c6x_initial_elimination_offset (int from, int to)
2672{
2673 struct c6x_frame frame;
2674 c6x_compute_frame_layout (&frame);
2675
2676 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2677 return 0;
2678 else if (from == FRAME_POINTER_REGNUM
2679 && to == HARD_FRAME_POINTER_REGNUM)
2680 return -frame.frame_pointer_offset;
2681 else
2682 {
2683 gcc_assert (to == STACK_POINTER_REGNUM);
2684
2685 if (from == ARG_POINTER_REGNUM)
2686 return frame.to_allocate + (frame.push_rts ? 56 : 0);
2687
2688 gcc_assert (from == FRAME_POINTER_REGNUM);
2689 return frame.to_allocate - frame.frame_pointer_offset;
2690 }
2691}
2692
2693/* Given FROM and TO register numbers, say whether this elimination is
2694 allowed. Frame pointer elimination is automatically handled. */
2695
2696static bool
2697c6x_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2698{
2699 if (to == STACK_POINTER_REGNUM)
2700 return !frame_pointer_needed;
2701 return true;
2702}
2703
2704/* Emit insns to add OFFSET (possibly negative) to the stack pointer.  If
2705 FRAME_RELATED_P, set the RTX_FRAME_RELATED_P flag on the insns.
2706 Does nothing if the offset is zero. */
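/* For example (illustrative numbers): an adjustment of -40000 does not fit
   the 16-bit immediate range, so it is first built in A0 (high half, then a
   lo_sum for the low half) and only then added to the stack pointer, whereas
   an adjustment of -48 is a single addsi3.  */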
2707
2708static void
2709emit_add_sp_const (HOST_WIDE_INT offset, bool frame_related_p)
2710{
2711 rtx to_add = GEN_INT (offset);
2712 rtx orig_to_add = to_add;
2713 rtx insn;
2714
2715 if (offset == 0)
2716 return;
2717
2718 if (offset < -32768 || offset > 32767)
2719 {
2720 rtx reg = gen_rtx_REG (SImode, REG_A0);
2721 rtx low = GEN_INT (trunc_int_for_mode (offset, HImode));
2722
2723 insn = emit_insn (gen_movsi_high (reg, low));
2724 if (frame_related_p)
2725 RTX_FRAME_RELATED_P (insn) = 1;
2726 insn = emit_insn (gen_movsi_lo_sum (reg, reg, to_add));
2727 if (frame_related_p)
2728 RTX_FRAME_RELATED_P (insn) = 1;
2729 to_add = reg;
2730 }
2731 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2732 to_add));
2733 if (frame_related_p)
2734 {
2735 if (REG_P (to_add))
2736 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
2737 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
2738 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
2739 orig_to_add)));
2740
2741 RTX_FRAME_RELATED_P (insn) = 1;
2742 }
2743}
2744
2745/* Prologue and epilogue. */
2746void
2747c6x_expand_prologue (void)
2748{
2749 struct c6x_frame frame;
2750 rtx insn, mem;
2751 int nsaved = 0;
2752 HOST_WIDE_INT initial_offset, off, added_already;
2753
2754 c6x_compute_frame_layout (&frame);
2755
2756 if (flag_stack_usage_info)
2757 current_function_static_stack_size = frame.to_allocate;
2758
2759 initial_offset = -frame.to_allocate;
2760 if (frame.push_rts)
2761 {
2762 emit_insn (gen_push_rts ());
2763 nsaved = frame.nregs;
2764 }
2765
2766 /* If the offsets would be too large for the memory references we will
2767 create to save registers, do the stack allocation in two parts.
2768 Ensure by subtracting 8 that we don't store to the word pointed to
2769 by the stack pointer. */
2770 if (initial_offset < -32768)
2771 initial_offset = -frame.frame_pointer_offset - 8;
2772
2773 if (frame.to_allocate > 0)
2774 gcc_assert (initial_offset != 0);
2775
2776 off = -initial_offset + 4 - frame.padding0;
2777
2778 mem = gen_frame_mem (Pmode, stack_pointer_rtx);
2779
2780 added_already = 0;
2781 if (frame_pointer_needed)
2782 {
2783 rtx fp_reg = gen_rtx_REG (SImode, REG_A15);
2784 /* We go through some contortions here to both follow the ABI's
2785 recommendation that FP == incoming SP, and to avoid writing or
2786 reading the word pointed to by the stack pointer. */
2787 rtx addr = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx,
2788 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
2789 GEN_INT (-8)));
2790 insn = emit_move_insn (gen_frame_mem (Pmode, addr), fp_reg);
2791 RTX_FRAME_RELATED_P (insn) = 1;
2792 nsaved++;
2793 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, stack_pointer_rtx,
2794 GEN_INT (8)));
2795 RTX_FRAME_RELATED_P (insn) = 1;
2796 off -= 4;
2797 added_already = -8;
2798 }
2799
2800 emit_add_sp_const (initial_offset - added_already, true);
2801
2802 if (nsaved < frame.nregs)
2803 {
2804 unsigned i;
2805
2806 for (i = 0; i < N_SAVE_ORDER; i++)
2807 {
2808 int idx = N_SAVE_ORDER - i - 1;
2809 unsigned regno = reg_save_order[idx];
2810 rtx reg;
2811 enum machine_mode save_mode = SImode;
2812
2813 if (regno == REG_A15 && frame_pointer_needed)
2814 /* Already saved. */
2815 continue;
2816 if (!c6x_save_reg (regno))
2817 continue;
2818
2819 if (TARGET_STDW && (off & 4) == 0 && off <= 256
2820 && (regno & 1) == 1
2821 && i + 1 < N_SAVE_ORDER
2822 && reg_save_order[idx - 1] == regno - 1
2823 && c6x_save_reg (regno - 1))
2824 {
2825 save_mode = DImode;
2826 regno--;
2827 i++;
2828 }
2829 reg = gen_rtx_REG (save_mode, regno);
2830 off -= GET_MODE_SIZE (save_mode);
2831
2832 insn = emit_move_insn (adjust_address (mem, save_mode, off),
2833 reg);
2834 RTX_FRAME_RELATED_P (insn) = 1;
2835
2836 nsaved += HARD_REGNO_NREGS (regno, save_mode);
2837 }
2838 }
2839 gcc_assert (nsaved == frame.nregs);
2840 emit_add_sp_const (-frame.to_allocate - initial_offset, true);
2841 if (must_reload_pic_reg_p ())
2842 {
2843 if (dsbt_decl == NULL)
2844 {
2845 tree t;
2846
2847 t = build_index_type (integer_one_node);
2848 t = build_array_type (integer_type_node, t);
2849 t = build_decl (BUILTINS_LOCATION, VAR_DECL,
2850 get_identifier ("__c6xabi_DSBT_BASE"), t);
2851 DECL_ARTIFICIAL (t) = 1;
2852 DECL_IGNORED_P (t) = 1;
2853 DECL_EXTERNAL (t) = 1;
2854 TREE_STATIC (t) = 1;
2855 TREE_PUBLIC (t) = 1;
2856 TREE_USED (t) = 1;
2857
2858 dsbt_decl = t;
2859 }
2860 emit_insn (gen_setup_dsbt (pic_offset_table_rtx,
2861 XEXP (DECL_RTL (dsbt_decl), 0)));
2862 }
2863}
2864
2865void
2866c6x_expand_epilogue (bool sibcall)
2867{
2868 unsigned i;
2869 struct c6x_frame frame;
2870 rtx mem;
2871 HOST_WIDE_INT off;
2872 int nsaved = 0;
2873
2874 c6x_compute_frame_layout (&frame);
2875
2876 mem = gen_frame_mem (Pmode, stack_pointer_rtx);
2877
2878 /* Insert a dummy set/use of the stack pointer. This creates a
2879 scheduler barrier between the prologue saves and epilogue restores. */
2880 emit_insn (gen_epilogue_barrier (stack_pointer_rtx, stack_pointer_rtx));
2881
2882 /* If the offsets would be too large for the memory references we will
2883 create to restore registers, do a preliminary stack adjustment here. */
2884 off = frame.to_allocate - frame.frame_pointer_offset + frame.padding1;
2885 if (frame.push_rts)
2886 {
2887 nsaved = frame.nregs;
2888 }
2889 else
2890 {
2891 if (frame.to_allocate > 32768)
2892 {
2893 /* Don't add the entire offset so that we leave an unused word
2894 above the stack pointer. */
2895 emit_add_sp_const ((off - 16) & ~7, false);
2896 off &= 7;
2897 off += 16;
2898 }
2899 for (i = 0; i < N_SAVE_ORDER; i++)
2900 {
2901 unsigned regno = reg_save_order[i];
2902 rtx reg;
2903 enum machine_mode save_mode = SImode;
2904
2905 if (!c6x_save_reg (regno))
2906 continue;
2907 if (regno == REG_A15 && frame_pointer_needed)
2908 continue;
2909
2910 if (TARGET_STDW && (off & 4) == 0 && off < 256
2911 && (regno & 1) == 0
2912 && i + 1 < N_SAVE_ORDER
2913 && reg_save_order[i + 1] == regno + 1
2914 && c6x_save_reg (regno + 1))
2915 {
2916 save_mode = DImode;
2917 i++;
2918 }
2919 reg = gen_rtx_REG (save_mode, regno);
2920
2921 emit_move_insn (reg, adjust_address (mem, save_mode, off));
2922
2923 off += GET_MODE_SIZE (save_mode);
2924 nsaved += HARD_REGNO_NREGS (regno, save_mode);
2925 }
2926 }
2927 if (!frame_pointer_needed)
2928 emit_add_sp_const (off + frame.padding0 - 4, false);
2929 else
2930 {
2931 rtx fp_reg = gen_rtx_REG (SImode, REG_A15);
2932 rtx addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
2933 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
2934 GEN_INT (8)));
2935 emit_insn (gen_addsi3 (stack_pointer_rtx, hard_frame_pointer_rtx,
2936 GEN_INT (-8)));
2937 emit_move_insn (fp_reg, gen_frame_mem (Pmode, addr));
2938 nsaved++;
2939 }
2940 gcc_assert (nsaved == frame.nregs);
2941 if (!sibcall)
2942 {
2943 if (frame.push_rts)
2944 emit_jump_insn (gen_pop_rts ());
2945 else
2946 emit_jump_insn (gen_return_internal (gen_rtx_REG (SImode,
2947 RETURN_ADDR_REGNO)));
2948 }
2949}
2950
2951/* Return the value of the return address for the frame COUNT steps up
2952 from the current frame, after the prologue.
2953 We punt for everything but the current frame by returning const0_rtx. */
2954
2955rtx
2956c6x_return_addr_rtx (int count)
2957{
2958 if (count != 0)
2959 return const0_rtx;
2960
2961 return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNO);
2962}
2963\f
2964/* Return true iff TYPE is one of the shadow types. */
2965static bool
2966shadow_type_p (enum attr_type type)
2967{
2968 return (type == TYPE_SHADOW || type == TYPE_LOAD_SHADOW
2969 || type == TYPE_MULT_SHADOW);
2970}
2971
2972/* Return true iff INSN is a shadow pattern. */
2973static bool
2974shadow_p (rtx insn)
2975{
2976 if (!NONDEBUG_INSN_P (insn) || recog_memoized (insn) < 0)
2977 return false;
2978 return shadow_type_p (get_attr_type (insn));
2979}
2980
2981/* Return true iff INSN is a shadow or blockage pattern. */
2982static bool
2983shadow_or_blockage_p (rtx insn)
2984{
2985 enum attr_type type;
2986 if (!NONDEBUG_INSN_P (insn) || recog_memoized (insn) < 0)
2987 return false;
2988 type = get_attr_type (insn);
2989 return shadow_type_p (type) || type == TYPE_BLOCKAGE;
2990}
2991\f
2992/* Translate UNITS into a bitmask of units we can reserve for this
2993 insn. */
2994static int
2995get_reservation_flags (enum attr_units units)
2996{
2997 switch (units)
2998 {
2999 case UNITS_D:
3000 case UNITS_D_ADDR:
3001 return RESERVATION_FLAG_D;
3002 case UNITS_L:
3003 return RESERVATION_FLAG_L;
3004 case UNITS_S:
3005 return RESERVATION_FLAG_S;
3006 case UNITS_M:
3007 return RESERVATION_FLAG_M;
3008 case UNITS_LS:
3009 return RESERVATION_FLAG_LS;
3010 case UNITS_DL:
3011 return RESERVATION_FLAG_DL;
3012 case UNITS_DS:
3013 return RESERVATION_FLAG_DS;
3014 case UNITS_DLS:
3015 return RESERVATION_FLAG_DLS;
3016 default:
3017 return 0;
3018 }
3019}
3020
3021/* Compute the side of the machine used by INSN, which reserves UNITS.
3022 This must match the reservations in the scheduling description. */
3023static int
3024get_insn_side (rtx insn, enum attr_units units)
3025{
3026 if (units == UNITS_D_ADDR)
3027 return (get_attr_addr_regfile (insn) == ADDR_REGFILE_A ? 0 : 1);
3028 else
3029 {
3030 enum attr_dest_regfile rf = get_attr_dest_regfile (insn);
3031 if (rf == DEST_REGFILE_ANY)
3032 return get_attr_type (insn) == TYPE_BRANCH ? 0 : 1;
3033 else
3034 return rf == DEST_REGFILE_A ? 0 : 1;
3035 }
3036}
3037
3038/* After scheduling, walk the insns between HEAD and END and assign unit
3039 reservations. */
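/* The passes below go from most to least constrained: an insn whose
   requirement names a single unit is assigned immediately, pass 1 counts the
   remaining demand for each unit, pass 2 places insns that can use one of
   two units, and pass 3 places the fully flexible D/L/S insns, each time
   choosing the least-requested free unit on the insn's side.  */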
3040static void
3041assign_reservations (rtx head, rtx end)
3042{
3043 rtx insn;
3044 for (insn = head; insn != NEXT_INSN (end); insn = NEXT_INSN (insn))
3045 {
87ded687 3046 unsigned int sched_mask, reserved;
3047 rtx within, last;
5aa04b01 3048 int pass;
3049 int rsrv[2];
3050 int rsrv_count[2][4];
87ded687 3051 int i;
5aa04b01 3052
3053 if (GET_MODE (insn) != TImode)
3054 continue;
3055
87ded687 3056 reserved = 0;
3057 last = NULL_RTX;
3058 /* Find the last insn in the packet. It has a state recorded for it,
3059 which we can use to determine the units we should be using. */
3060 for (within = insn;
3061 (within != NEXT_INSN (end)
3062 && (within == insn || GET_MODE (within) != TImode));
3063 within = NEXT_INSN (within))
3064 {
3065 int icode;
3066 if (!NONDEBUG_INSN_P (within))
3067 continue;
3068 icode = recog_memoized (within);
3069 if (icode < 0)
3070 continue;
3071 if (shadow_p (within))
3072 continue;
3073 if (INSN_INFO_ENTRY (INSN_UID (within)).reservation != 0)
3074 reserved |= 1 << INSN_INFO_ENTRY (INSN_UID (within)).reservation;
3075 last = within;
3076 }
3077 if (last == NULL_RTX)
3078 continue;
3079
3080 sched_mask = INSN_INFO_ENTRY (INSN_UID (last)).unit_mask;
3081 sched_mask &= ~reserved;
3082
5aa04b01 3083 memset (rsrv_count, 0, sizeof rsrv_count);
87ded687 3084 rsrv[0] = rsrv[1] = ~0;
3085 for (i = 0; i < 8; i++)
3086 {
3087 int side = i / 4;
3088 int unit = i & 3;
3089 unsigned unit_bit = 1 << (unit + side * UNIT_QID_SIDE_OFFSET);
3090 /* Clear the bits which we expect to reserve in the following loop,
3091 leaving the ones set which aren't present in the scheduler's
3092 state and shouldn't be reserved. */
3093 if (sched_mask & unit_bit)
3094 rsrv[i / 4] &= ~(1 << unit);
3095 }
5aa04b01 3096
3097 /* Walk through the insns that occur in the same cycle. We use multiple
3098 passes to assign units, assigning for insns with the most specific
3099 requirements first. */
3100 for (pass = 0; pass < 4; pass++)
3101 for (within = insn;
3102 (within != NEXT_INSN (end)
3103 && (within == insn || GET_MODE (within) != TImode));
3104 within = NEXT_INSN (within))
3105 {
87ded687 3106 int uid = INSN_UID (within);
5aa04b01 3107 int this_rsrv, side;
3108 int icode;
3109 enum attr_units units;
87ded687 3110 enum attr_type type;
5aa04b01 3111 int j;
3112
3113 if (!NONDEBUG_INSN_P (within))
3114 continue;
3115 icode = recog_memoized (within);
3116 if (icode < 0)
3117 continue;
87ded687 3118 if (INSN_INFO_ENTRY (uid).reservation != 0)
3119 continue;
5aa04b01 3120 units = get_attr_units (within);
87ded687 3121 type = get_attr_type (within);
5aa04b01 3122 this_rsrv = get_reservation_flags (units);
3123 if (this_rsrv == 0)
3124 continue;
3125 side = get_insn_side (within, units);
3126
87ded687 3127 /* Certain floating point instructions are treated specially. If
3128 an insn can choose between units it can reserve, and its
3129 reservation spans more than one cycle, the reservation contains
3130 special markers in the first cycle to help us reconstruct what
3131 the automaton chose. */
3132 if ((type == TYPE_ADDDP || type == TYPE_FP4)
3133 && units == UNITS_LS)
3134 {
3135 int test1_code = ((type == TYPE_FP4 ? UNIT_QID_FPL1 : UNIT_QID_ADDDPL1)
3136 + side * UNIT_QID_SIDE_OFFSET);
3137 int test2_code = ((type == TYPE_FP4 ? UNIT_QID_FPS1 : UNIT_QID_ADDDPS1)
3138 + side * UNIT_QID_SIDE_OFFSET);
3139 if ((sched_mask & (1 << test1_code)) != 0)
3140 {
3141 this_rsrv = RESERVATION_FLAG_L;
3142 sched_mask &= ~(1 << test1_code);
3143 }
3144 else if ((sched_mask & (1 << test2_code)) != 0)
3145 {
3146 this_rsrv = RESERVATION_FLAG_S;
3147 sched_mask &= ~(1 << test2_code);
3148 }
3149 }
3150
5aa04b01 3151 if ((this_rsrv & (this_rsrv - 1)) == 0)
3152 {
87ded687 3153 int t = exact_log2 (this_rsrv) + side * UNIT_QID_SIDE_OFFSET;
5aa04b01 3154 rsrv[side] |= this_rsrv;
87ded687 3155 INSN_INFO_ENTRY (uid).reservation = t;
5aa04b01 3156 continue;
3157 }
3158
3159 if (pass == 1)
3160 {
3161 for (j = 0; j < 4; j++)
3162 if (this_rsrv & (1 << j))
3163 rsrv_count[side][j]++;
3164 continue;
3165 }
3166 if ((pass == 2 && this_rsrv != RESERVATION_FLAG_DLS)
3167 || (pass == 3 && this_rsrv == RESERVATION_FLAG_DLS))
3168 {
3169 int best = -1, best_cost = INT_MAX;
3170 for (j = 0; j < 4; j++)
3171 if ((this_rsrv & (1 << j))
3172 && !(rsrv[side] & (1 << j))
3173 && rsrv_count[side][j] < best_cost)
3174 {
3175 best_cost = rsrv_count[side][j];
3176 best = j;
3177 }
3178 gcc_assert (best != -1);
3179 rsrv[side] |= 1 << best;
3180 for (j = 0; j < 4; j++)
3181 if ((this_rsrv & (1 << j)) && j != best)
3182 rsrv_count[side][j]--;
3183
87ded687 3184 INSN_INFO_ENTRY (uid).reservation
3185 = best + side * UNIT_QID_SIDE_OFFSET;
5aa04b01 3186 }
3187 }
3188 }
3189}
758df283 3190
3191/* Return a factor by which to weight unit imbalances for a reservation
3192 R. */
3193static int
3194unit_req_factor (enum unitreqs r)
3195{
3196 switch (r)
3197 {
3198 case UNIT_REQ_D:
3199 case UNIT_REQ_L:
3200 case UNIT_REQ_S:
3201 case UNIT_REQ_M:
3202 case UNIT_REQ_X:
3203 case UNIT_REQ_T:
3204 return 1;
3205 case UNIT_REQ_DL:
3206 case UNIT_REQ_LS:
3207 case UNIT_REQ_DS:
3208 return 2;
3209 case UNIT_REQ_DLS:
3210 return 3;
3211 default:
3212 gcc_unreachable ();
3213 }
3214}
3215
3216/* Examine INSN, and store in REQ1/SIDE1 and REQ2/SIDE2 the unit
3217 requirements. Returns zero if INSN can't be handled, otherwise
3218 either one or two to show how many of the two pairs are in use.
3219   REQ1 is always used; it holds what is normally thought of as the
3220   instruction's reservation, e.g. UNIT_REQ_DL.  REQ2 is used to either
3221 describe a cross path, or for loads/stores, the T unit. */
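/* For example (illustrative): a load whose address register lives on side A
   but whose destination is in the B register file is recorded as UNIT_REQ_D
   on side A plus UNIT_REQ_T on side B; a non-memory insn that uses the cross
   path gets UNIT_REQ_X as its second requirement, on its own side.  */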
3222static int
3223get_unit_reqs (rtx insn, int *req1, int *side1, int *req2, int *side2)
3224{
3225 enum attr_units units;
3226 enum attr_cross cross;
3227 int side, req;
3228
3229 if (!NONDEBUG_INSN_P (insn) || recog_memoized (insn) < 0)
3230 return 0;
3231 units = get_attr_units (insn);
3232 if (units == UNITS_UNKNOWN)
3233 return 0;
3234 side = get_insn_side (insn, units);
3235 cross = get_attr_cross (insn);
3236
3237 req = (units == UNITS_D ? UNIT_REQ_D
3238 : units == UNITS_D_ADDR ? UNIT_REQ_D
3239 : units == UNITS_DL ? UNIT_REQ_DL
3240 : units == UNITS_DS ? UNIT_REQ_DS
3241 : units == UNITS_L ? UNIT_REQ_L
3242 : units == UNITS_LS ? UNIT_REQ_LS
3243 : units == UNITS_S ? UNIT_REQ_S
3244 : units == UNITS_M ? UNIT_REQ_M
3245 : units == UNITS_DLS ? UNIT_REQ_DLS
3246 : -1);
3247 gcc_assert (req != -1);
3248 *req1 = req;
3249 *side1 = side;
3250 if (units == UNITS_D_ADDR)
3251 {
3252 *req2 = UNIT_REQ_T;
3253 *side2 = side ^ (cross == CROSS_Y ? 1 : 0);
3254 return 2;
3255 }
3256 else if (cross == CROSS_Y)
3257 {
3258 *req2 = UNIT_REQ_X;
3259 *side2 = side;
3260 return 2;
3261 }
3262 return 1;
3263}
3264
3265/* Walk the insns between and including HEAD and TAIL, and mark the
3266 resource requirements in the unit_reqs table. */
3267static void
3268count_unit_reqs (unit_req_table reqs, rtx head, rtx tail)
3269{
3270 rtx insn;
3271
3272 memset (reqs, 0, sizeof (unit_req_table));
3273
3274 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
3275 {
3276 int side1, side2, req1, req2;
3277
3278 switch (get_unit_reqs (insn, &req1, &side1, &req2, &side2))
3279 {
3280 case 2:
3281 reqs[side2][req2]++;
3282 /* fall through */
3283 case 1:
3284 reqs[side1][req1]++;
3285 break;
3286 }
3287 }
3288}
3289
3290/* Update the table REQS by merging more specific unit reservations into
3291 more general ones, i.e. counting (for example) UNIT_REQ_D also in
3292 UNIT_REQ_DL, DS, and DLS. */
3293static void
3294merge_unit_reqs (unit_req_table reqs)
3295{
3296 int side;
3297 for (side = 0; side < 2; side++)
3298 {
3299 int d = reqs[side][UNIT_REQ_D];
3300 int l = reqs[side][UNIT_REQ_L];
3301 int s = reqs[side][UNIT_REQ_S];
3302 int dl = reqs[side][UNIT_REQ_DL];
3303 int ls = reqs[side][UNIT_REQ_LS];
3304 int ds = reqs[side][UNIT_REQ_DS];
3305
3306 reqs[side][UNIT_REQ_DL] += d;
3307 reqs[side][UNIT_REQ_DL] += l;
3308 reqs[side][UNIT_REQ_DS] += d;
3309 reqs[side][UNIT_REQ_DS] += s;
3310 reqs[side][UNIT_REQ_LS] += l;
3311 reqs[side][UNIT_REQ_LS] += s;
3312 reqs[side][UNIT_REQ_DLS] += ds + dl + ls + d + l + s;
3313 }
3314}
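/* Illustrative numbers for the merge above: starting with D = 2, L = 1,
   S = 0 and all pairwise counts zero on one side, the merged result is
   DL = 3, DS = 2, LS = 1 and DLS = 3.  */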
3315
9a4c7d5b 3316/* Examine the table REQS and return a measure of unit imbalance by comparing
3317 the two sides of the machine. If, for example, D1 is used twice and D2
3318 used not at all, the return value should be 1 in the absence of other
3319 imbalances. */
3320static int
3321unit_req_imbalance (unit_req_table reqs)
3322{
3323 int val = 0;
3324 int i;
3325
3326 for (i = 0; i < UNIT_REQ_MAX; i++)
3327 {
6d004ec3 3328 int factor = unit_req_factor ((enum unitreqs) i);
9a4c7d5b 3329 int diff = abs (reqs[0][i] - reqs[1][i]);
3330 val += (diff + factor - 1) / factor / 2;
3331 }
3332 return val;
3333}
3334
758df283 3335/* Return the resource-constrained minimum iteration interval given the
3336 data in the REQS table. This must have been processed with
3337 merge_unit_reqs already. */
3338static int
3339res_mii (unit_req_table reqs)
3340{
3341 int side, req;
3342 int worst = 1;
3343 for (side = 0; side < 2; side++)
3344 for (req = 0; req < UNIT_REQ_MAX; req++)
3345 {
6d004ec3 3346 int factor = unit_req_factor ((enum unitreqs) req);
758df283 3347	worst = MAX ((reqs[side][req] + factor - 1) / factor, worst);
3348 }
3349
3350 return worst;
3351}
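/* For instance (in terms of the intended per-requirement counts): seven
   insns on one side that may use any of D, L and S (factor 3) force an II of
   at least 7/3 rounded up, i.e. 3, while five insns that need the M unit
   force an II of at least 5.  */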
9a4c7d5b 3352
3353/* Examine INSN, and store in PMASK1 and PMASK2 bitmasks that represent
3354 the operands that are involved in the (up to) two reservations, as
3355 found by get_unit_reqs. Return true if we did this successfully, false
3356 if we couldn't identify what to do with INSN. */
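/* For example, an insn whose op_pattern is "sxs" has operands 0 and 2 tied
   to the main functional unit (PMASK1 = (1 << 0) | (1 << 2)) and operand 1
   reachable only through the cross path (PMASK2 = 1 << 1).  */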
3357static bool
3358get_unit_operand_masks (rtx insn, unsigned int *pmask1, unsigned int *pmask2)
3359{
9a4c7d5b 3360 enum attr_op_pattern op_pat;
3361
3362 if (recog_memoized (insn) < 0)
3363    return false;
3364 if (GET_CODE (PATTERN (insn)) == COND_EXEC)
3365 return false;
3366 extract_insn (insn);
3367 op_pat = get_attr_op_pattern (insn);
3368 if (op_pat == OP_PATTERN_DT)
3369 {
3370 gcc_assert (recog_data.n_operands == 2);
3371 *pmask1 = 1 << 0;
3372 *pmask2 = 1 << 1;
3373 return true;
3374 }
3375 else if (op_pat == OP_PATTERN_TD)
3376 {
3377 gcc_assert (recog_data.n_operands == 2);
3378 *pmask1 = 1 << 1;
3379 *pmask2 = 1 << 0;
3380 return true;
3381 }
3382 else if (op_pat == OP_PATTERN_SXS)
3383 {
3384 gcc_assert (recog_data.n_operands == 3);
3385 *pmask1 = (1 << 0) | (1 << 2);
3386 *pmask2 = 1 << 1;
3387 return true;
3388 }
3389 else if (op_pat == OP_PATTERN_SX)
3390 {
3391 gcc_assert (recog_data.n_operands == 2);
3392 *pmask1 = 1 << 0;
3393 *pmask2 = 1 << 1;
3394 return true;
3395 }
3396 else if (op_pat == OP_PATTERN_SSX)
3397 {
3398 gcc_assert (recog_data.n_operands == 3);
3399 *pmask1 = (1 << 0) | (1 << 1);
3400 *pmask2 = 1 << 2;
3401 return true;
3402 }
3403 return false;
3404}
3405
3406/* Try to replace a register in INSN, which has corresponding rename info
3407 from regrename_analyze in INFO. OP_MASK and ORIG_SIDE provide information
3408 about the operands that must be renamed and the side they are on.
3409 REQS is the table of unit reservations in the loop between HEAD and TAIL.
3410 We recompute this information locally after our transformation, and keep
3411 it only if we managed to improve the balance. */
3412static void
3413try_rename_operands (rtx head, rtx tail, unit_req_table reqs, rtx insn,
3414 insn_rr_info *info, unsigned int op_mask, int orig_side)
3415{
3416 enum reg_class super_class = orig_side == 0 ? B_REGS : A_REGS;
3417 HARD_REG_SET unavailable;
9a4c7d5b 3418 du_head_p this_head;
3419 struct du_chain *chain;
3420 int i;
3421 unsigned tmp_mask;
3422 int best_reg, old_reg;
f1f41a6c 3423 vec<du_head_p> involved_chains = vec<du_head_p>();
9a4c7d5b 3424 unit_req_table new_reqs;
3425
3426 for (i = 0, tmp_mask = op_mask; tmp_mask; i++)
3427 {
3428 du_head_p op_chain;
3429 if ((tmp_mask & (1 << i)) == 0)
3430 continue;
3431 if (info->op_info[i].n_chains != 1)
3432 goto out_fail;
3433 op_chain = regrename_chain_from_id (info->op_info[i].heads[0]->id);
f1f41a6c 3434 involved_chains.safe_push (op_chain);
9a4c7d5b 3435 tmp_mask &= ~(1 << i);
3436 }
3437
f1f41a6c 3438 if (involved_chains.length () > 1)
9a4c7d5b 3439 goto out_fail;
3440
f1f41a6c 3441 this_head = involved_chains[0];
9a4c7d5b 3442 if (this_head->cannot_rename)
3443 goto out_fail;
3444
3445 for (chain = this_head->first; chain; chain = chain->next_use)
3446 {
3447 unsigned int mask1, mask2, mask_changed;
3448 int count, side1, side2, req1, req2;
f1f41a6c 3449 insn_rr_info *this_rr = &insn_rr[INSN_UID (chain->insn)];
9a4c7d5b 3450
3451 count = get_unit_reqs (chain->insn, &req1, &side1, &req2, &side2);
3452
3453 if (count == 0)
3454 goto out_fail;
3455
3456 if (!get_unit_operand_masks (chain->insn, &mask1, &mask2))
3457 goto out_fail;
3458
3459 extract_insn (chain->insn);
3460
3461 mask_changed = 0;
3462 for (i = 0; i < recog_data.n_operands; i++)
3463 {
3464 int j;
3465 int n_this_op = this_rr->op_info[i].n_chains;
3466 for (j = 0; j < n_this_op; j++)
3467 {
3468 du_head_p other = this_rr->op_info[i].heads[j];
3469 if (regrename_chain_from_id (other->id) == this_head)
3470 break;
3471 }
3472 if (j == n_this_op)
3473 continue;
3474
3475 if (n_this_op != 1)
3476 goto out_fail;
3477 mask_changed |= 1 << i;
3478 }
3479 gcc_assert (mask_changed != 0);
3480 if (mask_changed != mask1 && mask_changed != mask2)
3481 goto out_fail;
3482 }
3483
3484 /* If we get here, we can do the renaming. */
3485 COMPL_HARD_REG_SET (unavailable, reg_class_contents[(int) super_class]);
3486
3487 old_reg = this_head->regno;
3488 best_reg = find_best_rename_reg (this_head, super_class, &unavailable, old_reg);
3489
3490 regrename_do_replace (this_head, best_reg);
3491
3492 count_unit_reqs (new_reqs, head, PREV_INSN (tail));
3493 merge_unit_reqs (new_reqs);
3494 if (dump_file)
3495 {
3496 fprintf (dump_file, "reshuffle for insn %d, op_mask %x, "
3497 "original side %d, new reg %d\n",
3498 INSN_UID (insn), op_mask, orig_side, best_reg);
3499 fprintf (dump_file, " imbalance %d -> %d\n",
3500 unit_req_imbalance (reqs), unit_req_imbalance (new_reqs));
3501 }
3502 if (unit_req_imbalance (new_reqs) > unit_req_imbalance (reqs))
3503 regrename_do_replace (this_head, old_reg);
3504 else
3505 memcpy (reqs, new_reqs, sizeof (unit_req_table));
3506
3507 out_fail:
f1f41a6c 3508 involved_chains.release ();
9a4c7d5b 3509}
3510
3511/* Find insns in LOOP which would, if shifted to the other side
3512 of the machine, reduce an imbalance in the unit reservations. */
3513static void
3514reshuffle_units (basic_block loop)
3515{
3516 rtx head = BB_HEAD (loop);
3517 rtx tail = BB_END (loop);
3518 rtx insn;
9a4c7d5b 3519 unit_req_table reqs;
3520 edge e;
3521 edge_iterator ei;
9a4c7d5b 3522 bitmap_head bbs;
3523
3524 count_unit_reqs (reqs, head, PREV_INSN (tail));
3525 merge_unit_reqs (reqs);
3526
3527 regrename_init (true);
3528
3529 bitmap_initialize (&bbs, &bitmap_default_obstack);
3530
3531 FOR_EACH_EDGE (e, ei, loop->preds)
6d004ec3 3532 bitmap_set_bit (&bbs, e->src->index);
3533
9a4c7d5b 3534 bitmap_set_bit (&bbs, loop->index);
3535 regrename_analyze (&bbs);
3536
3537 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
3538 {
3539 enum attr_units units;
9a4c7d5b 3540 int count, side1, side2, req1, req2;
3541 unsigned int mask1, mask2;
3542 insn_rr_info *info;
3543
3544 if (!NONDEBUG_INSN_P (insn))
3545 continue;
3546
3547 count = get_unit_reqs (insn, &req1, &side1, &req2, &side2);
3548
3549 if (count == 0)
3550 continue;
3551
3552 if (!get_unit_operand_masks (insn, &mask1, &mask2))
3553 continue;
3554
f1f41a6c 3555 info = &insn_rr[INSN_UID (insn)];
9a4c7d5b 3556 if (info->op_info == NULL)
3557 continue;
3558
3559 if (reqs[side1][req1] > 1
3560 && reqs[side1][req1] > 2 * reqs[side1 ^ 1][req1])
3561 {
3562 try_rename_operands (head, tail, reqs, insn, info, mask1, side1);
3563 }
3564
3565 units = get_attr_units (insn);
3566 if (units == UNITS_D_ADDR)
3567 {
3568 gcc_assert (count == 2);
3569 if (reqs[side2][req2] > 1
3570 && reqs[side2][req2] > 2 * reqs[side2 ^ 1][req2])
3571 {
3572 try_rename_operands (head, tail, reqs, insn, info, mask2, side2);
3573 }
3574 }
3575 }
3576 regrename_finish ();
3577}
5aa04b01 3578\f
3579/* Backend scheduling state. */
3580typedef struct c6x_sched_context
3581{
3582 /* The current scheduler clock, saved in the sched_reorder hook. */
3583 int curr_sched_clock;
3584
3585 /* Number of insns issued so far in this cycle. */
3586 int issued_this_cycle;
3587
3588 /* We record the time at which each jump occurs in JUMP_CYCLES. The
3589 theoretical maximum for number of jumps in flight is 12: 2 every
3590 cycle, with a latency of 6 cycles each. This is a circular
3591 buffer; JUMP_CYCLE_INDEX is the pointer to the start. Earlier
3592 jumps have a higher index. This array should be accessed through
3593 the jump_cycle function. */
3594 int jump_cycles[12];
3595 int jump_cycle_index;
3596
3597 /* In parallel with jump_cycles, this array records the opposite of
3598 the condition used in each pending jump. This is used to
3599 predicate insns that are scheduled in the jump's delay slots. If
3600 this is NULL_RTX no such predication happens. */
3601 rtx jump_cond[12];
3602
3603 /* Similar to the jump_cycles mechanism, but here we take into
3604 account all insns with delay slots, to avoid scheduling asms into
3605 the delay slots. */
3606 int delays_finished_at;
3607
3608 /* The following variable value is the last issued insn. */
3609 rtx last_scheduled_insn;
758df283 3610 /* The last issued insn that isn't a shadow of another. */
3611 rtx last_scheduled_iter0;
5aa04b01 3612
87ded687 3613 /* The following variable value is DFA state before issuing the
3614 first insn in the current clock cycle. We do not use this member
3615 of the structure directly; we copy the data in and out of
3616 prev_cycle_state. */
3617 state_t prev_cycle_state_ctx;
758df283 3618
5aa04b01 3619 int reg_n_accesses[FIRST_PSEUDO_REGISTER];
3620 int reg_n_xaccesses[FIRST_PSEUDO_REGISTER];
3621 int reg_set_in_cycle[FIRST_PSEUDO_REGISTER];
3622
3623 int tmp_reg_n_accesses[FIRST_PSEUDO_REGISTER];
3624 int tmp_reg_n_xaccesses[FIRST_PSEUDO_REGISTER];
3625} *c6x_sched_context_t;
3626
3627/* The current scheduling state. */
3628static struct c6x_sched_context ss;
3629
9d75589a 3630/* The following variable value is DFA state before issuing the first insn
87ded687 3631 in the current clock cycle. This is used in c6x_variable_issue for
3632 comparison with the state after issuing the last insn in a cycle. */
3633static state_t prev_cycle_state;
3634
5aa04b01 3635/* Set when we discover while processing an insn that it would lead to too
3636 many accesses of the same register. */
3637static bool reg_access_stall;
3638
758df283 3639/* The highest insn uid after delayed insns were split, but before loop bodies
3640 were copied by the modulo scheduling code. */
3641static int sploop_max_uid_iter0;
3642
5aa04b01 3643/* Look up the jump cycle with index N. For an out-of-bounds N, we return 0,
3644 so the caller does not specifically have to test for it. */
3645static int
3646get_jump_cycle (int n)
3647{
3648 if (n >= 12)
3649 return 0;
3650 n += ss.jump_cycle_index;
3651 if (n >= 12)
3652 n -= 12;
3653 return ss.jump_cycles[n];
3654}
3655
3656/* Look up the jump condition with index N. */
3657static rtx
3658get_jump_cond (int n)
3659{
3660 if (n >= 12)
3661 return NULL_RTX;
3662 n += ss.jump_cycle_index;
3663 if (n >= 12)
3664 n -= 12;
3665 return ss.jump_cond[n];
3666}
3667
3668/* Return the index of the first jump that occurs after CLOCK_VAR. If no jump
3669 has delay slots beyond CLOCK_VAR, return -1. */
3670static int
3671first_jump_index (int clock_var)
3672{
3673 int retval = -1;
3674 int n = 0;
3675 for (;;)
3676 {
3677 int t = get_jump_cycle (n);
3678 if (t <= clock_var)
3679 break;
3680 retval = n;
3681 n++;
3682 }
3683 return retval;
3684}
3685
3686/* Add a new entry in our scheduling state for a jump that occurs in CYCLE
3687 and has the opposite condition of COND. */
3688static void
3689record_jump (int cycle, rtx cond)
3690{
3691 if (ss.jump_cycle_index == 0)
3692 ss.jump_cycle_index = 11;
3693 else
3694 ss.jump_cycle_index--;
3695 ss.jump_cycles[ss.jump_cycle_index] = cycle;
3696 ss.jump_cond[ss.jump_cycle_index] = cond;
3697}
3698
3699/* Set the clock cycle of INSN to CYCLE. Also clears the insn's entry in
3700 new_conditions. */
3701static void
3702insn_set_clock (rtx insn, int cycle)
3703{
3704 unsigned uid = INSN_UID (insn);
3705
3706 if (uid >= INSN_INFO_LENGTH)
f1f41a6c 3707 insn_info.safe_grow (uid * 5 / 4 + 10);
5aa04b01 3708
3709 INSN_INFO_ENTRY (uid).clock = cycle;
3710 INSN_INFO_ENTRY (uid).new_cond = NULL;
87ded687 3711 INSN_INFO_ENTRY (uid).reservation = 0;
5aa04b01 3712 INSN_INFO_ENTRY (uid).ebb_start = false;
3713}
3714
3715/* Return the clock cycle we set for the insn with uid UID. */
3716static int
3717insn_uid_get_clock (int uid)
3718{
3719 return INSN_INFO_ENTRY (uid).clock;
3720}
3721
3722/* Return the clock cycle we set for INSN. */
3723static int
3724insn_get_clock (rtx insn)
3725{
3726 return insn_uid_get_clock (INSN_UID (insn));
3727}
3728
3729/* Examine INSN, and if it is a conditional jump of any kind, return
3730 the opposite of the condition in which it branches. Otherwise,
3731 return NULL_RTX. */
3732static rtx
3733condjump_opposite_condition (rtx insn)
3734{
3735 rtx pat = PATTERN (insn);
3736 int icode = INSN_CODE (insn);
3737 rtx x = NULL;
3738
3739 if (icode == CODE_FOR_br_true || icode == CODE_FOR_br_false)
3740 {
3741 x = XEXP (SET_SRC (pat), 0);
3742 if (icode == CODE_FOR_br_false)
3743 return x;
3744 }
3745 if (GET_CODE (pat) == COND_EXEC)
3746 {
3747 rtx t = COND_EXEC_CODE (pat);
3748 if ((GET_CODE (t) == PARALLEL
3749 && GET_CODE (XVECEXP (t, 0, 0)) == RETURN)
3750 || (GET_CODE (t) == UNSPEC && XINT (t, 1) == UNSPEC_REAL_JUMP)
3751 || (GET_CODE (t) == SET && SET_DEST (t) == pc_rtx))
3752 x = COND_EXEC_TEST (pat);
3753 }
3754
3755 if (x != NULL_RTX)
3756 {
3757 enum rtx_code code = GET_CODE (x);
3758 x = gen_rtx_fmt_ee (code == EQ ? NE : EQ,
3759 GET_MODE (x), XEXP (x, 0),
3760 XEXP (x, 1));
3761 }
3762 return x;
3763}
3764
3765/* Return true iff COND1 and COND2 are exactly opposite conditions,
3766 one of them NE and the other EQ. */
3767static bool
3768conditions_opposite_p (rtx cond1, rtx cond2)
3769{
3770 return (rtx_equal_p (XEXP (cond1, 0), XEXP (cond2, 0))
3771 && rtx_equal_p (XEXP (cond1, 1), XEXP (cond2, 1))
3772 && GET_CODE (cond1) == reverse_condition (GET_CODE (cond2)));
3773}
3774
3775/* Return true if we can add a predicate COND to INSN, or if INSN
3776 already has that predicate. If DOIT is true, also perform the
3777 modification. */
3778static bool
3779predicate_insn (rtx insn, rtx cond, bool doit)
3780{
3781 int icode;
3782 if (cond == NULL_RTX)
3783 {
3784 gcc_assert (!doit);
3785 return false;
3786 }
3787
3788 if (get_attr_predicable (insn) == PREDICABLE_YES
3789 && GET_CODE (PATTERN (insn)) != COND_EXEC)
3790 {
3791 if (doit)
3792 {
3793 rtx newpat = gen_rtx_COND_EXEC (VOIDmode, cond, PATTERN (insn));
3794 PATTERN (insn) = newpat;
3795 INSN_CODE (insn) = -1;
3796 }
3797 return true;
3798 }
3799 if (GET_CODE (PATTERN (insn)) == COND_EXEC
3800 && rtx_equal_p (COND_EXEC_TEST (PATTERN (insn)), cond))
3801 return true;
3802 icode = INSN_CODE (insn);
3803 if (icode == CODE_FOR_real_jump
3804 || icode == CODE_FOR_jump
3805 || icode == CODE_FOR_indirect_jump)
3806 {
3807 rtx pat = PATTERN (insn);
3808 rtx dest = (icode == CODE_FOR_real_jump ? XVECEXP (pat, 0, 0)
3809 : icode == CODE_FOR_jump ? XEXP (SET_SRC (pat), 0)
3810 : SET_SRC (pat));
3811 if (doit)
3812 {
3813 rtx newpat;
3814 if (REG_P (dest))
3815 newpat = gen_rtx_COND_EXEC (VOIDmode, cond, PATTERN (insn));
3816 else
3817 newpat = gen_br_true (cond, XEXP (cond, 0), dest);
3818 PATTERN (insn) = newpat;
3819 INSN_CODE (insn) = -1;
3820 }
3821 return true;
3822 }
3823 if (INSN_CODE (insn) == CODE_FOR_br_true)
3824 {
3825 rtx br_cond = XEXP (SET_SRC (PATTERN (insn)), 0);
3826 return rtx_equal_p (br_cond, cond);
3827 }
3828 if (INSN_CODE (insn) == CODE_FOR_br_false)
3829 {
3830 rtx br_cond = XEXP (SET_SRC (PATTERN (insn)), 0);
3831 return conditions_opposite_p (br_cond, cond);
3832 }
3833 return false;
3834}
3835
3836/* Initialize SC. Used by c6x_init_sched_context and c6x_sched_init. */
3837static void
3838init_sched_state (c6x_sched_context_t sc)
3839{
3840 sc->last_scheduled_insn = NULL_RTX;
758df283 3841 sc->last_scheduled_iter0 = NULL_RTX;
5aa04b01 3842 sc->issued_this_cycle = 0;
3843 memset (sc->jump_cycles, 0, sizeof sc->jump_cycles);
3844 memset (sc->jump_cond, 0, sizeof sc->jump_cond);
3845 sc->jump_cycle_index = 0;
3846 sc->delays_finished_at = 0;
3847 sc->curr_sched_clock = 0;
3848
87ded687 3849 sc->prev_cycle_state_ctx = xmalloc (dfa_state_size);
3850
5aa04b01 3851 memset (sc->reg_n_accesses, 0, sizeof sc->reg_n_accesses);
3852 memset (sc->reg_n_xaccesses, 0, sizeof sc->reg_n_xaccesses);
3853 memset (sc->reg_set_in_cycle, 0, sizeof sc->reg_set_in_cycle);
87ded687 3854
3855 state_reset (sc->prev_cycle_state_ctx);
5aa04b01 3856}
3857
3858/* Allocate store for new scheduling context. */
3859static void *
3860c6x_alloc_sched_context (void)
3861{
3862 return xmalloc (sizeof (struct c6x_sched_context));
3863}
3864
3865/* If CLEAN_P is true then initializes _SC with clean data,
3866 and from the global context otherwise. */
3867static void
3868c6x_init_sched_context (void *_sc, bool clean_p)
3869{
3870 c6x_sched_context_t sc = (c6x_sched_context_t) _sc;
3871
3872 if (clean_p)
3873 {
3874 init_sched_state (sc);
3875 }
3876 else
87ded687 3877 {
3878 *sc = ss;
3879 sc->prev_cycle_state_ctx = xmalloc (dfa_state_size);
3880 memcpy (sc->prev_cycle_state_ctx, prev_cycle_state, dfa_state_size);
3881 }
5aa04b01 3882}
3883
3884/* Sets the global scheduling context to the one pointed to by _SC. */
3885static void
3886c6x_set_sched_context (void *_sc)
3887{
3888 c6x_sched_context_t sc = (c6x_sched_context_t) _sc;
3889
3890 gcc_assert (sc != NULL);
3891 ss = *sc;
87ded687 3892 memcpy (prev_cycle_state, sc->prev_cycle_state_ctx, dfa_state_size);
3893}
3894
3895/* Clear data in _SC. */
3896static void
3897c6x_clear_sched_context (void *_sc)
3898{
3899 c6x_sched_context_t sc = (c6x_sched_context_t) _sc;
3900 gcc_assert (_sc != NULL);
3901
758df283 3902 free (sc->prev_cycle_state_ctx);
5aa04b01 3903}
3904
3905/* Free _SC. */
3906static void
3907c6x_free_sched_context (void *_sc)
3908{
3909 free (_sc);
3910}
3911
d452a169 3912/* True if we are currently performing a preliminary scheduling
3913 pass before modulo scheduling; we can't allow the scheduler to
3914 modify instruction patterns using packetization assumptions,
3915 since there will be another scheduling pass later if modulo
3916 scheduling fails. */
3917static bool in_hwloop;
3918
5aa04b01 3919/* Provide information about speculation capabilities, and set the
3920 DO_BACKTRACKING and DO_PREDICATION flags. */
3921static void
3922c6x_set_sched_flags (spec_info_t spec_info)
3923{
3924 unsigned int *flags = &(current_sched_info->flags);
3925
3926 if (*flags & SCHED_EBB)
3927 {
effd1640 3928 *flags |= DO_BACKTRACKING | DO_PREDICATION;
5aa04b01 3929 }
d452a169 3930 if (in_hwloop)
3931 *flags |= DONT_BREAK_DEPENDENCIES;
5aa04b01 3932
3933 spec_info->mask = 0;
3934}
3935
3936/* Implement the TARGET_SCHED_ISSUE_RATE hook. */
3937
3938static int
3939c6x_issue_rate (void)
3940{
3941 return 8;
3942}
3943
87ded687 3944/* Used together with the collapse_ndfa option, this ensures that we reach a
3945 deterministic automaton state before trying to advance a cycle.
3946 With collapse_ndfa, genautomata creates advance cycle arcs only for
3947 such deterministic states. */
3948
3949static rtx
3950c6x_sched_dfa_pre_cycle_insn (void)
3951{
3952 return const0_rtx;
3953}
3954
5aa04b01 3955/* We're beginning a new block. Initialize data structures as necessary. */
3956
3957static void
3958c6x_sched_init (FILE *dump ATTRIBUTE_UNUSED,
3959 int sched_verbose ATTRIBUTE_UNUSED,
3960 int max_ready ATTRIBUTE_UNUSED)
3961{
87ded687 3962 if (prev_cycle_state == NULL)
3963 {
3964 prev_cycle_state = xmalloc (dfa_state_size);
3965 }
5aa04b01 3966 init_sched_state (&ss);
87ded687 3967 state_reset (prev_cycle_state);
3968}
3969
3970/* We are about to begin issuing INSN. Return nonzero if we cannot
3971 issue it on the given cycle CLOCK and return zero if we should not sort
3972 the ready queue on the next clock start.
3973 For C6X, we use this function just to copy the previous DFA state
3974 for comparison purposes. */
3975
3976static int
3977c6x_dfa_new_cycle (FILE *dump ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3978 rtx insn ATTRIBUTE_UNUSED, int last_clock ATTRIBUTE_UNUSED,
3979 int clock ATTRIBUTE_UNUSED, int *sort_p ATTRIBUTE_UNUSED)
3980{
3981 if (clock != last_clock)
3982 memcpy (prev_cycle_state, curr_state, dfa_state_size);
3983 return 0;
5aa04b01 3984}
3985
3986static void
3987c6x_mark_regno_read (int regno, bool cross)
3988{
3989 int t = ++ss.tmp_reg_n_accesses[regno];
3990
3991 if (t > 4)
3992 reg_access_stall = true;
3993
3994 if (cross)
3995 {
3996 int set_cycle = ss.reg_set_in_cycle[regno];
3997 /* This must be done in this way rather than by tweaking things in
3998 adjust_cost, since the stall occurs even for insns with opposite
3999 predicates, and the scheduler may not even see a dependency. */
4000 if (set_cycle > 0 && set_cycle == ss.curr_sched_clock)
4001 reg_access_stall = true;
4002 /* This doesn't quite do anything yet as we're only modeling one
4003 x unit. */
4004 ++ss.tmp_reg_n_xaccesses[regno];
4005 }
4006}
4007
4008/* Note that REG is read in the insn being examined. If CROSS, it
4009 means the access is through a cross path. Update the temporary reg
4010 access arrays, and set REG_ACCESS_STALL if the insn can't be issued
4011 in the current cycle. */
4012
4013static void
4014c6x_mark_reg_read (rtx reg, bool cross)
4015{
4016 unsigned regno = REGNO (reg);
4017 unsigned nregs = hard_regno_nregs[regno][GET_MODE (reg)];
4018
4019 while (nregs-- > 0)
4020 c6x_mark_regno_read (regno + nregs, cross);
4021}
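
/* A minimal, self-contained sketch (not from the original sources) of the
   marking idiom used in c6x_mark_reg_read above: a value that occupies
   NREGS hard registers starting at REGNO is marked one register at a time,
   counting down from the highest-numbered register.  The array size and
   register numbers below are arbitrary.  */
#if 0
#include <stdio.h>

/* Hypothetical per-register access counters.  */
static int access_count[32];

static void
mark_regs_read (unsigned int regno, unsigned int nregs)
{
  /* Same loop shape as c6x_mark_reg_read: visit regno + nregs - 1
     down to regno.  */
  while (nregs-- > 0)
    access_count[regno + nregs]++;
}

int
main (void)
{
  mark_regs_read (4, 2);	/* E.g. a two-register (64-bit) value.  */
  printf ("%d %d\n", access_count[4], access_count[5]);  /* Prints "1 1".  */
  return 0;
}
#endif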
4022
4023/* Note that register REG is written in cycle CYCLES. */
4024
4025static void
4026c6x_mark_reg_written (rtx reg, int cycles)
4027{
4028 unsigned regno = REGNO (reg);
4029 unsigned nregs = hard_regno_nregs[regno][GET_MODE (reg)];
4030
4031 while (nregs-- > 0)
4032 ss.reg_set_in_cycle[regno + nregs] = cycles;
4033}
4034
4035/* Update the register state information for instruction INSN.
4036 Return true if the instruction has to be delayed until the
4037 next cycle. */
4038
4039static bool
4040c6x_registers_update (rtx insn)
4041{
4042 enum attr_cross cross;
4043 enum attr_dest_regfile destrf;
4044 int i, nops;
4045 rtx x;
4046
4047 if (!reload_completed || recog_memoized (insn) < 0)
4048 return false;
4049
4050 reg_access_stall = false;
4051 memcpy (ss.tmp_reg_n_accesses, ss.reg_n_accesses,
4052 sizeof ss.tmp_reg_n_accesses);
4053 memcpy (ss.tmp_reg_n_xaccesses, ss.reg_n_xaccesses,
4054 sizeof ss.tmp_reg_n_xaccesses);
4055
4056 extract_insn (insn);
4057
4058 cross = get_attr_cross (insn);
4059 destrf = get_attr_dest_regfile (insn);
4060
4061 nops = recog_data.n_operands;
4062 x = PATTERN (insn);
4063 if (GET_CODE (x) == COND_EXEC)
4064 {
4065 c6x_mark_reg_read (XEXP (XEXP (x, 0), 0), false);
4066 nops -= 2;
4067 }
4068
4069 for (i = 0; i < nops; i++)
4070 {
4071 rtx op = recog_data.operand[i];
4072 if (recog_data.operand_type[i] == OP_OUT)
4073 continue;
4074 if (REG_P (op))
4075 {
4076 bool this_cross = cross;
4077 if (destrf == DEST_REGFILE_A && A_REGNO_P (REGNO (op)))
4078 this_cross = false;
4079 if (destrf == DEST_REGFILE_B && B_REGNO_P (REGNO (op)))
4080 this_cross = false;
4081 c6x_mark_reg_read (op, this_cross);
4082 }
4083 else if (MEM_P (op))
4084 {
4085 op = XEXP (op, 0);
4086 switch (GET_CODE (op))
4087 {
4088 case POST_INC:
4089 case PRE_INC:
4090 case POST_DEC:
4091 case PRE_DEC:
4092 op = XEXP (op, 0);
4093 /* fall through */
4094 case REG:
4095 c6x_mark_reg_read (op, false);
4096 break;
4097 case POST_MODIFY:
4098 case PRE_MODIFY:
4099 op = XEXP (op, 1);
4100 gcc_assert (GET_CODE (op) == PLUS);
4101 /* fall through */
4102 case PLUS:
4103 c6x_mark_reg_read (XEXP (op, 0), false);
4104 if (REG_P (XEXP (op, 1)))
4105 c6x_mark_reg_read (XEXP (op, 1), false);
4106 break;
4107 case SYMBOL_REF:
4108 case LABEL_REF:
4109 case CONST:
4110 c6x_mark_regno_read (REG_B14, false);
4111 break;
4112 default:
4113 gcc_unreachable ();
4114 }
4115 }
4116 else if (!CONSTANT_P (op) && strlen (recog_data.constraints[i]) > 0)
4117 gcc_unreachable ();
4118 }
4119 return reg_access_stall;
4120}
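
/* Illustrative example (not part of the original source): for a memory
   operand whose address is (plus (reg A4) (reg A5)), both A4 and A5 are
   marked as read; for an autoincrement address such as (post_inc (reg A4))
   only A4 is marked; and a (symbol_ref ...) address counts as a read of
   B14, the data-page pointer.  Register names are placeholders.  */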
4121
4122/* Helper function for the TARGET_SCHED_REORDER and
4123 TARGET_SCHED_REORDER2 hooks. If scheduling an insn would be unsafe
4124 in the current cycle, move it down in the ready list and return the
4125 number of non-unsafe insns. */
4126
4127static int
4128c6x_sched_reorder_1 (rtx *ready, int *pn_ready, int clock_var)
4129{
4130 int n_ready = *pn_ready;
4131 rtx *e_ready = ready + n_ready;
4132 rtx *insnp;
4133 int first_jump;
4134
4135 /* Keep track of conflicts due to a limited number of register accesses,
4136 and due to stalls incurred by too early accesses of registers using
4137 cross paths. */
4138
4139 for (insnp = ready; insnp < e_ready; insnp++)
4140 {
4141 rtx insn = *insnp;
4142 int icode = recog_memoized (insn);
4143 bool is_asm = (icode < 0
4144 && (GET_CODE (PATTERN (insn)) == ASM_INPUT
4145 || asm_noperands (PATTERN (insn)) >= 0));
758df283 4146 bool no_parallel = (is_asm || icode == CODE_FOR_sploop
5aa04b01 4147 || (icode >= 0
4148 && get_attr_type (insn) == TYPE_ATOMIC));
4149
4150 /* We delay asm insns until all delay slots are exhausted. We can't
4151 accurately tell how many cycles an asm takes, and the main scheduling
4152 code always assumes at least 1 cycle, which may be wrong. */
4153 if ((no_parallel
4154 && (ss.issued_this_cycle > 0 || clock_var < ss.delays_finished_at))
758df283 4155 || c6x_registers_update (insn)
4156 || (ss.issued_this_cycle > 0 && icode == CODE_FOR_sploop))
5aa04b01 4157 {
4158 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
4159 *ready = insn;
4160 n_ready--;
4161 ready++;
4162 }
4163 else if (shadow_p (insn))
4164 {
4165 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
4166 *ready = insn;
4167 }
4168 }
4169
4170 /* Ensure that no other jump is scheduled in jump delay slots, since
4171 it would put the machine into the wrong state. Also, we must
4172 avoid scheduling insns that have a latency longer than the
4173 remaining jump delay slots, as the code at the jump destination
4174 won't be prepared for it.
4175
4176 However, we can relax this condition somewhat. The rest of the
4177 scheduler will automatically avoid scheduling an insn on which
4178 the jump shadow depends so late that its side effect happens
4179 after the jump. This means that if we see an insn with a longer
4180 latency here, it can safely be scheduled if we can ensure that it
4181 has a predicate opposite of the previous jump: the side effect
4182 will happen in what we think of as the same basic block. In
4183 c6x_variable_issue, we will record the necessary predicate in
4184 new_conditions, and after scheduling is finished, we will modify
4185 the insn.
4186
4187 Special care must be taken whenever there is more than one jump
4188 in flight. */
4189
4190 first_jump = first_jump_index (clock_var);
4191 if (first_jump != -1)
4192 {
4193 int first_cycle = get_jump_cycle (first_jump);
4194 rtx first_cond = get_jump_cond (first_jump);
4195 int second_cycle = 0;
4196
4197 if (first_jump > 0)
4198 second_cycle = get_jump_cycle (first_jump - 1);
4199
4200 for (insnp = ready; insnp < e_ready; insnp++)
4201 {
4202 rtx insn = *insnp;
4203 int icode = recog_memoized (insn);
4204 bool is_asm = (icode < 0
4205 && (GET_CODE (PATTERN (insn)) == ASM_INPUT
4206 || asm_noperands (PATTERN (insn)) >= 0));
a905d913 4207 int this_cycles, rsrv_cycles;
5aa04b01 4208 enum attr_type type;
4209
4210 gcc_assert (!is_asm);
4211 if (icode < 0)
4212 continue;
4213 this_cycles = get_attr_cycles (insn);
a905d913 4214 rsrv_cycles = get_attr_reserve_cycles (insn);
5aa04b01 4215 type = get_attr_type (insn);
4216 /* Treat branches specially; there is also a hazard if two jumps
4217 end at the same cycle. */
4218 if (type == TYPE_BRANCH || type == TYPE_CALL)
4219 this_cycles++;
4220 if (clock_var + this_cycles <= first_cycle)
4221 continue;
4222 if ((first_jump > 0 && clock_var + this_cycles > second_cycle)
a905d913 4223 || clock_var + rsrv_cycles > first_cycle
5aa04b01 4224 || !predicate_insn (insn, first_cond, false))
4225 {
4226 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
4227 *ready = insn;
4228 n_ready--;
4229 ready++;
4230 }
4231 }
4232 }
4233
4234 return n_ready;
4235}
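
/* A minimal, self-contained sketch (not from the original sources) of the
   ready-list rotation used in c6x_sched_reorder_1 above: an element that
   must be delayed is moved to slot 0 (the end of the ready list from the
   scheduler's point of view) by shifting everything before it up by one,
   which preserves the relative order of the remaining elements.  An int
   array stands in for the rtx ready list.  */
#if 0
#include <stdio.h>
#include <string.h>

int
main (void)
{
  int ready[5] = { 10, 11, 12, 13, 14 };
  int *insnp = &ready[3];	/* Pretend element 13 must be delayed.  */
  int insn = *insnp;
  int i;

  /* Same shape as the memmove/assignment pair above.  */
  memmove (ready + 1, ready, (insnp - ready) * sizeof (int));
  ready[0] = insn;

  for (i = 0; i < 5; i++)
    printf ("%d ", ready[i]);	/* Prints "13 10 11 12 14".  */
  printf ("\n");
  return 0;
}
#endif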
4236
4237/* Implement the TARGET_SCHED_REORDER hook. We save the current clock
4238 for later and clear the register access information for the new
4239 cycle. We also move asm statements out of the way if they would be
4240 scheduled in a delay slot. */
4241
4242static int
4243c6x_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
4244 int sched_verbose ATTRIBUTE_UNUSED,
4245 rtx *ready ATTRIBUTE_UNUSED,
4246 int *pn_ready ATTRIBUTE_UNUSED, int clock_var)
4247{
4248 ss.curr_sched_clock = clock_var;
4249 ss.issued_this_cycle = 0;
4250 memset (ss.reg_n_accesses, 0, sizeof ss.reg_n_accesses);
4251 memset (ss.reg_n_xaccesses, 0, sizeof ss.reg_n_xaccesses);
4252
4253 if (ready == NULL)
4254 return 0;
4255
4256 return c6x_sched_reorder_1 (ready, pn_ready, clock_var);
4257}
4258
4259/* Implement the TARGET_SCHED_REORDER2 hook. We use this to record the clock
4260 cycle for every insn. */
4261
4262static int
4263c6x_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
4264 int sched_verbose ATTRIBUTE_UNUSED,
4265 rtx *ready ATTRIBUTE_UNUSED,
4266 int *pn_ready ATTRIBUTE_UNUSED, int clock_var)
4267{
4268 /* FIXME: the assembler rejects labels inside an execute packet.
4269 This can occur if prologue insns are scheduled in parallel with
4270 others, so we avoid this here. Also make sure that nothing is
4271 scheduled in parallel with a TYPE_ATOMIC insn or after a jump. */
4272 if (RTX_FRAME_RELATED_P (ss.last_scheduled_insn)
4273 || JUMP_P (ss.last_scheduled_insn)
4274 || (recog_memoized (ss.last_scheduled_insn) >= 0
4275 && get_attr_type (ss.last_scheduled_insn) == TYPE_ATOMIC))
4276 {
4277 int n_ready = *pn_ready;
4278 rtx *e_ready = ready + n_ready;
4279 rtx *insnp;
4280
4281 for (insnp = ready; insnp < e_ready; insnp++)
4282 {
4283 rtx insn = *insnp;
4284 if (!shadow_p (insn))
4285 {
4286 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
4287 *ready = insn;
4288 n_ready--;
4289 ready++;
4290 }
4291 }
4292 return n_ready;
4293 }
4294
4295 return c6x_sched_reorder_1 (ready, pn_ready, clock_var);
4296}
4297
4298/* Subroutine of maybe_clobber_cond, called through note_stores. */
4299
4300static void
4301clobber_cond_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data1)
4302{
4303 rtx *cond = (rtx *)data1;
4304 if (*cond != NULL_RTX && reg_overlap_mentioned_p (x, *cond))
4305 *cond = NULL_RTX;
4306}
4307
4308/* Examine INSN, and if it destroys the conditions we have recorded for
4309 any of the jumps in flight, clear that condition so that we don't
4310 predicate any more insns. CLOCK_VAR helps us limit the search to
4311 only those jumps which are still in flight. */
4312
4313static void
4314maybe_clobber_cond (rtx insn, int clock_var)
4315{
4316 int n, idx;
4317 idx = ss.jump_cycle_index;
4318 for (n = 0; n < 12; n++, idx++)
4319 {
4320 rtx cond, link;
4321 int cycle;
4322
4323 if (idx >= 12)
4324 idx -= 12;
4325 cycle = ss.jump_cycles[idx];
4326 if (cycle <= clock_var)
4327 return;
4328
4329 cond = ss.jump_cond[idx];
4330 if (cond == NULL_RTX)
4331 continue;
4332
4333 if (CALL_P (insn))
4334 {
4335 ss.jump_cond[idx] = NULL_RTX;
4336 continue;
4337 }
4338
4339 note_stores (PATTERN (insn), clobber_cond_1, ss.jump_cond + idx);
4340 for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
4341 if (REG_NOTE_KIND (link) == REG_INC)
4342 clobber_cond_1 (XEXP (link, 0), NULL_RTX, ss.jump_cond + idx);
4343 }
4344}
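
/* A minimal, self-contained sketch (not from the original sources) of the
   circular-buffer walk used in maybe_clobber_cond above: starting at the
   current index, all twelve entries are visited once, wrapping around by
   subtracting the buffer size.  The starting index is arbitrary.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int jump_cycle_index = 9;	/* Hypothetical current position.  */
  int n, idx;

  for (n = 0, idx = jump_cycle_index; n < 12; n++, idx++)
    {
      if (idx >= 12)
	idx -= 12;		/* Wrap around, as above.  */
      printf ("%d ", idx);	/* Prints "9 10 11 0 1 2 3 4 5 6 7 8".  */
    }
  printf ("\n");
  return 0;
}
#endif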
4345
4346/* Implement the TARGET_SCHED_VARIABLE_ISSUE hook. We are about to
4347 issue INSN. Return the number of insns left on the ready queue
4348 that can be issued this cycle.
4349 We use this hook to record clock cycles and reservations for every insn. */
4350
4351static int
4352c6x_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
4353 int sched_verbose ATTRIBUTE_UNUSED,
4354 rtx insn, int can_issue_more ATTRIBUTE_UNUSED)
4355{
4356 ss.last_scheduled_insn = insn;
758df283 4357 if (INSN_UID (insn) < sploop_max_uid_iter0 && !JUMP_P (insn))
4358 ss.last_scheduled_iter0 = insn;
5aa04b01 4359 if (GET_CODE (PATTERN (insn)) != USE && GET_CODE (PATTERN (insn)) != CLOBBER)
4360 ss.issued_this_cycle++;
f1f41a6c 4361 if (insn_info.exists ())
5aa04b01 4362 {
87ded687 4363 state_t st_after = alloca (dfa_state_size);
5aa04b01 4364 int curr_clock = ss.curr_sched_clock;
4365 int uid = INSN_UID (insn);
4366 int icode = recog_memoized (insn);
4367 rtx first_cond;
4368 int first, first_cycle;
87ded687 4369 unsigned int mask;
4370 int i;
5aa04b01 4371
4372 insn_set_clock (insn, curr_clock);
4373 INSN_INFO_ENTRY (uid).ebb_start
4374 = curr_clock == 0 && ss.issued_this_cycle == 1;
4375
4376 first = first_jump_index (ss.curr_sched_clock);
4377 if (first == -1)
4378 {
4379 first_cycle = 0;
4380 first_cond = NULL_RTX;
4381 }
4382 else
4383 {
4384 first_cycle = get_jump_cycle (first);
4385 first_cond = get_jump_cond (first);
4386 }
4387 if (icode >= 0
4388 && first_cycle > curr_clock
4389 && first_cond != NULL_RTX
4390 && (curr_clock + get_attr_cycles (insn) > first_cycle
4391 || get_attr_type (insn) == TYPE_BRANCH
4392 || get_attr_type (insn) == TYPE_CALL))
4393 INSN_INFO_ENTRY (uid).new_cond = first_cond;
4394
87ded687 4395 memcpy (st_after, curr_state, dfa_state_size);
4396 state_transition (st_after, const0_rtx);
4397
4398 mask = 0;
4399 for (i = 0; i < 2 * UNIT_QID_SIDE_OFFSET; i++)
4400 if (cpu_unit_reservation_p (st_after, c6x_unit_codes[i])
4401 && !cpu_unit_reservation_p (prev_cycle_state, c6x_unit_codes[i]))
4402 mask |= 1 << i;
4403 INSN_INFO_ENTRY (uid).unit_mask = mask;
4404
5aa04b01 4405 maybe_clobber_cond (insn, curr_clock);
4406
4407 if (icode >= 0)
4408 {
4409 int i, cycles;
4410
4411 c6x_registers_update (insn);
4412 memcpy (ss.reg_n_accesses, ss.tmp_reg_n_accesses,
4413 sizeof ss.reg_n_accesses);
4414	  memcpy (ss.reg_n_xaccesses, ss.tmp_reg_n_xaccesses,
4415 sizeof ss.reg_n_xaccesses);
4416
4417 cycles = get_attr_cycles (insn);
4418 if (ss.delays_finished_at < ss.curr_sched_clock + cycles)
4419 ss.delays_finished_at = ss.curr_sched_clock + cycles;
4420 if (get_attr_type (insn) == TYPE_BRANCH
4421 || get_attr_type (insn) == TYPE_CALL)
4422 {
4423 rtx opposite = condjump_opposite_condition (insn);
4424 record_jump (ss.curr_sched_clock + cycles, opposite);
4425 }
4426
4427 /* Mark the cycles in which the destination registers are written.
4428 This is used for calculating stalls when using cross units. */
4429 extract_insn (insn);
4430 /* Cross-path stalls don't apply to results of load insns. */
4431 if (get_attr_type (insn) == TYPE_LOAD
4432 || get_attr_type (insn) == TYPE_LOADN
4433 || get_attr_type (insn) == TYPE_LOAD_SHADOW)
4434 cycles--;
4435 for (i = 0; i < recog_data.n_operands; i++)
4436 {
4437 rtx op = recog_data.operand[i];
4438 if (MEM_P (op))
4439 {
4440 rtx addr = XEXP (op, 0);
4441 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
4442 c6x_mark_reg_written (XEXP (addr, 0),
4443 insn_uid_get_clock (uid) + 1);
4444 }
4445 if (recog_data.operand_type[i] != OP_IN
4446 && REG_P (op))
4447 {
4448 c6x_mark_reg_written (op,
4449 insn_uid_get_clock (uid) + cycles);
4450 }
4451 }
4452 }
4453 }
4454 return can_issue_more;
4455}
4456
4457/* Implement the TARGET_SCHED_ADJUST_COST hook. We need special handling for
4458 anti- and output dependencies. */
4459
4460static int
4461c6x_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4462{
4463 enum attr_type insn_type = TYPE_UNKNOWN, dep_insn_type = TYPE_UNKNOWN;
4464 int dep_insn_code_number, insn_code_number;
4465 int shadow_bonus = 0;
4466 enum reg_note kind;
4467 dep_insn_code_number = recog_memoized (dep_insn);
4468 insn_code_number = recog_memoized (insn);
4469
4470 if (dep_insn_code_number >= 0)
4471 dep_insn_type = get_attr_type (dep_insn);
4472
4473 if (insn_code_number >= 0)
4474 insn_type = get_attr_type (insn);
4475
4476 kind = REG_NOTE_KIND (link);
4477 if (kind == 0)
4478 {
4479 /* If we have a dependency on a load, and it's not for the result of
4480 the load, it must be for an autoincrement. Reduce the cost in that
4481 case. */
4482 if (dep_insn_type == TYPE_LOAD)
4483 {
4484 rtx set = PATTERN (dep_insn);
4485 if (GET_CODE (set) == COND_EXEC)
4486 set = COND_EXEC_CODE (set);
4487 if (GET_CODE (set) == UNSPEC)
4488 cost = 1;
4489 else
4490 {
4491 gcc_assert (GET_CODE (set) == SET);
4492 if (!reg_overlap_mentioned_p (SET_DEST (set), PATTERN (insn)))
4493 cost = 1;
4494 }
4495 }
4496 }
4497
4498 /* A jump shadow needs to have its latency decreased by one. Conceptually,
4499 it occurs in between two cycles, but we schedule it at the end of the
4500 first cycle. */
4501 if (shadow_type_p (insn_type))
4502 shadow_bonus = 1;
4503
4504 /* Anti and output dependencies usually have zero cost, but we want
4505 to insert a stall after a jump, and after certain floating point
4506 insns that take more than one cycle to read their inputs. In the
4507 future, we should try to find a better algorithm for scheduling
4508 jumps. */
4509 if (kind != 0)
4510 {
4511 /* We can get anti-dependencies against shadow insns. Treat these
4512 like output dependencies, so that the insn is entirely finished
4513 before the branch takes place. */
4514 if (kind == REG_DEP_ANTI && insn_type == TYPE_SHADOW)
4515 kind = REG_DEP_OUTPUT;
4516 switch (dep_insn_type)
4517 {
4518 case TYPE_CALLP:
4519 return 1;
4520 case TYPE_BRANCH:
4521 case TYPE_CALL:
4522 if (get_attr_has_shadow (dep_insn) == HAS_SHADOW_Y)
4523 /* This is a real_jump/real_call insn. These don't have
4524 outputs, and ensuring the validity of scheduling things
4525 in the delay slot is the job of
4526 c6x_sched_reorder_1. */
4527 return 0;
4528 /* Unsplit calls can happen - e.g. for divide insns. */
4529 return 6;
4530 case TYPE_LOAD:
4531 case TYPE_LOADN:
4532 case TYPE_INTDP:
4533 if (kind == REG_DEP_OUTPUT)
4534 return 5 - shadow_bonus;
4535 return 0;
4536 case TYPE_MPY4:
4537 case TYPE_FP4:
4538 if (kind == REG_DEP_OUTPUT)
4539 return 4 - shadow_bonus;
4540 return 0;
4541 case TYPE_MPY2:
4542 if (kind == REG_DEP_OUTPUT)
4543 return 2 - shadow_bonus;
4544 return 0;
4545 case TYPE_CMPDP:
4546 if (kind == REG_DEP_OUTPUT)
4547 return 2 - shadow_bonus;
4548 return 2;
4549 case TYPE_ADDDP:
4550 case TYPE_MPYSPDP:
4551 if (kind == REG_DEP_OUTPUT)
4552 return 7 - shadow_bonus;
4553 return 2;
4554 case TYPE_MPYSP2DP:
4555 if (kind == REG_DEP_OUTPUT)
4556 return 5 - shadow_bonus;
4557 return 2;
4558 case TYPE_MPYI:
4559 if (kind == REG_DEP_OUTPUT)
4560 return 9 - shadow_bonus;
4561 return 4;
4562 case TYPE_MPYID:
4563 case TYPE_MPYDP:
4564 if (kind == REG_DEP_OUTPUT)
4565 return 10 - shadow_bonus;
4566 return 4;
4567
4568 default:
4569 if (insn_type == TYPE_SPKERNEL)
4570 return 0;
4571 if (kind == REG_DEP_OUTPUT)
4572 return 1 - shadow_bonus;
4573
4574 return 0;
4575 }
4576 }
4577
4578 return cost - shadow_bonus;
4579}
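
/* Illustrative worked example (not part of the original source): with the
   table above, an output dependency (REG_DEP_OUTPUT) against a TYPE_MPY4
   producer costs 4 cycles, or 3 if the consuming insn is a shadow
   (SHADOW_BONUS is then 1), while an anti-dependency against the same
   producer from a non-shadow consumer costs 0.  True dependencies never
   reach the switch; they keep the cost computed by the generic scheduler,
   minus the shadow bonus.  */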
4580\f
4581/* Create a SEQUENCE rtx to replace the instructions in SLOT, of which there
4582 are N_FILLED. REAL_FIRST identifies the slot of the insn that appears
4583 first in the original stream. */
4584
4585static void
4586gen_one_bundle (rtx *slot, int n_filled, int real_first)
4587{
4588 rtx bundle;
4589 rtx t;
4590 int i;
4591
4592 bundle = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (n_filled, slot));
4593 bundle = make_insn_raw (bundle);
4594 BLOCK_FOR_INSN (bundle) = BLOCK_FOR_INSN (slot[0]);
d53c050c 4595 INSN_LOCATION (bundle) = INSN_LOCATION (slot[0]);
5aa04b01 4596 PREV_INSN (bundle) = PREV_INSN (slot[real_first]);
4597
4598 t = NULL_RTX;
4599
4600 for (i = 0; i < n_filled; i++)
4601 {
4602 rtx insn = slot[i];
4603 remove_insn (insn);
4604 PREV_INSN (insn) = t ? t : PREV_INSN (bundle);
4605 if (t != NULL_RTX)
4606 NEXT_INSN (t) = insn;
4607 t = insn;
4608 if (i > 0)
d53c050c 4609 INSN_LOCATION (slot[i]) = INSN_LOCATION (bundle);
5aa04b01 4610 }
4611
4612 NEXT_INSN (bundle) = NEXT_INSN (PREV_INSN (bundle));
4613 NEXT_INSN (t) = NEXT_INSN (bundle);
4614 NEXT_INSN (PREV_INSN (bundle)) = bundle;
4615 PREV_INSN (NEXT_INSN (bundle)) = bundle;
4616}
4617
4618/* Move all parallel instructions into SEQUENCEs, so that no subsequent passes
4619 try to insert labels in the middle. */
4620
4621static void
4622c6x_gen_bundles (void)
4623{
4624 basic_block bb;
4625 rtx insn, next, last_call;
4626
4627 FOR_EACH_BB (bb)
4628 {
4629 rtx insn, next;
4630 /* The machine is eight insns wide. We can have up to six shadow
4631 insns, plus an extra slot for merging the jump shadow. */
4632 rtx slot[15];
4633 int n_filled = 0;
4634 int first_slot = 0;
4635
4636 for (insn = BB_HEAD (bb);; insn = next)
4637 {
4638 int at_end;
4639 rtx delete_this = NULL_RTX;
4640
4641 if (NONDEBUG_INSN_P (insn))
4642 {
4643 /* Put calls at the start of the sequence. */
4644 if (CALL_P (insn))
4645 {
4646 first_slot++;
4647 if (n_filled)
4648 {
4649 memmove (&slot[1], &slot[0],
4650 n_filled * sizeof (slot[0]));
4651 }
4652 if (!shadow_p (insn))
4653 {
4654 PUT_MODE (insn, TImode);
4655 if (n_filled)
4656 PUT_MODE (slot[1], VOIDmode);
4657 }
4658 n_filled++;
4659 slot[0] = insn;
4660 }
4661 else
4662 {
4663 slot[n_filled++] = insn;
4664 }
4665 }
4666
4667 next = NEXT_INSN (insn);
4668 while (next && insn != BB_END (bb)
4669 && !(NONDEBUG_INSN_P (next)
4670 && GET_CODE (PATTERN (next)) != USE
4671 && GET_CODE (PATTERN (next)) != CLOBBER))
4672 {
4673 insn = next;
4674 next = NEXT_INSN (insn);
4675 }
4676
4677 at_end = insn == BB_END (bb);
4678 if (delete_this == NULL_RTX
4679 && (at_end || (GET_MODE (next) == TImode
4680 && !(shadow_p (next) && CALL_P (next)))))
4681 {
4682 if (n_filled >= 2)
4683 gen_one_bundle (slot, n_filled, first_slot);
4684
4685 n_filled = 0;
4686 first_slot = 0;
4687 }
4688 if (at_end)
4689 break;
4690 }
4691 }
4692 /* Bundling, and emitting nops, can separate
4693 NOTE_INSN_CALL_ARG_LOCATION from the corresponding calls. Fix
4694 that up here. */
4695 last_call = NULL_RTX;
4696 for (insn = get_insns (); insn; insn = next)
4697 {
4698 next = NEXT_INSN (insn);
4699 if (CALL_P (insn)
4700 || (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE
4701 && CALL_P (XVECEXP (PATTERN (insn), 0, 0))))
4702 last_call = insn;
4703 if (!NOTE_P (insn) || NOTE_KIND (insn) != NOTE_INSN_CALL_ARG_LOCATION)
4704 continue;
4705 if (NEXT_INSN (last_call) == insn)
4706 continue;
4707 NEXT_INSN (PREV_INSN (insn)) = NEXT_INSN (insn);
4708 PREV_INSN (NEXT_INSN (insn)) = PREV_INSN (insn);
4709 PREV_INSN (insn) = last_call;
4710 NEXT_INSN (insn) = NEXT_INSN (last_call);
4711 PREV_INSN (NEXT_INSN (insn)) = insn;
4712 NEXT_INSN (PREV_INSN (insn)) = insn;
4713 last_call = insn;
4714 }
4715}
4716
4717/* Emit a NOP instruction for CYCLES cycles after insn AFTER. Return it. */
4718
4719static rtx
4720emit_nop_after (int cycles, rtx after)
4721{
4722 rtx insn;
4723
4724 /* mpydp has 9 delay slots, and we may schedule a stall for a cross-path
4725 operation. We don't need the extra NOP since in this case, the hardware
4726 will automatically insert the required stall. */
4727 if (cycles == 10)
4728 cycles--;
4729
4730 gcc_assert (cycles < 10);
4731
4732 insn = emit_insn_after (gen_nop_count (GEN_INT (cycles)), after);
4733 PUT_MODE (insn, TImode);
4734
4735 return insn;
4736}
4737
4738/* Determine whether INSN is a call that needs to have a return label
4739 placed. */
4740
4741static bool
4742returning_call_p (rtx insn)
4743{
4744 if (CALL_P (insn))
4745 return (!SIBLING_CALL_P (insn)
4746 && get_attr_type (insn) != TYPE_CALLP
4747 && get_attr_type (insn) != TYPE_SHADOW);
4748 if (recog_memoized (insn) < 0)
4749 return false;
4750 if (get_attr_type (insn) == TYPE_CALL)
4751 return true;
4752 return false;
4753}
4754
4755/* Determine whether INSN's pattern can be converted to use callp. */
4756static bool
4757can_use_callp (rtx insn)
4758{
4759 int icode = recog_memoized (insn);
4760 if (!TARGET_INSNS_64PLUS
4761 || icode < 0
4762 || GET_CODE (PATTERN (insn)) == COND_EXEC)
4763 return false;
4764
4765 return ((icode == CODE_FOR_real_call
4766 || icode == CODE_FOR_call_internal
4767 || icode == CODE_FOR_call_value_internal)
4768 && get_attr_dest_regfile (insn) == DEST_REGFILE_ANY);
4769}
4770
4771/* Convert the pattern of INSN, which must be a CALL_INSN, into a callp. */
4772static void
4773convert_to_callp (rtx insn)
4774{
4775 rtx lab;
4776 extract_insn (insn);
4777 if (GET_CODE (PATTERN (insn)) == SET)
4778 {
4779 rtx dest = recog_data.operand[0];
4780 lab = recog_data.operand[1];
4781 PATTERN (insn) = gen_callp_value (dest, lab);
4782 INSN_CODE (insn) = CODE_FOR_callp_value;
4783 }
4784 else
4785 {
4786 lab = recog_data.operand[0];
4787 PATTERN (insn) = gen_callp (lab);
4788 INSN_CODE (insn) = CODE_FOR_callp;
4789 }
4790}
4791
4792/* Scan forwards from INSN until we find the next insn that has mode TImode
4793 (indicating it starts a new cycle), and occurs in cycle CLOCK.
4794 Return it if we find such an insn, NULL_RTX otherwise. */
4795static rtx
4796find_next_cycle_insn (rtx insn, int clock)
4797{
4798 rtx t = insn;
4799 if (GET_MODE (t) == TImode)
4800 t = next_real_insn (t);
4801 while (t && GET_MODE (t) != TImode)
4802 t = next_real_insn (t);
4803
4804 if (t && insn_get_clock (t) == clock)
4805 return t;
4806 return NULL_RTX;
4807}
4808
4809/* If COND_INSN has a COND_EXEC condition, wrap the same condition
4810 around PAT. Return PAT either unchanged or modified in this
4811 way. */
4812static rtx
4813duplicate_cond (rtx pat, rtx cond_insn)
4814{
4815 rtx cond_pat = PATTERN (cond_insn);
4816 if (GET_CODE (cond_pat) == COND_EXEC)
4817 pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (cond_pat)),
4818 pat);
4819 return pat;
4820}
4821
4822/* Walk forward from INSN to find the last insn that issues in the same clock
4823 cycle. */
4824static rtx
4825find_last_same_clock (rtx insn)
4826{
4827 rtx retval = insn;
4828 rtx t = next_real_insn (insn);
4829
4830 while (t && GET_MODE (t) != TImode)
4831 {
4832 if (!DEBUG_INSN_P (t) && recog_memoized (t) >= 0)
4833 retval = t;
4834 t = next_real_insn (t);
4835 }
4836 return retval;
4837}
4838
4839/* For every call insn in the function, emit code to load the return
4840 address. For each call we create a return label and store it in
4841 CALL_LABELS. If we are not scheduling, we emit the labels here,
4842 otherwise the caller will do it later.
4843 This function is called after final insn scheduling, but before creating
4844 the SEQUENCEs that represent execute packets. */
4845
4846static void
4847reorg_split_calls (rtx *call_labels)
4848{
4849 unsigned int reservation_mask = 0;
4850 rtx insn = get_insns ();
4851 gcc_assert (GET_CODE (insn) == NOTE);
4852 insn = next_real_insn (insn);
4853 while (insn)
4854 {
4855 int uid;
4856 rtx next = next_real_insn (insn);
4857
4858 if (DEBUG_INSN_P (insn))
4859 goto done;
4860
4861 if (GET_MODE (insn) == TImode)
4862 reservation_mask = 0;
4863 uid = INSN_UID (insn);
4864 if (c6x_flag_schedule_insns2 && recog_memoized (insn) >= 0)
4865 reservation_mask |= 1 << INSN_INFO_ENTRY (uid).reservation;
4866
4867 if (returning_call_p (insn))
4868 {
4869 rtx label = gen_label_rtx ();
4870 rtx labelref = gen_rtx_LABEL_REF (Pmode, label);
4871 rtx reg = gen_rtx_REG (SImode, RETURN_ADDR_REGNO);
4872
4873 LABEL_NUSES (label) = 2;
4874 if (!c6x_flag_schedule_insns2)
4875 {
4876 if (can_use_callp (insn))
4877 convert_to_callp (insn);
4878 else
4879 {
4880 rtx t;
4881 rtx slot[4];
4882 emit_label_after (label, insn);
4883
4884 /* Bundle the call and its delay slots into a single
4885 SEQUENCE. While these do not issue in parallel
4886 we need to group them into a single EH region. */
4887 slot[0] = insn;
4888 PUT_MODE (insn, TImode);
4889 if (TARGET_INSNS_64)
4890 {
4891 t = gen_addkpc (reg, labelref, GEN_INT (4));
4892 slot[1] = emit_insn_after (duplicate_cond (t, insn),
4893 insn);
4894 PUT_MODE (slot[1], TImode);
4895 gen_one_bundle (slot, 2, 0);
4896 }
4897 else
4898 {
4899 slot[3] = emit_insn_after (gen_nop_count (GEN_INT (3)),
4900 insn);
4901 PUT_MODE (slot[3], TImode);
4902 t = gen_movsi_lo_sum (reg, reg, labelref);
4903 slot[2] = emit_insn_after (duplicate_cond (t, insn),
4904 insn);
4905 PUT_MODE (slot[2], TImode);
4906 t = gen_movsi_high (reg, labelref);
4907 slot[1] = emit_insn_after (duplicate_cond (t, insn),
4908 insn);
4909 PUT_MODE (slot[1], TImode);
4910 gen_one_bundle (slot, 4, 0);
4911 }
4912 }
4913 }
4914 else
4915 {
4916 /* If we scheduled, we reserved the .S2 unit for one or two
4917 cycles after the call. Emit the insns in these slots,
4918 unless it's possible to create a CALLP insn.
4919 Note that this works because the dependencies ensure that
4920 no insn setting/using B3 is scheduled in the delay slots of
4921 a call. */
4922 int this_clock = insn_get_clock (insn);
4923 rtx last_same_clock;
4924 rtx after1;
4925
4926 call_labels[INSN_UID (insn)] = label;
4927
4928 last_same_clock = find_last_same_clock (insn);
4929
4930 if (can_use_callp (insn))
4931 {
4932 /* Find the first insn of the next execute packet. If it
87ded687 4933 is the shadow insn corresponding to this call, we may
5aa04b01 4934 use a CALLP insn. */
87ded687 4935 rtx shadow = next_nonnote_nondebug_insn (last_same_clock);
5aa04b01 4936
87ded687 4937 if (CALL_P (shadow)
4938 && insn_get_clock (shadow) == this_clock + 5)
5aa04b01 4939 {
87ded687 4940 convert_to_callp (shadow);
4941 insn_set_clock (shadow, this_clock);
4942 INSN_INFO_ENTRY (INSN_UID (shadow)).reservation
4943 = RESERVATION_S2;
4944 INSN_INFO_ENTRY (INSN_UID (shadow)).unit_mask
4945 = INSN_INFO_ENTRY (INSN_UID (last_same_clock)).unit_mask;
5aa04b01 4946 if (GET_MODE (insn) == TImode)
4947 {
4948 rtx new_cycle_first = NEXT_INSN (insn);
4949 while (!NONDEBUG_INSN_P (new_cycle_first)
4950 || GET_CODE (PATTERN (new_cycle_first)) == USE
4951 || GET_CODE (PATTERN (new_cycle_first)) == CLOBBER)
4952 new_cycle_first = NEXT_INSN (new_cycle_first);
4953 PUT_MODE (new_cycle_first, TImode);
87ded687 4954 if (new_cycle_first != shadow)
4955 PUT_MODE (shadow, VOIDmode);
5aa04b01 4956 INSN_INFO_ENTRY (INSN_UID (new_cycle_first)).ebb_start
4957 = INSN_INFO_ENTRY (INSN_UID (insn)).ebb_start;
4958 }
4959 else
87ded687 4960 PUT_MODE (shadow, VOIDmode);
5aa04b01 4961 delete_insn (insn);
4962 goto done;
4963 }
4964 }
4965 after1 = find_next_cycle_insn (last_same_clock, this_clock + 1);
4966 if (after1 == NULL_RTX)
4967 after1 = last_same_clock;
4968 else
4969 after1 = find_last_same_clock (after1);
4970 if (TARGET_INSNS_64)
4971 {
4972 rtx x1 = gen_addkpc (reg, labelref, const0_rtx);
4973 x1 = emit_insn_after (duplicate_cond (x1, insn), after1);
4974 insn_set_clock (x1, this_clock + 1);
4975 INSN_INFO_ENTRY (INSN_UID (x1)).reservation = RESERVATION_S2;
4976 if (after1 == last_same_clock)
4977 PUT_MODE (x1, TImode);
87ded687 4978 else
4979 INSN_INFO_ENTRY (INSN_UID (x1)).unit_mask
4980 = INSN_INFO_ENTRY (INSN_UID (after1)).unit_mask;
5aa04b01 4981 }
4982 else
4983 {
4984 rtx x1, x2;
4985 rtx after2 = find_next_cycle_insn (after1, this_clock + 2);
4986 if (after2 == NULL_RTX)
4987 after2 = after1;
4988 x2 = gen_movsi_lo_sum (reg, reg, labelref);
4989 x2 = emit_insn_after (duplicate_cond (x2, insn), after2);
4990 x1 = gen_movsi_high (reg, labelref);
4991 x1 = emit_insn_after (duplicate_cond (x1, insn), after1);
4992 insn_set_clock (x1, this_clock + 1);
4993 insn_set_clock (x2, this_clock + 2);
4994 INSN_INFO_ENTRY (INSN_UID (x1)).reservation = RESERVATION_S2;
4995 INSN_INFO_ENTRY (INSN_UID (x2)).reservation = RESERVATION_S2;
4996 if (after1 == last_same_clock)
4997 PUT_MODE (x1, TImode);
87ded687 4998 else
4999 INSN_INFO_ENTRY (INSN_UID (x1)).unit_mask
5000 = INSN_INFO_ENTRY (INSN_UID (after1)).unit_mask;
5aa04b01 5001 if (after1 == after2)
5002 PUT_MODE (x2, TImode);
87ded687 5003 else
5004 INSN_INFO_ENTRY (INSN_UID (x2)).unit_mask
5005 = INSN_INFO_ENTRY (INSN_UID (after2)).unit_mask;
5aa04b01 5006 }
5007 }
5008 }
5009 done:
5010 insn = next;
5011 }
5012}
5013
5014/* Called as part of c6x_reorg. This function emits multi-cycle NOP
5015 insns as required for correctness. CALL_LABELS is the array that
5016 holds the return labels for call insns; we emit these here if
5017 scheduling was run earlier. */
5018
5019static void
5020reorg_emit_nops (rtx *call_labels)
5021{
5022 bool first;
5023 rtx prev, last_call;
5024 int prev_clock, earliest_bb_end;
5025 int prev_implicit_nops;
5026 rtx insn = get_insns ();
5027
5028 /* We look at one insn (or bundle inside a sequence) in each iteration, storing
5029 its issue time in PREV_CLOCK for the next iteration. If there is a gap in
5030 clocks, we must insert a NOP.
5031 EARLIEST_BB_END tracks in which cycle all insns that have been issued in the
5032 current basic block will finish. We must not allow the next basic block to
5033 begin before this cycle.
5034 PREV_IMPLICIT_NOPS tells us whether we've seen an insn that implicitly contains
5035 a multi-cycle nop. The code is scheduled such that subsequent insns will
5036 show the cycle gap, but we needn't insert a real NOP instruction. */
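
  /* Illustrative example (not part of the original source): if the previous
     execute packet issued at clock 3 and the next one at clock 7, CYCLES is
     4; with PREV_IMPLICIT_NOPS == 0 we emit a multi-cycle NOP covering the
     remaining 3 cycles, whereas if the previous packet ended in a CALLP
     (PREV_IMPLICIT_NOPS == 5) no extra NOP is needed.  */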
5037 insn = next_real_insn (insn);
5038 last_call = prev = NULL_RTX;
5039 prev_clock = -1;
5040 earliest_bb_end = 0;
5041 prev_implicit_nops = 0;
5042 first = true;
5043 while (insn)
5044 {
5045 int this_clock = -1;
5046 rtx next;
5047 int max_cycles = 0;
5048
5049 next = next_real_insn (insn);
5050
5051 if (DEBUG_INSN_P (insn)
5052 || GET_CODE (PATTERN (insn)) == USE
5053 || GET_CODE (PATTERN (insn)) == CLOBBER
5054 || shadow_or_blockage_p (insn)
5055 || (JUMP_P (insn)
5056 && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5057 || GET_CODE (PATTERN (insn)) == ADDR_VEC)))
5058 goto next_insn;
5059
5060 if (!c6x_flag_schedule_insns2)
5061 /* No scheduling; ensure that no parallel issue happens. */
5062 PUT_MODE (insn, TImode);
5063 else
5064 {
5065 int cycles;
5066
5067 this_clock = insn_get_clock (insn);
5068 if (this_clock != prev_clock)
5069 {
5070 PUT_MODE (insn, TImode);
5071
5072 if (!first)
5073 {
5074 cycles = this_clock - prev_clock;
5075
5076 cycles -= prev_implicit_nops;
5077 if (cycles > 1)
5078 {
5079 rtx nop = emit_nop_after (cycles - 1, prev);
5080 insn_set_clock (nop, prev_clock + prev_implicit_nops + 1);
5081 }
5082 }
5083 prev_clock = this_clock;
5084
5085 if (last_call
5086 && insn_get_clock (last_call) + 6 <= this_clock)
5087 {
5088 emit_label_before (call_labels[INSN_UID (last_call)], insn);
5089 last_call = NULL_RTX;
5090 }
5091 prev_implicit_nops = 0;
5092 }
5093 }
5094
5095 /* Examine how many cycles the current insn takes, and adjust
5096 LAST_CALL, EARLIEST_BB_END and PREV_IMPLICIT_NOPS. */
5097 if (recog_memoized (insn) >= 0
5098 /* If not scheduling, we've emitted NOPs after calls already. */
5099 && (c6x_flag_schedule_insns2 || !returning_call_p (insn)))
5100 {
5101 max_cycles = get_attr_cycles (insn);
5102 if (get_attr_type (insn) == TYPE_CALLP)
5103 prev_implicit_nops = 5;
5104 }
5105 else
5106 max_cycles = 1;
5107 if (returning_call_p (insn))
5108 last_call = insn;
5109
5110 if (c6x_flag_schedule_insns2)
5111 {
5112 gcc_assert (this_clock >= 0);
5113 if (earliest_bb_end < this_clock + max_cycles)
5114 earliest_bb_end = this_clock + max_cycles;
5115 }
5116 else if (max_cycles > 1)
5117 emit_nop_after (max_cycles - 1, insn);
5118
5119 prev = insn;
5120 first = false;
5121
5122 next_insn:
5123 if (c6x_flag_schedule_insns2
5124 && (next == NULL_RTX
5125 || (GET_MODE (next) == TImode
5126 && INSN_INFO_ENTRY (INSN_UID (next)).ebb_start))
5127 && earliest_bb_end > 0)
5128 {
5129 int cycles = earliest_bb_end - prev_clock;
5130 if (cycles > 1)
5131 {
5132 prev = emit_nop_after (cycles - 1, prev);
5133 insn_set_clock (prev, prev_clock + prev_implicit_nops + 1);
5134 }
5135 earliest_bb_end = 0;
5136 prev_clock = -1;
5137 first = true;
5138
5139 if (last_call)
5140 emit_label_after (call_labels[INSN_UID (last_call)], prev);
5141 last_call = NULL_RTX;
5142 }
5143 insn = next;
5144 }
5145}
5146
5147/* If possible, split INSN, which we know is either a jump or a call, into a real
5148 insn and its shadow. */
5149static void
5150split_delayed_branch (rtx insn)
5151{
5152 int code = recog_memoized (insn);
5153 rtx i1, newpat;
5154 rtx pat = PATTERN (insn);
5155
5156 if (GET_CODE (pat) == COND_EXEC)
5157 pat = COND_EXEC_CODE (pat);
5158
5159 if (CALL_P (insn))
5160 {
5161 rtx src = pat, dest = NULL_RTX;
5162 rtx callee;
5163 if (GET_CODE (pat) == SET)
5164 {
5165 dest = SET_DEST (pat);
5166 src = SET_SRC (pat);
5167 }
5168 callee = XEXP (XEXP (src, 0), 0);
5169 if (SIBLING_CALL_P (insn))
5170 {
5171 if (REG_P (callee))
5172 newpat = gen_indirect_sibcall_shadow ();
5173 else
5174 newpat = gen_sibcall_shadow (callee);
5175 pat = gen_real_jump (callee);
5176 }
5177 else if (dest != NULL_RTX)
5178 {
5179 if (REG_P (callee))
5180 newpat = gen_indirect_call_value_shadow (dest);
5181 else
5182 newpat = gen_call_value_shadow (dest, callee);
5183 pat = gen_real_call (callee);
5184 }
5185 else
5186 {
5187 if (REG_P (callee))
5188 newpat = gen_indirect_call_shadow ();
5189 else
5190 newpat = gen_call_shadow (callee);
5191 pat = gen_real_call (callee);
5192 }
5193 pat = duplicate_cond (pat, insn);
5194 newpat = duplicate_cond (newpat, insn);
5195 }
5196 else
5197 {
5198 rtx src, op;
5199 if (GET_CODE (pat) == PARALLEL
5200 && GET_CODE (XVECEXP (pat, 0, 0)) == RETURN)
5201 {
5202 newpat = gen_return_shadow ();
5203 pat = gen_real_ret (XEXP (XVECEXP (pat, 0, 1), 0));
5204 newpat = duplicate_cond (newpat, insn);
5205 }
5206 else
5207 switch (code)
5208 {
5209 case CODE_FOR_br_true:
5210 case CODE_FOR_br_false:
5211 src = SET_SRC (pat);
5212 op = XEXP (src, code == CODE_FOR_br_true ? 1 : 2);
5213 newpat = gen_condjump_shadow (op);
5214 pat = gen_real_jump (op);
5215 if (code == CODE_FOR_br_true)
5216 pat = gen_rtx_COND_EXEC (VOIDmode, XEXP (src, 0), pat);
5217 else
5218 pat = gen_rtx_COND_EXEC (VOIDmode,
5219 reversed_comparison (XEXP (src, 0),
5220 VOIDmode),
5221 pat);
5222 break;
5223
5224 case CODE_FOR_jump:
5225 op = SET_SRC (pat);
5226 newpat = gen_jump_shadow (op);
5227 break;
5228
5229 case CODE_FOR_indirect_jump:
5230 newpat = gen_indirect_jump_shadow ();
5231 break;
5232
5233 case CODE_FOR_return_internal:
5234 newpat = gen_return_shadow ();
5235 pat = gen_real_ret (XEXP (XVECEXP (pat, 0, 1), 0));
5236 break;
5237
5238 default:
5239 return;
5240 }
5241 }
5242 i1 = emit_insn_before (pat, insn);
5243 PATTERN (insn) = newpat;
5244 INSN_CODE (insn) = -1;
c0457559 5245 record_delay_slot_pair (i1, insn, 5, 0);
5aa04b01 5246}
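
/* Illustrative example (not part of the original source): for a conditional
   branch matched by CODE_FOR_br_true, the code above emits a predicated
   real_jump in place of the branch (this is the insn that occupies the issue
   slot and is output), turns the original insn into a condjump_shadow that
   marks the cycle in which the branch takes effect, and ties the two together
   with a delay-slot pair of five cycles.  */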
5247
758df283 5248/* If INSN is a multi-cycle insn that should be handled properly in
5249 modulo-scheduling, split it into a real insn and a shadow.
5250 Return true if we made a change.
5251
5252 It is valid for us to fail to split an insn; the caller has to deal
5253 with the possibility. Currently we handle loads and most mpy2 and
5254 mpy4 insns. */
5255static bool
5256split_delayed_nonbranch (rtx insn)
5257{
5258 int code = recog_memoized (insn);
5259 enum attr_type type;
5260 rtx i1, newpat, src, dest;
5261 rtx pat = PATTERN (insn);
5262 rtvec rtv;
5263 int delay;
5264
5265 if (GET_CODE (pat) == COND_EXEC)
5266 pat = COND_EXEC_CODE (pat);
5267
5268 if (code < 0 || GET_CODE (pat) != SET)
5269 return false;
5270 src = SET_SRC (pat);
5271 dest = SET_DEST (pat);
5272 if (!REG_P (dest))
5273 return false;
5274
5275 type = get_attr_type (insn);
5276 if (code >= 0
5277 && (type == TYPE_LOAD
5278 || type == TYPE_LOADN))
5279 {
5280 if (!MEM_P (src)
5281 && (GET_CODE (src) != ZERO_EXTEND
5282 || !MEM_P (XEXP (src, 0))))
5283 return false;
5284
5285 if (GET_MODE_SIZE (GET_MODE (dest)) > 4
5286 && (GET_MODE_SIZE (GET_MODE (dest)) != 8 || !TARGET_LDDW))
5287 return false;
5288
5289 rtv = gen_rtvec (2, GEN_INT (REGNO (SET_DEST (pat))),
5290 SET_SRC (pat));
5291 newpat = gen_load_shadow (SET_DEST (pat));
5292 pat = gen_rtx_UNSPEC (VOIDmode, rtv, UNSPEC_REAL_LOAD);
5293 delay = 4;
5294 }
5295 else if (code >= 0
5296 && (type == TYPE_MPY2
5297 || type == TYPE_MPY4))
5298 {
5299 /* We don't handle floating point multiplies yet. */
5300 if (GET_MODE (dest) == SFmode)
5301 return false;
5302
5303 rtv = gen_rtvec (2, GEN_INT (REGNO (SET_DEST (pat))),
5304 SET_SRC (pat));
5305 newpat = gen_mult_shadow (SET_DEST (pat));
5306 pat = gen_rtx_UNSPEC (VOIDmode, rtv, UNSPEC_REAL_MULT);
5307 delay = type == TYPE_MPY2 ? 1 : 3;
5308 }
5309 else
5310 return false;
5311
5312 pat = duplicate_cond (pat, insn);
5313 newpat = duplicate_cond (newpat, insn);
5314 i1 = emit_insn_before (pat, insn);
5315 PATTERN (insn) = newpat;
5316 INSN_CODE (insn) = -1;
5317 recog_memoized (insn);
5318 recog_memoized (i1);
5319 record_delay_slot_pair (i1, insn, delay, 0);
5320 return true;
5321}
5322
5323/* Examine if INSN is the result of splitting a load or multiply into a real
5324 insn and a shadow, and if so, undo the transformation. */
5325static void
5326undo_split_delayed_nonbranch (rtx insn)
5327{
5328 int icode = recog_memoized (insn);
5329 enum attr_type type;
5330 rtx prev_pat, insn_pat, prev;
5331
5332 if (icode < 0)
5333 return;
5334 type = get_attr_type (insn);
5335 if (type != TYPE_LOAD_SHADOW && type != TYPE_MULT_SHADOW)
5336 return;
5337 prev = PREV_INSN (insn);
5338 prev_pat = PATTERN (prev);
5339 insn_pat = PATTERN (insn);
5340 if (GET_CODE (prev_pat) == COND_EXEC)
5341 {
5342 prev_pat = COND_EXEC_CODE (prev_pat);
5343 insn_pat = COND_EXEC_CODE (insn_pat);
5344 }
5345
5346 gcc_assert (GET_CODE (prev_pat) == UNSPEC
5347 && ((XINT (prev_pat, 1) == UNSPEC_REAL_LOAD
5348 && type == TYPE_LOAD_SHADOW)
5349 || (XINT (prev_pat, 1) == UNSPEC_REAL_MULT
5350 && type == TYPE_MULT_SHADOW)));
5351 insn_pat = gen_rtx_SET (VOIDmode, SET_DEST (insn_pat),
5352 XVECEXP (prev_pat, 0, 1));
5353 insn_pat = duplicate_cond (insn_pat, prev);
5354 PATTERN (insn) = insn_pat;
5355 INSN_CODE (insn) = -1;
5356 delete_insn (prev);
5357}
5358
5aa04b01 5359/* Split every insn (i.e. jumps and calls) which can have delay slots into
5360 two parts: the first one is scheduled normally and emits the instruction,
5361 while the second one is a shadow insn which shows the side effect taking
5362 place. The second one is placed in the right cycle by the scheduler, but
5363 not emitted as an assembly instruction. */
5364
5365static void
5366split_delayed_insns (void)
5367{
5368 rtx insn;
5369 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5370 {
5371 if (JUMP_P (insn) || CALL_P (insn))
5372 split_delayed_branch (insn);
5373 }
5374}
5375
5376/* For every insn that has an entry in the new_conditions vector, give it
5377 the appropriate predicate. */
5378static void
5379conditionalize_after_sched (void)
5380{
5381 basic_block bb;
5382 rtx insn;
5383 FOR_EACH_BB (bb)
5384 FOR_BB_INSNS (bb, insn)
5385 {
5386 unsigned uid = INSN_UID (insn);
5387 rtx cond;
5388 if (!NONDEBUG_INSN_P (insn) || uid >= INSN_INFO_LENGTH)
5389 continue;
5390 cond = INSN_INFO_ENTRY (uid).new_cond;
5391 if (cond == NULL_RTX)
5392 continue;
5393 if (dump_file)
5394 fprintf (dump_file, "Conditionalizing insn %d\n", uid);
5395 predicate_insn (insn, cond, true);
5396 }
5397}
5398
758df283 5399/* A callback for the hw-doloop pass. This function examines INSN; if
5400 it is a loop_end pattern we recognize, return the reg rtx for the
5401 loop counter. Otherwise, return NULL_RTX. */
5402
5403static rtx
5404hwloop_pattern_reg (rtx insn)
5405{
5406 rtx pat, reg;
5407
5408 if (!JUMP_P (insn) || recog_memoized (insn) != CODE_FOR_loop_end)
5409 return NULL_RTX;
5410
5411 pat = PATTERN (insn);
5412 reg = SET_DEST (XVECEXP (pat, 0, 1));
5413 if (!REG_P (reg))
5414 return NULL_RTX;
5415 return reg;
5416}
5417
5418/* Return the number of cycles taken by BB, as computed by scheduling,
5419 including the latencies of all insns with delay slots. IGNORE is
5420 an insn we should ignore in the calculation, usually the final
5421 branch. */
5422static int
5423bb_earliest_end_cycle (basic_block bb, rtx ignore)
5424{
5425 int earliest = 0;
5426 rtx insn;
5427
5428 FOR_BB_INSNS (bb, insn)
5429 {
5430 int cycles, this_clock;
5431
5432 if (LABEL_P (insn) || NOTE_P (insn) || DEBUG_INSN_P (insn)
5433 || GET_CODE (PATTERN (insn)) == USE
5434 || GET_CODE (PATTERN (insn)) == CLOBBER
5435 || insn == ignore)
5436 continue;
5437
5438 this_clock = insn_get_clock (insn);
5439 cycles = get_attr_cycles (insn);
5440
5441 if (earliest < this_clock + cycles)
5442 earliest = this_clock + cycles;
5443 }
5444 return earliest;
5445}
5446
5447/* Examine the insns in BB and remove all which have a uid greater than or
5448 equal to MAX_UID. */
5449static void
5450filter_insns_above (basic_block bb, int max_uid)
5451{
5452 rtx insn, next;
5453 bool prev_ti = false;
5454 int prev_cycle = -1;
5455
5456 FOR_BB_INSNS_SAFE (bb, insn, next)
5457 {
5458 int this_cycle;
5459 if (!NONDEBUG_INSN_P (insn))
5460 continue;
5461 if (insn == BB_END (bb))
5462 return;
5463 this_cycle = insn_get_clock (insn);
5464 if (prev_ti && this_cycle == prev_cycle)
5465 {
5466 gcc_assert (GET_MODE (insn) != TImode);
5467 PUT_MODE (insn, TImode);
5468 }
5469 prev_ti = false;
5470 if (INSN_UID (insn) >= max_uid)
5471 {
5472 if (GET_MODE (insn) == TImode)
5473 {
5474 prev_ti = true;
5475 prev_cycle = this_cycle;
5476 }
5477 delete_insn (insn);
5478 }
5479 }
5480}
5481
9936b574 5482/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
5483
5484static void
5485c6x_asm_emit_except_personality (rtx personality)
5486{
5487 fputs ("\t.personality\t", asm_out_file);
5488 output_addr_const (asm_out_file, personality);
5489 fputc ('\n', asm_out_file);
5490}
5491
5492/* Use a special assembly directive rather than a regular section for
5493 unwind table data. */
5494
5495static void
5496c6x_asm_init_sections (void)
5497{
5498 exception_section = get_unnamed_section (0, output_section_asm_op,
5499 "\t.handlerdata");
5500}
5501
758df283 5502/* A callback for the hw-doloop pass. Called to optimize LOOP in a
5503 machine-specific fashion; returns true if successful and false if
5504 the hwloop_fail function should be called. */
5505
5506static bool
5507hwloop_optimize (hwloop_info loop)
5508{
5509 basic_block entry_bb, bb;
5510 rtx seq, insn, prev, entry_after, end_packet;
5511 rtx head_insn, tail_insn, new_insns, last_insn;
6d004ec3 5512 int loop_earliest;
758df283 5513 int n_execute_packets;
5514 edge entry_edge;
5515 unsigned ix;
5516 int max_uid_before, delayed_splits;
5517 int i, sp_ii, min_ii, max_ii, max_parallel, n_insns, n_real_insns, stages;
5518 rtx *orig_vec;
5519 rtx *copies;
5520 rtx **insn_copies;
5521
5522 if (!c6x_flag_modulo_sched || !c6x_flag_schedule_insns2
5523 || !TARGET_INSNS_64PLUS)
5524 return false;
5525
5526 if (loop->iter_reg_used || loop->depth > 1)
5527 return false;
5528 if (loop->has_call || loop->has_asm)
5529 return false;
5530
5531 if (loop->head != loop->tail)
5532 return false;
5533
5534 gcc_assert (loop->incoming_dest == loop->head);
5535
5536 entry_edge = NULL;
f1f41a6c 5537 FOR_EACH_VEC_SAFE_ELT (loop->incoming, i, entry_edge)
758df283 5538 if (entry_edge->flags & EDGE_FALLTHRU)
5539 break;
5540 if (entry_edge == NULL)
5541 return false;
5542
9a4c7d5b 5543 reshuffle_units (loop->head);
5544
d452a169 5545 in_hwloop = true;
758df283 5546 schedule_ebbs_init ();
5547 schedule_ebb (BB_HEAD (loop->tail), loop->loop_end, true);
5548 schedule_ebbs_finish ();
d452a169 5549 in_hwloop = false;
758df283 5550
5551 bb = loop->head;
5552 loop_earliest = bb_earliest_end_cycle (bb, loop->loop_end) + 1;
5553
5554 max_uid_before = get_max_uid ();
5555
5556 /* Split all multi-cycle operations, such as loads. For normal
5557 scheduling, we only do this for branches, as the generated code
5558 would otherwise not be interrupt-safe. When using sploop, it is
5559 safe and beneficial to split them. If any multi-cycle operations
5560 remain after splitting (because we don't handle them yet), we
5561 cannot pipeline the loop. */
5562 delayed_splits = 0;
5563 FOR_BB_INSNS (bb, insn)
5564 {
5565 if (NONDEBUG_INSN_P (insn))
5566 {
5567 recog_memoized (insn);
5568 if (split_delayed_nonbranch (insn))
5569 delayed_splits++;
5570 else if (INSN_CODE (insn) >= 0
5571 && get_attr_cycles (insn) > 1)
5572 goto undo_splits;
5573 }
5574 }
5575
5576 /* Count the number of insns as well as the number of real insns, and save
5577 the original sequence of insns in case we must restore it later. */
5578 n_insns = n_real_insns = 0;
5579 FOR_BB_INSNS (bb, insn)
5580 {
5581 n_insns++;
5582 if (NONDEBUG_INSN_P (insn) && insn != loop->loop_end)
5583 n_real_insns++;
5584 }
5585 orig_vec = XNEWVEC (rtx, n_insns);
5586 n_insns = 0;
5587 FOR_BB_INSNS (bb, insn)
5588 orig_vec[n_insns++] = insn;
5589
5590 /* Count the unit reservations, and compute a minimum II from that
5591 table. */
5592 count_unit_reqs (unit_reqs, loop->start_label,
5593 PREV_INSN (loop->loop_end));
5594 merge_unit_reqs (unit_reqs);
5595
5596 min_ii = res_mii (unit_reqs);
5597 max_ii = loop_earliest < 15 ? loop_earliest : 14;
5598
5599 /* Make copies of the loop body, up to a maximum number of stages we want
5600 to handle. */
5601 max_parallel = loop_earliest / min_ii + 1;
5602
5603 copies = XCNEWVEC (rtx, (max_parallel + 1) * n_real_insns);
5604 insn_copies = XNEWVEC (rtx *, max_parallel + 1);
5605 for (i = 0; i < max_parallel + 1; i++)
5606 insn_copies[i] = copies + i * n_real_insns;
5607
5608 head_insn = next_nonnote_nondebug_insn (loop->start_label);
5609 tail_insn = prev_real_insn (BB_END (bb));
5610
5611 i = 0;
5612 FOR_BB_INSNS (bb, insn)
5613 if (NONDEBUG_INSN_P (insn) && insn != loop->loop_end)
5614 insn_copies[0][i++] = insn;
5615
5616 sploop_max_uid_iter0 = get_max_uid ();
5617
5618 /* Generate the copies of the loop body, and save them in the
5619 INSN_COPIES array. */
5620 start_sequence ();
5621 for (i = 0; i < max_parallel; i++)
5622 {
5623 int j;
5624 rtx this_iter;
5625
5626 this_iter = duplicate_insn_chain (head_insn, tail_insn);
5627 j = 0;
5628 while (this_iter)
5629 {
5630 rtx prev_stage_insn = insn_copies[i][j];
5631 gcc_assert (INSN_CODE (this_iter) == INSN_CODE (prev_stage_insn));
5632
5633 if (INSN_CODE (this_iter) >= 0
5634 && (get_attr_type (this_iter) == TYPE_LOAD_SHADOW
5635 || get_attr_type (this_iter) == TYPE_MULT_SHADOW))
5636 {
5637 rtx prev = PREV_INSN (this_iter);
5638 record_delay_slot_pair (prev, this_iter,
5639 get_attr_cycles (prev) - 1, 0);
5640 }
5641 else
5642 record_delay_slot_pair (prev_stage_insn, this_iter, i, 1);
5643
5644 insn_copies[i + 1][j] = this_iter;
5645 j++;
5646 this_iter = next_nonnote_nondebug_insn (this_iter);
5647 }
5648 }
5649 new_insns = get_insns ();
5650 last_insn = insn_copies[max_parallel][n_real_insns - 1];
5651 end_sequence ();
5652 emit_insn_before (new_insns, BB_END (bb));
5653
5654 /* Try to schedule the loop using varying initiation intervals,
5655 starting with the smallest possible and incrementing it
5656 on failure. */
5657 for (sp_ii = min_ii; sp_ii <= max_ii; sp_ii++)
5658 {
5659 basic_block tmp_bb;
5660 if (dump_file)
5661 fprintf (dump_file, "Trying to schedule for II %d\n", sp_ii);
5662
5663 df_clear_flags (DF_LR_RUN_DCE);
5664
5665 schedule_ebbs_init ();
5666 set_modulo_params (sp_ii, max_parallel, n_real_insns,
5667 sploop_max_uid_iter0);
5668 tmp_bb = schedule_ebb (BB_HEAD (bb), last_insn, true);
5669 schedule_ebbs_finish ();
5670
5671 if (tmp_bb)
5672 {
5673 if (dump_file)
5674 fprintf (dump_file, "Found schedule with II %d\n", sp_ii);
5675 break;
5676 }
5677 }
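
  /* Illustrative worked example (not part of the original source): if the
     single-iteration schedule takes 20 cycles (LOOP_EARLIEST == 20) and
     res_mii computes a minimum II of 3, MAX_II is capped at 14 and
     MAX_PARALLEL is 20 / 3 + 1 = 7 overlapping iterations; the loop above
     then tries II = 3, 4, ... and stops at the first value for which
     schedule_ebb produces a valid modulo schedule.  */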
5678
5679 discard_delay_pairs_above (max_uid_before);
5680
5681 if (sp_ii > max_ii)
5682 goto restore_loop;
5683
5684 stages = insn_get_clock (ss.last_scheduled_iter0) / sp_ii + 1;
5685
5686 if (stages == 1 && sp_ii > 5)
5687 goto restore_loop;
5688
5689 /* At this point, we know we've been successful, unless we find later that
5690 there are too many execute packets for the loop buffer to hold. */
5691
5692 /* Assign reservations to the instructions in the loop. We must find
5693 the stage that contains the full loop kernel, and transfer the
5694 reservations of the instructions contained in it to the corresponding
5695 instructions from iteration 0, which are the only ones we'll keep. */
5696 assign_reservations (BB_HEAD (bb), ss.last_scheduled_insn);
5697 PREV_INSN (BB_END (bb)) = ss.last_scheduled_iter0;
5698 NEXT_INSN (ss.last_scheduled_iter0) = BB_END (bb);
5699 filter_insns_above (bb, sploop_max_uid_iter0);
5700
5701 for (i = 0; i < n_real_insns; i++)
5702 {
5703 rtx insn = insn_copies[0][i];
5704 int uid = INSN_UID (insn);
5705 int stage = insn_uid_get_clock (uid) / sp_ii;
5706
5707 if (stage + 1 < stages)
5708 {
5709 int copy_uid;
5710 stage = stages - stage - 1;
5711 copy_uid = INSN_UID (insn_copies[stage][i]);
5712 INSN_INFO_ENTRY (uid).reservation
5713 = INSN_INFO_ENTRY (copy_uid).reservation;
5714 }
5715 }
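
  /* Illustrative worked example (not part of the original source): with
     SP_II == 2 and STAGES == 3, an iteration-0 insn scheduled at clock 5 is
     in stage 2, the last stage, and keeps its own reservation, while an insn
     at clock 1 is in stage 0 and takes its reservation from its copy in
     stage STAGES - 0 - 1 == 2, the copy that belongs to the full kernel.  */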
5716 if (stages == 1)
5717 stages++;
5718
5719 /* Compute the number of execute packets the pipelined form of the loop will
5720 require. */
5721 prev = NULL_RTX;
5722 n_execute_packets = 0;
5723 for (insn = loop->start_label; insn != loop->loop_end; insn = NEXT_INSN (insn))
5724 {
5725 if (NONDEBUG_INSN_P (insn) && GET_MODE (insn) == TImode
5726 && !shadow_p (insn))
5727 {
5728 n_execute_packets++;
5729 if (prev && insn_get_clock (prev) + 1 != insn_get_clock (insn))
5730 /* We need an extra NOP instruction. */
5731 n_execute_packets++;
5732
5733 prev = insn;
5734 }
5735 }
5736
5737 end_packet = ss.last_scheduled_iter0;
5738 while (!NONDEBUG_INSN_P (end_packet) || GET_MODE (end_packet) != TImode)
5739 end_packet = PREV_INSN (end_packet);
5740
5741 /* The earliest cycle in which we can emit the SPKERNEL instruction. */
5742 loop_earliest = (stages - 1) * sp_ii;
5743 if (loop_earliest > insn_get_clock (end_packet))
5744 {
5745 n_execute_packets++;
5746 end_packet = loop->loop_end;
5747 }
5748 else
5749 loop_earliest = insn_get_clock (end_packet);
5750
5751 if (n_execute_packets > 14)
5752 goto restore_loop;
5753
5754 /* Generate the spkernel instruction, and place it at the appropriate
5755 spot. */
5756 PUT_MODE (end_packet, VOIDmode);
5757
5758 insn = gen_spkernel (GEN_INT (stages - 1),
5759 const0_rtx, JUMP_LABEL (loop->loop_end));
5760 insn = emit_jump_insn_before (insn, end_packet);
5761 JUMP_LABEL (insn) = JUMP_LABEL (loop->loop_end);
5762 insn_set_clock (insn, loop_earliest);
5763 PUT_MODE (insn, TImode);
5764 INSN_INFO_ENTRY (INSN_UID (insn)).ebb_start = false;
5765 delete_insn (loop->loop_end);
5766
5767 /* Place the mvc and sploop instructions before the loop. */
5768 entry_bb = entry_edge->src;
5769
5770 start_sequence ();
5771
5772 insn = emit_insn (gen_mvilc (loop->iter_reg));
5773 insn = emit_insn (gen_sploop (GEN_INT (sp_ii)));
5774
5775 seq = get_insns ();
5776
f1f41a6c 5777 if (!single_succ_p (entry_bb) || vec_safe_length (loop->incoming) > 1)
758df283 5778 {
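/* The entry block has other successors, or the loop has more than one
   incoming edge, so the setup sequence cannot simply be appended to the
   entry block; create a new preheader block for it and redirect the
   loop's incoming edges there.  */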
5779 basic_block new_bb;
5780 edge e;
5781 edge_iterator ei;
5782
5783 emit_insn_before (seq, BB_HEAD (loop->head));
5784 seq = emit_label_before (gen_label_rtx (), seq);
5785
5786 new_bb = create_basic_block (seq, insn, entry_bb);
5787 FOR_EACH_EDGE (e, ei, loop->incoming)
5788 {
5789 if (!(e->flags & EDGE_FALLTHRU))
5790 redirect_edge_and_branch_force (e, new_bb);
5791 else
5792 redirect_edge_succ (e, new_bb);
5793 }
5794 make_edge (new_bb, loop->head, 0);
5795 }
5796 else
5797 {
5798 entry_after = BB_END (entry_bb);
5799 while (DEBUG_INSN_P (entry_after)
5800 || (NOTE_P (entry_after)
5801 && NOTE_KIND (entry_after) != NOTE_INSN_BASIC_BLOCK))
5802 entry_after = PREV_INSN (entry_after);
5803 emit_insn_after (seq, entry_after);
5804 }
5805
5806 end_sequence ();
5807
5808 /* Make sure we don't try to schedule this loop again. */
f1f41a6c 5809 for (ix = 0; loop->blocks.iterate (ix, &bb); ix++)
758df283 5810 bb->flags |= BB_DISABLE_SCHEDULE;
5811
5812 return true;
5813
5814 restore_loop:
5815 if (dump_file)
5816 fprintf (dump_file, "Unable to pipeline loop.\n");
5817
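/* Relink the original insns saved in orig_vec back into the block,
   replacing the copies created for the pipelining attempt.  */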
5818 for (i = 1; i < n_insns; i++)
5819 {
5820 NEXT_INSN (orig_vec[i - 1]) = orig_vec[i];
5821 PREV_INSN (orig_vec[i]) = orig_vec[i - 1];
5822 }
5823 PREV_INSN (orig_vec[0]) = PREV_INSN (BB_HEAD (bb));
5824 NEXT_INSN (PREV_INSN (BB_HEAD (bb))) = orig_vec[0];
5825 NEXT_INSN (orig_vec[n_insns - 1]) = NEXT_INSN (BB_END (bb));
5826 PREV_INSN (NEXT_INSN (BB_END (bb))) = orig_vec[n_insns - 1];
5827 BB_HEAD (bb) = orig_vec[0];
5828 BB_END (bb) = orig_vec[n_insns - 1];
5829 undo_splits:
5830 free_delay_pairs ();
5831 FOR_BB_INSNS (bb, insn)
5832 if (NONDEBUG_INSN_P (insn))
5833 undo_split_delayed_nonbranch (insn);
5834 return false;
5835}
5836
5837/* A callback for the hw-doloop pass. Called when a loop we have discovered
5838 turns out not to be optimizable; we have to split the doloop_end pattern
5839 into a subtract and a test. */
5840static void
5841hwloop_fail (hwloop_info loop)
5842{
5843 rtx insn, test, testreg;
5844
5845 if (dump_file)
5846 fprintf (dump_file, "splitting doloop insn %d\n",
5847 INSN_UID (loop->loop_end));
5848 insn = gen_addsi3 (loop->iter_reg, loop->iter_reg, constm1_rtx);
5849 /* See if we can emit the add at the head of the loop rather than at the
5850 end. */
5851 if (loop->head == NULL
5852 || loop->iter_reg_used_outside
5853 || loop->iter_reg_used
5854 || TEST_HARD_REG_BIT (loop->regs_set_in_loop, REGNO (loop->iter_reg))
5855 || loop->incoming_dest != loop->head
5856 || EDGE_COUNT (loop->head->preds) != 2)
5857 emit_insn_before (insn, loop->loop_end);
5858 else
5859 {
5860 rtx t = loop->start_label;
5861 while (!NOTE_P (t) || NOTE_KIND (t) != NOTE_INSN_BASIC_BLOCK)
5862 t = NEXT_INSN (t);
5863 emit_insn_after (insn, t);
5864 }
5865
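/* The third element of the doloop_end PARALLEL sets the register used
   for the exit test; if it is a SCRATCH we can test the iteration
   register directly, otherwise copy the counter into it first.  */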
5866 testreg = SET_DEST (XVECEXP (PATTERN (loop->loop_end), 0, 2));
5867 if (GET_CODE (testreg) == SCRATCH)
5868 testreg = loop->iter_reg;
5869 else
5870 emit_insn_before (gen_movsi (testreg, loop->iter_reg), loop->loop_end);
5871
5872 test = gen_rtx_NE (VOIDmode, testreg, const0_rtx);
5873 insn = emit_jump_insn_before (gen_cbranchsi4 (test, testreg, const0_rtx,
5874 loop->start_label),
5875 loop->loop_end);
5876
5877 JUMP_LABEL (insn) = loop->start_label;
5878 LABEL_NUSES (loop->start_label)++;
5879 delete_insn (loop->loop_end);
5880}
5881
5882static struct hw_doloop_hooks c6x_doloop_hooks =
5883{
5884 hwloop_pattern_reg,
5885 hwloop_optimize,
5886 hwloop_fail
5887};
5888
5889/* Run the hw-doloop pass to modulo-schedule hardware loops, or split the
5890 doloop_end patterns where such optimizations are impossible. */
5891static void
5892c6x_hwloops (void)
5893{
5894 if (optimize)
5895 reorg_loops (true, &c6x_doloop_hooks);
5896}
5897
5aa04b01 5898/* Implement the TARGET_MACHINE_DEPENDENT_REORG pass. We split call insns here
5899 into a sequence that loads the return register and performs the call,
5900 and emit the return label.
5901 If scheduling after reload is requested, it happens here. */
5902
5903static void
5904c6x_reorg (void)
5905{
5906 basic_block bb;
5907 rtx *call_labels;
5908 bool do_selsched = (c6x_flag_schedule_insns2 && flag_selective_scheduling2
5909 && !maybe_skip_selective_scheduling ());
5910
5911 /* We are freeing block_for_insn in the toplev to keep compatibility
5912 with old MDEP_REORGS that are not CFG based. Recompute it now. */
5913 compute_bb_for_insn ();
5914
5915 df_clear_flags (DF_LR_RUN_DCE);
9a4c7d5b 5916 df_note_add_problem ();
5aa04b01 5917
5918 /* If optimizing, we'll have split before scheduling. */
5919 if (optimize == 0)
5920 split_all_insns ();
5921
9a4c7d5b 5922 df_analyze ();
5923
5aa04b01 5924 if (c6x_flag_schedule_insns2)
5925 {
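/* Create the insn_info array used to record per-insn scheduling data,
   with headroom for insns created later by splitting.  */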
5926 int sz = get_max_uid () * 3 / 2 + 1;
5927
f1f41a6c 5928 insn_info.create (sz);
758df283 5929 }
5930
5931 /* Make sure the real-jump insns we create are not deleted. When modulo-
5932 scheduling, situations where a reg is only stored in a loop can also
5933 cause dead code when doing the initial unrolling. */
5934 sched_no_dce = true;
5aa04b01 5935
758df283 5936 c6x_hwloops ();
5aa04b01 5937
758df283 5938 if (c6x_flag_schedule_insns2)
5939 {
5aa04b01 5940 split_delayed_insns ();
5941 timevar_push (TV_SCHED2);
5942 if (do_selsched)
5943 run_selective_scheduling ();
5944 else
5945 schedule_ebbs ();
5946 conditionalize_after_sched ();
5947 timevar_pop (TV_SCHED2);
5948
5949 free_delay_pairs ();
5aa04b01 5950 }
758df283 5951 sched_no_dce = false;
5aa04b01 5952
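/* call_labels is indexed by insn UID and shared between
   reorg_split_calls and reorg_emit_nops; it carries the labels emitted
   for call return points.  */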
5953 call_labels = XCNEWVEC (rtx, get_max_uid () + 1);
5954
5955 reorg_split_calls (call_labels);
5956
5957 if (c6x_flag_schedule_insns2)
5958 {
5959 FOR_EACH_BB (bb)
5960 if ((bb->flags & BB_DISABLE_SCHEDULE) == 0)
5961 assign_reservations (BB_HEAD (bb), BB_END (bb));
5962 }
5963
5964 if (c6x_flag_var_tracking)
5965 {
5966 timevar_push (TV_VAR_TRACKING);
5967 variable_tracking_main ();
5968 timevar_pop (TV_VAR_TRACKING);
5969 }
5970
5971 reorg_emit_nops (call_labels);
5972
5973 /* Post-process the schedule to move parallel insns into SEQUENCEs. */
5974 if (c6x_flag_schedule_insns2)
5975 {
5976 free_delay_pairs ();
5977 c6x_gen_bundles ();
5978 }
5979
5980 df_finish_pass (false);
5981}
5982
5983/* Called when a function has been assembled. It should perform all the
5984 tasks of ASM_DECLARE_FUNCTION_SIZE in elfos.h, plus target-specific
5985 tasks.
5986 We free the reservation (and other scheduling) information here now that
5987 all insns have been output. */
5988void
5989c6x_function_end (FILE *file, const char *fname)
5990{
5991 c6x_output_fn_unwind (file);
5992
f1f41a6c 5993 insn_info.release ();
5aa04b01 5994
5995 if (!flag_inhibit_size_directive)
5996 ASM_OUTPUT_MEASURED_SIZE (file, fname);
5997}
5998\f
5999/* Determine whether X is a shift with code CODE and an integer amount
6000 AMOUNT. */
6001static bool
6002shift_p (rtx x, enum rtx_code code, int amount)
6003{
6004 return (GET_CODE (x) == code && GET_CODE (XEXP (x, 1)) == CONST_INT
6005 && INTVAL (XEXP (x, 1)) == amount);
6006}
6007
6008/* Compute a (partial) cost for rtx X. Return true if the complete
6009 cost has been computed, and false if subexpressions should be
6010 scanned. In either case, *TOTAL contains the cost result. */
6011
6012static bool
20d892d1 6013c6x_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
6014 bool speed)
5aa04b01 6015{
6016 int cost2 = COSTS_N_INSNS (1);
6017 rtx op0, op1;
6018
6019 switch (code)
6020 {
6021 case CONST_INT:
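/* Constants that fit the immediate field of the instruction that uses
   them are free; anything else costs an extra instruction to load.  */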
6022 if (outer_code == SET || outer_code == PLUS)
6023 *total = satisfies_constraint_IsB (x) ? 0 : cost2;
6024 else if (outer_code == AND || outer_code == IOR || outer_code == XOR
6025 || outer_code == MINUS)
6026 *total = satisfies_constraint_Is5 (x) ? 0 : cost2;
6027 else if (GET_RTX_CLASS (outer_code) == RTX_COMPARE
6028 || GET_RTX_CLASS (outer_code) == RTX_COMM_COMPARE)
6029 *total = satisfies_constraint_Iu4 (x) ? 0 : cost2;
6030 else if (outer_code == ASHIFT || outer_code == ASHIFTRT
6031 || outer_code == LSHIFTRT)
6032 *total = satisfies_constraint_Iu5 (x) ? 0 : cost2;
6033 else
6034 *total = cost2;
6035 return true;
6036
6037 case CONST:
6038 case LABEL_REF:
6039 case SYMBOL_REF:
6040 case CONST_DOUBLE:
6041 *total = COSTS_N_INSNS (2);
6042 return true;
6043
6044 case TRUNCATE:
6045 /* Recognize a mult_highpart operation. */
6046 if ((GET_MODE (x) == HImode || GET_MODE (x) == SImode)
6047 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6048 && GET_MODE (XEXP (x, 0)) == GET_MODE_2XWIDER_MODE (GET_MODE (x))
6049 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6050 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6051 && INTVAL (XEXP (XEXP (x, 0), 1)) == GET_MODE_BITSIZE (GET_MODE (x)))
6052 {
6053 rtx mul = XEXP (XEXP (x, 0), 0);
6054 rtx op0 = XEXP (mul, 0);
6055 rtx op1 = XEXP (mul, 1);
6056 enum rtx_code code0 = GET_CODE (op0);
6057 enum rtx_code code1 = GET_CODE (op1);
6058
6059 if ((code0 == code1
6060 && (code0 == SIGN_EXTEND || code0 == ZERO_EXTEND))
6061 || (GET_MODE (x) == HImode
6062 && code0 == ZERO_EXTEND && code1 == SIGN_EXTEND))
6063 {
6064 if (GET_MODE (x) == HImode)
6065 *total = COSTS_N_INSNS (2);
6066 else
6067 *total = COSTS_N_INSNS (12);
20d892d1 6068 *total += rtx_cost (XEXP (op0, 0), code0, 0, speed);
6069 *total += rtx_cost (XEXP (op1, 0), code1, 0, speed);
5aa04b01 6070 return true;
6071 }
6072 }
6073 return false;
6074
6075 case ASHIFT:
6076 case ASHIFTRT:
6077 case LSHIFTRT:
6078 if (GET_MODE (x) == DImode)
6079 *total = COSTS_N_INSNS (CONSTANT_P (XEXP (x, 1)) ? 4 : 15);
6080 else
6081 *total = COSTS_N_INSNS (1);
6082 return false;
6083
6084 case PLUS:
6085 case MINUS:
6086 *total = COSTS_N_INSNS (1);
6087 op0 = code == PLUS ? XEXP (x, 0) : XEXP (x, 1);
6088 op1 = code == PLUS ? XEXP (x, 1) : XEXP (x, 0);
6089 if (GET_MODE_SIZE (GET_MODE (x)) <= UNITS_PER_WORD
6090 && INTEGRAL_MODE_P (GET_MODE (x))
6091 && GET_CODE (op0) == MULT
6092 && GET_CODE (XEXP (op0, 1)) == CONST_INT
6093 && (INTVAL (XEXP (op0, 1)) == 2
6094 || INTVAL (XEXP (op0, 1)) == 4
6095 || (code == PLUS && INTVAL (XEXP (op0, 1)) == 8)))
6096 {
20d892d1 6097 *total += rtx_cost (XEXP (op0, 0), ASHIFT, 0, speed);
6098 *total += rtx_cost (op1, (enum rtx_code) code, 1, speed);
5aa04b01 6099 return true;
6100 }
6101 return false;
6102
6103 case MULT:
6104 op0 = XEXP (x, 0);
6105 op1 = XEXP (x, 1);
6106 if (GET_MODE (x) == DFmode)
6107 {
6108 if (TARGET_FP)
6109 *total = COSTS_N_INSNS (speed ? 10 : 1);
6110 else
6111 *total = COSTS_N_INSNS (speed ? 200 : 4);
6112 }
6113 else if (GET_MODE (x) == SFmode)
6114 {
6115 if (TARGET_FP)
6116 *total = COSTS_N_INSNS (speed ? 4 : 1);
6117 else
6118 *total = COSTS_N_INSNS (speed ? 100 : 4);
6119 }
6120 else if (GET_MODE (x) == DImode)
6121 {
6122 if (TARGET_MPY32
6123 && GET_CODE (op0) == GET_CODE (op1)
6124 && (GET_CODE (op0) == ZERO_EXTEND
6125 || GET_CODE (op0) == SIGN_EXTEND))
6126 {
6127 *total = COSTS_N_INSNS (speed ? 2 : 1);
6128 op0 = XEXP (op0, 0);
6129 op1 = XEXP (op1, 0);
6130 }
6131 else
6132 /* Maybe improve this laster. */
6133 *total = COSTS_N_INSNS (20);
6134 }
6135 else if (GET_MODE (x) == SImode)
6136 {
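/* Operand forms that correspond to the 16-bit multiply instructions
   (sign/zero extensions or high-half shifts by 16) are cheap; a full
   32x32 multiply costs more.  */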
6137 if (((GET_CODE (op0) == ZERO_EXTEND
6138 || GET_CODE (op0) == SIGN_EXTEND
6139 || shift_p (op0, LSHIFTRT, 16))
6140 && (GET_CODE (op1) == SIGN_EXTEND
6141 || GET_CODE (op1) == ZERO_EXTEND
6142 || scst5_operand (op1, SImode)
6143 || shift_p (op1, ASHIFTRT, 16)
6144 || shift_p (op1, LSHIFTRT, 16)))
6145 || (shift_p (op0, ASHIFTRT, 16)
6146 && (GET_CODE (op1) == SIGN_EXTEND
6147 || shift_p (op1, ASHIFTRT, 16))))
6148 {
6149 *total = COSTS_N_INSNS (speed ? 2 : 1);
6150 op0 = XEXP (op0, 0);
6151 if (scst5_operand (op1, SImode))
6152 op1 = NULL_RTX;
6153 else
6154 op1 = XEXP (op1, 0);
6155 }
6156 else if (!speed)
6157 *total = COSTS_N_INSNS (1);
6158 else if (TARGET_MPY32)
6159 *total = COSTS_N_INSNS (4);
6160 else
6161 *total = COSTS_N_INSNS (6);
6162 }
6163 else if (GET_MODE (x) == HImode)
6164 *total = COSTS_N_INSNS (speed ? 2 : 1);
6165
6166 if (GET_CODE (op0) != REG
6167 && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
20d892d1 6168 *total += rtx_cost (op0, MULT, 0, speed);
5aa04b01 6169 if (op1 && GET_CODE (op1) != REG
6170 && (GET_CODE (op1) != SUBREG || GET_CODE (SUBREG_REG (op1)) != REG))
20d892d1 6171 *total += rtx_cost (op1, MULT, 1, speed);
5aa04b01 6172 return true;
6173
6174 case UDIV:
6175 case DIV:
6176 /* This is a bit random; assuming on average there'll be 16 leading
6177 zeros. FIXME: estimate better for constant dividends. */
6178 *total = COSTS_N_INSNS (6 + 3 * 16);
6179 return false;
6180
6181 case IF_THEN_ELSE:
6182 /* Recognize the cmp_and/ior patterns. */
6183 op0 = XEXP (x, 0);
6184 if ((GET_CODE (op0) == EQ || GET_CODE (op0) == NE)
6185 && REG_P (XEXP (op0, 0))
6186 && XEXP (op0, 1) == const0_rtx
6187 && rtx_equal_p (XEXP (x, 1), XEXP (op0, 0)))
6188 {
20d892d1 6189 *total = rtx_cost (XEXP (x, 1), (enum rtx_code) outer_code,
6190 opno, speed);
5aa04b01 6191 return false;
6192 }
6193 return false;
6194
6195 default:
6196 return false;
6197 }
6198}
6199
6200/* Implements target hook vector_mode_supported_p. */
6201
6202static bool
6203c6x_vector_mode_supported_p (enum machine_mode mode)
6204{
6205 switch (mode)
6206 {
6207 case V2HImode:
6208 case V4QImode:
6209 case V2SImode:
6210 case V4HImode:
6211 case V8QImode:
6212 return true;
6213 default:
6214 return false;
6215 }
6216}
6217
6218/* Implements TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */
6219static enum machine_mode
6220c6x_preferred_simd_mode (enum machine_mode mode)
6221{
6222 switch (mode)
6223 {
6224 case HImode:
6225 return V2HImode;
6226 case QImode:
6227 return V4QImode;
6228
6229 default:
6230 return word_mode;
6231 }
6232}
6233
6234/* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */
6235
6236static bool
6237c6x_scalar_mode_supported_p (enum machine_mode mode)
6238{
6239 if (ALL_FIXED_POINT_MODE_P (mode)
6240 && GET_MODE_PRECISION (mode) <= 2 * BITS_PER_WORD)
6241 return true;
6242
6243 return default_scalar_mode_supported_p (mode);
6244}
6245
6246/* Output a reference from a function exception table to the type_info
6247 object X. Output these via a special assembly directive. */
6248
6249static bool
6250c6x_output_ttype (rtx x)
6251{
6252 /* Use special relocations for symbol references. */
6253 if (GET_CODE (x) != CONST_INT)
6254 fputs ("\t.ehtype\t", asm_out_file);
6255 else
6256 fputs ("\t.word\t", asm_out_file);
6257 output_addr_const (asm_out_file, x);
6258 fputc ('\n', asm_out_file);
6259
6260 return TRUE;
6261}
6262
6263/* Modify the return address of the current function. */
6264
6265void
6266c6x_set_return_address (rtx source, rtx scratch)
6267{
6268 struct c6x_frame frame;
6269 rtx addr;
6270 HOST_WIDE_INT offset;
6271
6272 c6x_compute_frame_layout (&frame);
6273 if (! c6x_save_reg (RETURN_ADDR_REGNO))
6274 emit_move_insn (gen_rtx_REG (Pmode, RETURN_ADDR_REGNO), source);
6275 else
6276 {
6277
6278 if (frame_pointer_needed)
6279 {
6280 addr = hard_frame_pointer_rtx;
6281 offset = frame.b3_offset;
6282 }
6283 else
6284 {
6285 addr = stack_pointer_rtx;
6286 offset = frame.to_allocate - frame.b3_offset;
6287 }
6288
6289 /* TODO: Use base+offset loads where possible. */
6290 if (offset)
6291 {
6292 HOST_WIDE_INT low = trunc_int_for_mode (offset, HImode);
6293
6294 emit_insn (gen_movsi_high (scratch, GEN_INT (low)));
6295 if (low != offset)
6296 emit_insn (gen_movsi_lo_sum (scratch, scratch, GEN_INT (offset)));
6297 emit_insn (gen_addsi3 (scratch, addr, scratch));
6298 addr = scratch;
6299 }
6300
6301 emit_move_insn (gen_frame_mem (Pmode, addr), source);
6302 }
6303}
6304
6305/* We save pairs of registers using a DImode store. Describe the component
6306 registers for DWARF generation code. */
6307
6308static rtx
6309c6x_dwarf_register_span (rtx rtl)
6310{
6311 unsigned regno;
6312 unsigned real_regno;
6313 int nregs;
6314 int i;
6315 rtx p;
6316
6317 regno = REGNO (rtl);
6318 nregs = HARD_REGNO_NREGS (regno, GET_MODE (rtl));
6319 if (nregs == 1)
6320 return NULL_RTX;
6321
6322 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
6323 for (i = 0; i < nregs; i++)
6324 {
6325 if (TARGET_BIG_ENDIAN)
6326 real_regno = regno + nregs - (i + 1);
6327 else
6328 real_regno = regno + i;
6329
6330 XVECEXP (p, 0, i) = gen_rtx_REG (SImode, real_regno);
6331 }
6332
6333 return p;
6334}
6335\f
6336/* Codes for all the C6X builtins. */
6337enum c6x_builtins
6338{
6339 C6X_BUILTIN_SADD,
6340 C6X_BUILTIN_SSUB,
6341 C6X_BUILTIN_ADD2,
6342 C6X_BUILTIN_SUB2,
6343 C6X_BUILTIN_ADD4,
6344 C6X_BUILTIN_SUB4,
6345 C6X_BUILTIN_SADD2,
6346 C6X_BUILTIN_SSUB2,
6347 C6X_BUILTIN_SADDU4,
6348
6349 C6X_BUILTIN_SMPY,
6350 C6X_BUILTIN_SMPYH,
6351 C6X_BUILTIN_SMPYHL,
6352 C6X_BUILTIN_SMPYLH,
6353 C6X_BUILTIN_MPY2,
6354 C6X_BUILTIN_SMPY2,
6355
6356 C6X_BUILTIN_CLRR,
6357 C6X_BUILTIN_EXTR,
6358 C6X_BUILTIN_EXTRU,
6359
6360 C6X_BUILTIN_SSHL,
6361 C6X_BUILTIN_SUBC,
6362 C6X_BUILTIN_ABS,
6363 C6X_BUILTIN_ABS2,
6364 C6X_BUILTIN_AVG2,
6365 C6X_BUILTIN_AVGU4,
6366
6367 C6X_BUILTIN_MAX
6368};
6369
6370
6371static GTY(()) tree c6x_builtin_decls[C6X_BUILTIN_MAX];
6372
6373/* Return the C6X builtin for CODE. */
6374static tree
6375c6x_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
6376{
6377 if (code >= C6X_BUILTIN_MAX)
6378 return error_mark_node;
6379
6380 return c6x_builtin_decls[code];
6381}
6382
6383#define def_builtin(NAME, TYPE, CODE) \
6384do { \
6385 tree bdecl; \
6386 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
6387 NULL, NULL_TREE); \
6388 c6x_builtin_decls[CODE] = bdecl; \
6389} while (0)
6390
6391/* Set up all builtin functions for this target. */
6392static void
6393c6x_init_builtins (void)
6394{
6395 tree V4QI_type_node = build_vector_type (unsigned_intQI_type_node, 4);
6396 tree V2HI_type_node = build_vector_type (intHI_type_node, 2);
6397 tree V2SI_type_node = build_vector_type (intSI_type_node, 2);
6398 tree int_ftype_int
6399 = build_function_type_list (integer_type_node, integer_type_node,
6400 NULL_TREE);
6401 tree int_ftype_int_int
6402 = build_function_type_list (integer_type_node, integer_type_node,
6403 integer_type_node, NULL_TREE);
6404 tree v2hi_ftype_v2hi
6405 = build_function_type_list (V2HI_type_node, V2HI_type_node, NULL_TREE);
6406 tree v4qi_ftype_v4qi_v4qi
6407 = build_function_type_list (V4QI_type_node, V4QI_type_node,
6408 V4QI_type_node, NULL_TREE);
6409 tree v2hi_ftype_v2hi_v2hi
6410 = build_function_type_list (V2HI_type_node, V2HI_type_node,
6411 V2HI_type_node, NULL_TREE);
6412 tree v2si_ftype_v2hi_v2hi
6413 = build_function_type_list (V2SI_type_node, V2HI_type_node,
6414 V2HI_type_node, NULL_TREE);
6415
6416 def_builtin ("__builtin_c6x_sadd", int_ftype_int_int,
6417 C6X_BUILTIN_SADD);
6418 def_builtin ("__builtin_c6x_ssub", int_ftype_int_int,
6419 C6X_BUILTIN_SSUB);
6420 def_builtin ("__builtin_c6x_add2", v2hi_ftype_v2hi_v2hi,
6421 C6X_BUILTIN_ADD2);
6422 def_builtin ("__builtin_c6x_sub2", v2hi_ftype_v2hi_v2hi,
6423 C6X_BUILTIN_SUB2);
6424 def_builtin ("__builtin_c6x_add4", v4qi_ftype_v4qi_v4qi,
6425 C6X_BUILTIN_ADD4);
6426 def_builtin ("__builtin_c6x_sub4", v4qi_ftype_v4qi_v4qi,
6427 C6X_BUILTIN_SUB4);
6428 def_builtin ("__builtin_c6x_mpy2", v2si_ftype_v2hi_v2hi,
6429 C6X_BUILTIN_MPY2);
6430 def_builtin ("__builtin_c6x_sadd2", v2hi_ftype_v2hi_v2hi,
6431 C6X_BUILTIN_SADD2);
6432 def_builtin ("__builtin_c6x_ssub2", v2hi_ftype_v2hi_v2hi,
6433 C6X_BUILTIN_SSUB2);
6434 def_builtin ("__builtin_c6x_saddu4", v4qi_ftype_v4qi_v4qi,
6435 C6X_BUILTIN_SADDU4);
6436 def_builtin ("__builtin_c6x_smpy2", v2si_ftype_v2hi_v2hi,
6437 C6X_BUILTIN_SMPY2);
6438
6439 def_builtin ("__builtin_c6x_smpy", int_ftype_int_int,
6440 C6X_BUILTIN_SMPY);
6441 def_builtin ("__builtin_c6x_smpyh", int_ftype_int_int,
6442 C6X_BUILTIN_SMPYH);
6443 def_builtin ("__builtin_c6x_smpyhl", int_ftype_int_int,
6444 C6X_BUILTIN_SMPYHL);
6445 def_builtin ("__builtin_c6x_smpylh", int_ftype_int_int,
6446 C6X_BUILTIN_SMPYLH);
6447
6448 def_builtin ("__builtin_c6x_sshl", int_ftype_int_int,
6449 C6X_BUILTIN_SSHL);
6450 def_builtin ("__builtin_c6x_subc", int_ftype_int_int,
6451 C6X_BUILTIN_SUBC);
6452
6453 def_builtin ("__builtin_c6x_avg2", v2hi_ftype_v2hi_v2hi,
6454 C6X_BUILTIN_AVG2);
6455 def_builtin ("__builtin_c6x_avgu4", v4qi_ftype_v4qi_v4qi,
6456 C6X_BUILTIN_AVGU4);
6457
6458 def_builtin ("__builtin_c6x_clrr", int_ftype_int_int,
6459 C6X_BUILTIN_CLRR);
6460 def_builtin ("__builtin_c6x_extr", int_ftype_int_int,
6461 C6X_BUILTIN_EXTR);
6462 def_builtin ("__builtin_c6x_extru", int_ftype_int_int,
6463 C6X_BUILTIN_EXTRU);
6464
6465 def_builtin ("__builtin_c6x_abs", int_ftype_int, C6X_BUILTIN_ABS);
6466 def_builtin ("__builtin_c6x_abs2", v2hi_ftype_v2hi, C6X_BUILTIN_ABS2);
6467}
6468
6469
6470struct builtin_description
6471{
6472 const enum insn_code icode;
6473 const char *const name;
6474 const enum c6x_builtins code;
6475};
6476
6477static const struct builtin_description bdesc_2arg[] =
6478{
6479 { CODE_FOR_saddsi3, "__builtin_c6x_sadd", C6X_BUILTIN_SADD },
6480 { CODE_FOR_ssubsi3, "__builtin_c6x_ssub", C6X_BUILTIN_SSUB },
6481 { CODE_FOR_addv2hi3, "__builtin_c6x_add2", C6X_BUILTIN_ADD2 },
6482 { CODE_FOR_subv2hi3, "__builtin_c6x_sub2", C6X_BUILTIN_SUB2 },
6483 { CODE_FOR_addv4qi3, "__builtin_c6x_add4", C6X_BUILTIN_ADD4 },
6484 { CODE_FOR_subv4qi3, "__builtin_c6x_sub4", C6X_BUILTIN_SUB4 },
6485 { CODE_FOR_ss_addv2hi3, "__builtin_c6x_sadd2", C6X_BUILTIN_SADD2 },
6486 { CODE_FOR_ss_subv2hi3, "__builtin_c6x_ssub2", C6X_BUILTIN_SSUB2 },
6487 { CODE_FOR_us_addv4qi3, "__builtin_c6x_saddu4", C6X_BUILTIN_SADDU4 },
6488
6489 { CODE_FOR_subcsi3, "__builtin_c6x_subc", C6X_BUILTIN_SUBC },
6490 { CODE_FOR_ss_ashlsi3, "__builtin_c6x_sshl", C6X_BUILTIN_SSHL },
6491
6492 { CODE_FOR_avgv2hi3, "__builtin_c6x_avg2", C6X_BUILTIN_AVG2 },
6493 { CODE_FOR_uavgv4qi3, "__builtin_c6x_avgu4", C6X_BUILTIN_AVGU4 },
6494
6495 { CODE_FOR_mulhqsq3, "__builtin_c6x_smpy", C6X_BUILTIN_SMPY },
6496 { CODE_FOR_mulhqsq3_hh, "__builtin_c6x_smpyh", C6X_BUILTIN_SMPYH },
6497 { CODE_FOR_mulhqsq3_lh, "__builtin_c6x_smpylh", C6X_BUILTIN_SMPYLH },
6498 { CODE_FOR_mulhqsq3_hl, "__builtin_c6x_smpyhl", C6X_BUILTIN_SMPYHL },
6499
6500 { CODE_FOR_mulv2hqv2sq3, "__builtin_c6x_smpy2", C6X_BUILTIN_SMPY2 },
6501
6502 { CODE_FOR_clrr, "__builtin_c6x_clrr", C6X_BUILTIN_CLRR },
6503 { CODE_FOR_extr, "__builtin_c6x_extr", C6X_BUILTIN_EXTR },
6504 { CODE_FOR_extru, "__builtin_c6x_extru", C6X_BUILTIN_EXTRU }
6505};
6506
6507static const struct builtin_description bdesc_1arg[] =
6508{
6509 { CODE_FOR_ssabssi2, "__builtin_c6x_abs", C6X_BUILTIN_ABS },
6510 { CODE_FOR_ssabsv2hi2, "__builtin_c6x_abs2", C6X_BUILTIN_ABS2 }
6511};
6512
6513/* Errors in the source file can cause expand_expr to return const0_rtx
6514 where we expect a vector. To avoid crashing, use one of the vector
6515 clear instructions. */
6516static rtx
6517safe_vector_operand (rtx x, enum machine_mode mode)
6518{
6519 if (x != const0_rtx)
6520 return x;
6521 x = gen_reg_rtx (SImode);
6522
6523 emit_insn (gen_movsi (x, CONST0_RTX (SImode)));
6524 return gen_lowpart (mode, x);
6525}
6526
6527 /* Subroutine of c6x_expand_builtin to take care of binop insns. MATCH_OP is
6528 true if the insn pattern ties the output to an extra matching input operand. */
6529
6530static rtx
6531c6x_expand_binop_builtin (enum insn_code icode, tree exp, rtx target,
6532 bool match_op)
6533{
6534 int offs = match_op ? 1 : 0;
6535 rtx pat;
6536 tree arg0 = CALL_EXPR_ARG (exp, 0);
6537 tree arg1 = CALL_EXPR_ARG (exp, 1);
6538 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6539 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6540 enum machine_mode op0mode = GET_MODE (op0);
6541 enum machine_mode op1mode = GET_MODE (op1);
6542 enum machine_mode tmode = insn_data[icode].operand[0].mode;
6543 enum machine_mode mode0 = insn_data[icode].operand[1 + offs].mode;
6544 enum machine_mode mode1 = insn_data[icode].operand[2 + offs].mode;
6545 rtx ret = target;
6546
6547 if (VECTOR_MODE_P (mode0))
6548 op0 = safe_vector_operand (op0, mode0);
6549 if (VECTOR_MODE_P (mode1))
6550 op1 = safe_vector_operand (op1, mode1);
6551
6552 if (! target
6553 || GET_MODE (target) != tmode
6554 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
6555 {
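/* For fixed-point results, allocate the result register in the
   corresponding integer mode, hand the insn pattern a lowpart view of
   it in TMODE, and return the integer-mode register to the caller.  */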
6556 if (tmode == SQmode || tmode == V2SQmode)
6557 {
6558 ret = gen_reg_rtx (tmode == SQmode ? SImode : V2SImode);
6559 target = gen_lowpart (tmode, ret);
6560 }
6561 else
6562 target = gen_reg_rtx (tmode);
6563 }
6564
6565 if ((op0mode == V2HImode || op0mode == SImode || op0mode == VOIDmode)
6566 && (mode0 == V2HQmode || mode0 == HQmode || mode0 == SQmode))
6567 {
6568 op0mode = mode0;
6569 op0 = gen_lowpart (mode0, op0);
6570 }
6571 if ((op1mode == V2HImode || op1mode == SImode || op1mode == VOIDmode)
6572 && (mode1 == V2HQmode || mode1 == HQmode || mode1 == SQmode))
6573 {
6574 op1mode = mode1;
6575 op1 = gen_lowpart (mode1, op1);
6576 }
6577 /* In case the insn wants input operands in modes different from
6578 the result, abort. */
6579 gcc_assert ((op0mode == mode0 || op0mode == VOIDmode)
6580 && (op1mode == mode1 || op1mode == VOIDmode));
6581
6582 if (! (*insn_data[icode].operand[1 + offs].predicate) (op0, mode0))
6583 op0 = copy_to_mode_reg (mode0, op0);
6584 if (! (*insn_data[icode].operand[2 + offs].predicate) (op1, mode1))
6585 op1 = copy_to_mode_reg (mode1, op1);
6586
6587 if (match_op)
6588 pat = GEN_FCN (icode) (target, target, op0, op1);
6589 else
6590 pat = GEN_FCN (icode) (target, op0, op1);
6591
6592 if (! pat)
6593 return 0;
6594
6595 emit_insn (pat);
6596
6597 return ret;
6598}
6599
6600/* Subroutine of c6x_expand_builtin to take care of unop insns. */
6601
6602static rtx
6603c6x_expand_unop_builtin (enum insn_code icode, tree exp,
6604 rtx target)
6605{
6606 rtx pat;
6607 tree arg0 = CALL_EXPR_ARG (exp, 0);
6608 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6609 enum machine_mode op0mode = GET_MODE (op0);
6610 enum machine_mode tmode = insn_data[icode].operand[0].mode;
6611 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
6612
6613 if (! target
6614 || GET_MODE (target) != tmode
6615 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
6616 target = gen_reg_rtx (tmode);
6617
6618 if (VECTOR_MODE_P (mode0))
6619 op0 = safe_vector_operand (op0, mode0);
6620
6621 if (op0mode == SImode && mode0 == HImode)
6622 {
6623 op0mode = HImode;
6624 op0 = gen_lowpart (HImode, op0);
6625 }
6626 gcc_assert (op0mode == mode0 || op0mode == VOIDmode);
6627
6628 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
6629 op0 = copy_to_mode_reg (mode0, op0);
6630
6631 pat = GEN_FCN (icode) (target, op0);
6632 if (! pat)
6633 return 0;
6634 emit_insn (pat);
6635 return target;
6636}
6637
6638/* Expand an expression EXP that calls a built-in function,
6639 with result going to TARGET if that's convenient
6640 (and in mode MODE if that's convenient).
6641 SUBTARGET may be used as the target for computing one of EXP's operands.
6642 IGNORE is nonzero if the value is to be ignored. */
6643
6644static rtx
6645c6x_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
6646 rtx subtarget ATTRIBUTE_UNUSED,
6647 enum machine_mode mode ATTRIBUTE_UNUSED,
6648 int ignore ATTRIBUTE_UNUSED)
6649{
6650 size_t i;
6651 const struct builtin_description *d;
6652 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6653 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6654
6655 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
6656 if (d->code == fcode)
6657 return c6x_expand_binop_builtin (d->icode, exp, target,
6658 fcode == C6X_BUILTIN_CLRR);
6659
6660 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
6661 if (d->code == fcode)
6662 return c6x_expand_unop_builtin (d->icode, exp, target);
6663
6664 gcc_unreachable ();
6665}
7e5fc0c4 6666
6667/* Target unwind frame info is generated from dwarf CFI directives, so
6668 always output dwarf2 unwind info. */
6669
6670static enum unwind_info_type
6671c6x_debug_unwind_info (void)
6672{
6673 if (flag_unwind_tables || flag_exceptions)
6674 return UI_DWARF2;
6675
6676 return default_debug_unwind_info ();
6677}
5aa04b01 6678\f
6679/* Target Structure. */
6680
6681/* Initialize the GCC target structure. */
6682#undef TARGET_FUNCTION_ARG
6683#define TARGET_FUNCTION_ARG c6x_function_arg
6684#undef TARGET_FUNCTION_ARG_ADVANCE
6685#define TARGET_FUNCTION_ARG_ADVANCE c6x_function_arg_advance
6686#undef TARGET_FUNCTION_ARG_BOUNDARY
6687#define TARGET_FUNCTION_ARG_BOUNDARY c6x_function_arg_boundary
6688#undef TARGET_FUNCTION_ARG_ROUND_BOUNDARY
6689#define TARGET_FUNCTION_ARG_ROUND_BOUNDARY \
6690 c6x_function_arg_round_boundary
6691#undef TARGET_FUNCTION_VALUE_REGNO_P
6692#define TARGET_FUNCTION_VALUE_REGNO_P c6x_function_value_regno_p
6693#undef TARGET_FUNCTION_VALUE
6694#define TARGET_FUNCTION_VALUE c6x_function_value
6695#undef TARGET_LIBCALL_VALUE
6696#define TARGET_LIBCALL_VALUE c6x_libcall_value
6697#undef TARGET_RETURN_IN_MEMORY
6698#define TARGET_RETURN_IN_MEMORY c6x_return_in_memory
6699#undef TARGET_RETURN_IN_MSB
6700#define TARGET_RETURN_IN_MSB c6x_return_in_msb
6701#undef TARGET_PASS_BY_REFERENCE
6702#define TARGET_PASS_BY_REFERENCE c6x_pass_by_reference
6703#undef TARGET_CALLEE_COPIES
6704#define TARGET_CALLEE_COPIES c6x_callee_copies
6705#undef TARGET_STRUCT_VALUE_RTX
6706#define TARGET_STRUCT_VALUE_RTX c6x_struct_value_rtx
6707#undef TARGET_FUNCTION_OK_FOR_SIBCALL
6708#define TARGET_FUNCTION_OK_FOR_SIBCALL c6x_function_ok_for_sibcall
6709
6710#undef TARGET_ASM_OUTPUT_MI_THUNK
6711#define TARGET_ASM_OUTPUT_MI_THUNK c6x_output_mi_thunk
6712#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
6713#define TARGET_ASM_CAN_OUTPUT_MI_THUNK c6x_can_output_mi_thunk
6714
6715#undef TARGET_BUILD_BUILTIN_VA_LIST
6716#define TARGET_BUILD_BUILTIN_VA_LIST c6x_build_builtin_va_list
6717
6718#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
6719#define TARGET_ASM_TRAMPOLINE_TEMPLATE c6x_asm_trampoline_template
6720#undef TARGET_TRAMPOLINE_INIT
6721#define TARGET_TRAMPOLINE_INIT c6x_initialize_trampoline
6722
6723#undef TARGET_LEGITIMATE_CONSTANT_P
6724#define TARGET_LEGITIMATE_CONSTANT_P c6x_legitimate_constant_p
6725#undef TARGET_LEGITIMATE_ADDRESS_P
6726#define TARGET_LEGITIMATE_ADDRESS_P c6x_legitimate_address_p
6727
6728#undef TARGET_IN_SMALL_DATA_P
6729#define TARGET_IN_SMALL_DATA_P c6x_in_small_data_p
6730#undef TARGET_ASM_SELECT_RTX_SECTION
6731#define TARGET_ASM_SELECT_RTX_SECTION c6x_select_rtx_section
6732#undef TARGET_ASM_SELECT_SECTION
6733#define TARGET_ASM_SELECT_SECTION c6x_elf_select_section
6734#undef TARGET_ASM_UNIQUE_SECTION
6735#define TARGET_ASM_UNIQUE_SECTION c6x_elf_unique_section
6736#undef TARGET_SECTION_TYPE_FLAGS
6737#define TARGET_SECTION_TYPE_FLAGS c6x_section_type_flags
6738#undef TARGET_HAVE_SRODATA_SECTION
6739#define TARGET_HAVE_SRODATA_SECTION true
6740#undef TARGET_ASM_MERGEABLE_RODATA_PREFIX
6741#define TARGET_ASM_MERGEABLE_RODATA_PREFIX ".const"
6742
6743#undef TARGET_OPTION_OVERRIDE
6744#define TARGET_OPTION_OVERRIDE c6x_option_override
6745#undef TARGET_CONDITIONAL_REGISTER_USAGE
6746#define TARGET_CONDITIONAL_REGISTER_USAGE c6x_conditional_register_usage
6747
6748#undef TARGET_INIT_LIBFUNCS
6749#define TARGET_INIT_LIBFUNCS c6x_init_libfuncs
6750#undef TARGET_LIBFUNC_GNU_PREFIX
6751#define TARGET_LIBFUNC_GNU_PREFIX true
6752
6753#undef TARGET_SCALAR_MODE_SUPPORTED_P
6754#define TARGET_SCALAR_MODE_SUPPORTED_P c6x_scalar_mode_supported_p
6755#undef TARGET_VECTOR_MODE_SUPPORTED_P
6756#define TARGET_VECTOR_MODE_SUPPORTED_P c6x_vector_mode_supported_p
6757#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
6758#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE c6x_preferred_simd_mode
6759
6760#undef TARGET_RTX_COSTS
6761#define TARGET_RTX_COSTS c6x_rtx_costs
6762
6763#undef TARGET_SCHED_INIT
6764#define TARGET_SCHED_INIT c6x_sched_init
6765#undef TARGET_SCHED_SET_SCHED_FLAGS
6766#define TARGET_SCHED_SET_SCHED_FLAGS c6x_set_sched_flags
6767#undef TARGET_SCHED_ADJUST_COST
6768#define TARGET_SCHED_ADJUST_COST c6x_adjust_cost
6769#undef TARGET_SCHED_ISSUE_RATE
6770#define TARGET_SCHED_ISSUE_RATE c6x_issue_rate
6771#undef TARGET_SCHED_VARIABLE_ISSUE
6772#define TARGET_SCHED_VARIABLE_ISSUE c6x_variable_issue
6773#undef TARGET_SCHED_REORDER
6774#define TARGET_SCHED_REORDER c6x_sched_reorder
6775#undef TARGET_SCHED_REORDER2
6776#define TARGET_SCHED_REORDER2 c6x_sched_reorder2
87ded687 6777#undef TARGET_SCHED_DFA_NEW_CYCLE
6778#define TARGET_SCHED_DFA_NEW_CYCLE c6x_dfa_new_cycle
6779#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
6780#define TARGET_SCHED_DFA_PRE_CYCLE_INSN c6x_sched_dfa_pre_cycle_insn
5aa04b01 6781#undef TARGET_SCHED_EXPOSED_PIPELINE
6782#define TARGET_SCHED_EXPOSED_PIPELINE true
6783
6784#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
6785#define TARGET_SCHED_ALLOC_SCHED_CONTEXT c6x_alloc_sched_context
6786#undef TARGET_SCHED_INIT_SCHED_CONTEXT
6787#define TARGET_SCHED_INIT_SCHED_CONTEXT c6x_init_sched_context
6788#undef TARGET_SCHED_SET_SCHED_CONTEXT
6789#define TARGET_SCHED_SET_SCHED_CONTEXT c6x_set_sched_context
87ded687 6790#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
6791#define TARGET_SCHED_CLEAR_SCHED_CONTEXT c6x_clear_sched_context
5aa04b01 6792#undef TARGET_SCHED_FREE_SCHED_CONTEXT
6793#define TARGET_SCHED_FREE_SCHED_CONTEXT c6x_free_sched_context
6794
6795#undef TARGET_CAN_ELIMINATE
6796#define TARGET_CAN_ELIMINATE c6x_can_eliminate
6797
6798#undef TARGET_PREFERRED_RENAME_CLASS
6799#define TARGET_PREFERRED_RENAME_CLASS c6x_preferred_rename_class
6800
6801#undef TARGET_MACHINE_DEPENDENT_REORG
6802#define TARGET_MACHINE_DEPENDENT_REORG c6x_reorg
6803
6804#undef TARGET_ASM_FILE_START
6805#define TARGET_ASM_FILE_START c6x_file_start
6806
6807#undef TARGET_PRINT_OPERAND
6808#define TARGET_PRINT_OPERAND c6x_print_operand
6809#undef TARGET_PRINT_OPERAND_ADDRESS
6810#define TARGET_PRINT_OPERAND_ADDRESS c6x_print_operand_address
6811#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
6812#define TARGET_PRINT_OPERAND_PUNCT_VALID_P c6x_print_operand_punct_valid_p
6813
6814/* C6x unwinding tables use a different format for the typeinfo tables. */
6815#undef TARGET_ASM_TTYPE
6816#define TARGET_ASM_TTYPE c6x_output_ttype
6817
7e5fc0c4 6818/* The C6x ABI follows the ARM EABI exception handling rules. */
6819#undef TARGET_ARM_EABI_UNWINDER
6820#define TARGET_ARM_EABI_UNWINDER true
6821
9936b574 6822#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
6823#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY c6x_asm_emit_except_personality
6824
6825#undef TARGET_ASM_INIT_SECTIONS
6826#define TARGET_ASM_INIT_SECTIONS c6x_asm_init_sections
6827
7e5fc0c4 6828#undef TARGET_DEBUG_UNWIND_INFO
6829#define TARGET_DEBUG_UNWIND_INFO c6x_debug_unwind_info
6830
5aa04b01 6831#undef TARGET_DWARF_REGISTER_SPAN
6832#define TARGET_DWARF_REGISTER_SPAN c6x_dwarf_register_span
6833
6834#undef TARGET_INIT_BUILTINS
6835#define TARGET_INIT_BUILTINS c6x_init_builtins
6836#undef TARGET_EXPAND_BUILTIN
6837#define TARGET_EXPAND_BUILTIN c6x_expand_builtin
6838#undef TARGET_BUILTIN_DECL
6839#define TARGET_BUILTIN_DECL c6x_builtin_decl
6840
6841struct gcc_target targetm = TARGET_INITIALIZER;
6842
6843#include "gt-c6x.h"