gcc/config/c6x/c6x.c
1 /* Target Code for TI C6X
2 Copyright (C) 2010-2016 Free Software Foundation, Inc.
3 Contributed by Andrew Jenner <andrew@codesourcery.com>
4 Contributed by Bernd Schmidt <bernds@codesourcery.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 3, or (at your
11 option) any later version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple-expr.h"
30 #include "cfghooks.h"
31 #include "df.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "cgraph.h"
40 #include "diagnostic-core.h"
41 #include "stor-layout.h"
42 #include "varasm.h"
43 #include "calls.h"
44 #include "output.h"
45 #include "insn-attr.h"
46 #include "explow.h"
47 #include "expr.h"
48 #include "cfgrtl.h"
49 #include "sched-int.h"
50 #include "tm-constrs.h"
51 #include "langhooks.h"
52 #include "sel-sched.h"
53 #include "debug.h"
54 #include "hw-doloop.h"
55 #include "regrename.h"
56 #include "dumpfile.h"
57 #include "builtins.h"
58
59 /* This file should be included last. */
60 #include "target-def.h"
61
62 /* Table of supported architecture variants. */
63 typedef struct
64 {
65 const char *arch;
66 enum c6x_cpu_type type;
67 unsigned short features;
68 } c6x_arch_table;
69
70 /* A list of all ISAs, mapping each one to a representative device.
71 Used for -march selection. */
72 static const c6x_arch_table all_isas[] =
73 {
74 #define C6X_ISA(NAME,DEVICE,FLAGS) \
75 { NAME, DEVICE, FLAGS },
76 #include "c6x-isas.def"
77 #undef C6X_ISA
78 { NULL, C6X_CPU_C62X, 0 }
79 };
80
81 /* This is the parsed result of the "-march=" option, if given. */
82 enum c6x_cpu_type c6x_arch = C6X_DEFAULT_ARCH;
83
84 /* A mask of insn types that are allowed by the architecture selected by
85 the -march option. */
86 unsigned long c6x_insn_mask = C6X_DEFAULT_INSN_MASK;
87
88 /* The instruction that is being output (as obtained from FINAL_PRESCAN_INSN).
89 */
90 static rtx_insn *c6x_current_insn = NULL;
91
92 /* A decl we build to access __c6xabi_DSBT_base. */
93 static GTY(()) tree dsbt_decl;
94 \f
95 /* Determines whether we run our final scheduling pass or not. We always
96 avoid the normal second scheduling pass. */
97 static int c6x_flag_schedule_insns2;
98
99 /* Determines whether we run variable tracking in machine dependent
100 reorganization. */
101 static int c6x_flag_var_tracking;
102
103 /* Determines whether we use modulo scheduling. */
104 static int c6x_flag_modulo_sched;
105
106 /* Record the state of flag_pic before we set it to 1 for DSBT. */
107 int c6x_initial_flag_pic;
108 \f
109 typedef struct
110 {
111 /* We record the clock cycle for every insn during scheduling. */
112 int clock;
113 /* After scheduling, we run assign_reservations to choose unit
114 reservations for all insns. These are recorded here. */
115 int reservation;
116 /* Records the new condition for insns which must be made
117 conditional after scheduling. An entry of NULL_RTX means no such
118 change is necessary. */
119 rtx new_cond;
120 /* True for the first insn that was scheduled in an ebb. */
121 bool ebb_start;
122 /* The scheduler state after the insn, transformed into a mask of UNIT_QID
123 bits rather than storing the state. Meaningful only for the last
124 insn in a cycle. */
125 unsigned int unit_mask;
126 } c6x_sched_insn_info;
127
128
129 /* Record a c6x_sched_insn_info structure for every insn in the function. */
130 static vec<c6x_sched_insn_info> insn_info;
131
132 #define INSN_INFO_LENGTH (insn_info).length ()
133 #define INSN_INFO_ENTRY(N) (insn_info[(N)])
134
135 static bool done_cfi_sections;
136
137 #define RESERVATION_FLAG_D 1
138 #define RESERVATION_FLAG_L 2
139 #define RESERVATION_FLAG_S 4
140 #define RESERVATION_FLAG_M 8
141 #define RESERVATION_FLAG_DL (RESERVATION_FLAG_D | RESERVATION_FLAG_L)
142 #define RESERVATION_FLAG_DS (RESERVATION_FLAG_D | RESERVATION_FLAG_S)
143 #define RESERVATION_FLAG_LS (RESERVATION_FLAG_L | RESERVATION_FLAG_S)
144 #define RESERVATION_FLAG_DLS (RESERVATION_FLAG_D | RESERVATION_FLAG_LS)
145
146 /* The DFA names of the units. */
147 static const char *const c6x_unit_names[] =
148 {
149 "d1", "l1", "s1", "m1", "fps1", "fpl1", "adddps1", "adddpl1",
150 "d2", "l2", "s2", "m2", "fps2", "fpl2", "adddps2", "adddpl2"
151 };
152
153 /* The DFA unit number for each unit in c6x_unit_names[]. */
154 static int c6x_unit_codes[ARRAY_SIZE (c6x_unit_names)];
155
156 /* Unit query IDs. */
157 #define UNIT_QID_D1 0
158 #define UNIT_QID_L1 1
159 #define UNIT_QID_S1 2
160 #define UNIT_QID_M1 3
161 #define UNIT_QID_FPS1 4
162 #define UNIT_QID_FPL1 5
163 #define UNIT_QID_ADDDPS1 6
164 #define UNIT_QID_ADDDPL1 7
165 #define UNIT_QID_SIDE_OFFSET 8
166
167 #define RESERVATION_S1 2
168 #define RESERVATION_S2 10
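/* Note (derived from the tables above): these are indices into
   c6x_unit_names, where entry 2 is "s1" and entry 10 is "s2"; equivalently,
   RESERVATION_S2 equals UNIT_QID_S1 + UNIT_QID_SIDE_OFFSET.  */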
169
170 /* An enum for the unit requirements we count in the UNIT_REQS table. */
171 enum unitreqs
172 {
173 UNIT_REQ_D,
174 UNIT_REQ_L,
175 UNIT_REQ_S,
176 UNIT_REQ_M,
177 UNIT_REQ_DL,
178 UNIT_REQ_DS,
179 UNIT_REQ_LS,
180 UNIT_REQ_DLS,
181 UNIT_REQ_T,
182 UNIT_REQ_X,
183 UNIT_REQ_MAX
184 };
185
186 /* A table used to count unit requirements. Used when computing minimum
187 iteration intervals. */
188 typedef int unit_req_table[2][UNIT_REQ_MAX];
189 static unit_req_table unit_reqs;
190 \f
191 /* Register map for debugging. */
192 unsigned const dbx_register_map[FIRST_PSEUDO_REGISTER] =
193 {
194 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* A0 - A15. */
195 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, /* A16 - A32. */
196 50, 51, 52,
197 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, /* B0 - B15. */
198 29, 30, 31,
199 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, /* B16 - B32. */
200 66, 67, 68,
201 -1, -1, -1 /* FP, ARGP, ILC. */
202 };
203 \f
204 /* Allocate a new, cleared machine_function structure. */
205
206 static struct machine_function *
207 c6x_init_machine_status (void)
208 {
209 return ggc_cleared_alloc<machine_function> ();
210 }
211
212 /* Implement TARGET_OPTION_OVERRIDE. */
213
214 static void
215 c6x_option_override (void)
216 {
217 unsigned i;
218
219 if (global_options_set.x_c6x_arch_option)
220 {
221 c6x_arch = all_isas[c6x_arch_option].type;
222 c6x_insn_mask &= ~C6X_INSNS_ALL_CPU_BITS;
223 c6x_insn_mask |= all_isas[c6x_arch_option].features;
224 }
225
226 c6x_flag_schedule_insns2 = flag_schedule_insns_after_reload;
227 flag_schedule_insns_after_reload = 0;
228
229 c6x_flag_modulo_sched = flag_modulo_sched;
230 flag_modulo_sched = 0;
231
232 init_machine_status = c6x_init_machine_status;
233
234 for (i = 0; i < ARRAY_SIZE (c6x_unit_names); i++)
235 c6x_unit_codes[i] = get_cpu_unit_code (c6x_unit_names[i]);
236
237 if (flag_pic && !TARGET_DSBT)
238 {
239 error ("-fpic and -fPIC not supported without -mdsbt on this target");
240 flag_pic = 0;
241 }
242 c6x_initial_flag_pic = flag_pic;
243 if (TARGET_DSBT && !flag_pic)
244 flag_pic = 1;
245 }
246
247
248 /* Implement the TARGET_CONDITIONAL_REGISTER_USAGE hook. */
249
250 static void
251 c6x_conditional_register_usage (void)
252 {
253 int i;
254 if (c6x_arch == C6X_CPU_C62X || c6x_arch == C6X_CPU_C67X)
255 for (i = 16; i < 32; i++)
256 {
257 fixed_regs[i] = 1;
258 fixed_regs[32 + i] = 1;
259 }
260 if (TARGET_INSNS_64)
261 {
262 SET_HARD_REG_BIT (reg_class_contents[(int)PREDICATE_A_REGS],
263 REG_A0);
264 SET_HARD_REG_BIT (reg_class_contents[(int)PREDICATE_REGS],
265 REG_A0);
266 CLEAR_HARD_REG_BIT (reg_class_contents[(int)NONPREDICATE_A_REGS],
267 REG_A0);
268 CLEAR_HARD_REG_BIT (reg_class_contents[(int)NONPREDICATE_REGS],
269 REG_A0);
270 }
271 }
272 \f
273 static GTY(()) rtx eqdf_libfunc;
274 static GTY(()) rtx nedf_libfunc;
275 static GTY(()) rtx ledf_libfunc;
276 static GTY(()) rtx ltdf_libfunc;
277 static GTY(()) rtx gedf_libfunc;
278 static GTY(()) rtx gtdf_libfunc;
279 static GTY(()) rtx eqsf_libfunc;
280 static GTY(()) rtx nesf_libfunc;
281 static GTY(()) rtx lesf_libfunc;
282 static GTY(()) rtx ltsf_libfunc;
283 static GTY(()) rtx gesf_libfunc;
284 static GTY(()) rtx gtsf_libfunc;
285 static GTY(()) rtx strasgi_libfunc;
286 static GTY(()) rtx strasgi64p_libfunc;
287
288 /* Implement the TARGET_INIT_LIBFUNCS macro. We use this to rename library
289 functions to match the C6x ABI. */
290
291 static void
292 c6x_init_libfuncs (void)
293 {
294 /* Double-precision floating-point arithmetic. */
295 set_optab_libfunc (add_optab, DFmode, "__c6xabi_addd");
296 set_optab_libfunc (sdiv_optab, DFmode, "__c6xabi_divd");
297 set_optab_libfunc (smul_optab, DFmode, "__c6xabi_mpyd");
298 set_optab_libfunc (neg_optab, DFmode, "__c6xabi_negd");
299 set_optab_libfunc (sub_optab, DFmode, "__c6xabi_subd");
300
301 /* Single-precision floating-point arithmetic. */
302 set_optab_libfunc (add_optab, SFmode, "__c6xabi_addf");
303 set_optab_libfunc (sdiv_optab, SFmode, "__c6xabi_divf");
304 set_optab_libfunc (smul_optab, SFmode, "__c6xabi_mpyf");
305 set_optab_libfunc (neg_optab, SFmode, "__c6xabi_negf");
306 set_optab_libfunc (sub_optab, SFmode, "__c6xabi_subf");
307
308 /* Floating-point comparisons. */
309 eqsf_libfunc = init_one_libfunc ("__c6xabi_eqf");
310 nesf_libfunc = init_one_libfunc ("__c6xabi_neqf");
311 lesf_libfunc = init_one_libfunc ("__c6xabi_lef");
312 ltsf_libfunc = init_one_libfunc ("__c6xabi_ltf");
313 gesf_libfunc = init_one_libfunc ("__c6xabi_gef");
314 gtsf_libfunc = init_one_libfunc ("__c6xabi_gtf");
315 eqdf_libfunc = init_one_libfunc ("__c6xabi_eqd");
316 nedf_libfunc = init_one_libfunc ("__c6xabi_neqd");
317 ledf_libfunc = init_one_libfunc ("__c6xabi_led");
318 ltdf_libfunc = init_one_libfunc ("__c6xabi_ltd");
319 gedf_libfunc = init_one_libfunc ("__c6xabi_ged");
320 gtdf_libfunc = init_one_libfunc ("__c6xabi_gtd");
321
322 set_optab_libfunc (eq_optab, SFmode, NULL);
323 set_optab_libfunc (ne_optab, SFmode, "__c6xabi_neqf");
324 set_optab_libfunc (gt_optab, SFmode, NULL);
325 set_optab_libfunc (ge_optab, SFmode, NULL);
326 set_optab_libfunc (lt_optab, SFmode, NULL);
327 set_optab_libfunc (le_optab, SFmode, NULL);
328 set_optab_libfunc (unord_optab, SFmode, "__c6xabi_unordf");
329 set_optab_libfunc (eq_optab, DFmode, NULL);
330 set_optab_libfunc (ne_optab, DFmode, "__c6xabi_neqd");
331 set_optab_libfunc (gt_optab, DFmode, NULL);
332 set_optab_libfunc (ge_optab, DFmode, NULL);
333 set_optab_libfunc (lt_optab, DFmode, NULL);
334 set_optab_libfunc (le_optab, DFmode, NULL);
335 set_optab_libfunc (unord_optab, DFmode, "__c6xabi_unordd");
336
337 /* Floating-point to integer conversions. */
338 set_conv_libfunc (sfix_optab, SImode, DFmode, "__c6xabi_fixdi");
339 set_conv_libfunc (ufix_optab, SImode, DFmode, "__c6xabi_fixdu");
340 set_conv_libfunc (sfix_optab, DImode, DFmode, "__c6xabi_fixdlli");
341 set_conv_libfunc (ufix_optab, DImode, DFmode, "__c6xabi_fixdull");
342 set_conv_libfunc (sfix_optab, SImode, SFmode, "__c6xabi_fixfi");
343 set_conv_libfunc (ufix_optab, SImode, SFmode, "__c6xabi_fixfu");
344 set_conv_libfunc (sfix_optab, DImode, SFmode, "__c6xabi_fixflli");
345 set_conv_libfunc (ufix_optab, DImode, SFmode, "__c6xabi_fixfull");
346
347 /* Conversions between floating types. */
348 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__c6xabi_cvtdf");
349 set_conv_libfunc (sext_optab, DFmode, SFmode, "__c6xabi_cvtfd");
350
351 /* Integer to floating-point conversions. */
352 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__c6xabi_fltid");
353 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__c6xabi_fltud");
354 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__c6xabi_fltllid");
355 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__c6xabi_fltulld");
356 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__c6xabi_fltif");
357 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__c6xabi_fltuf");
358 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__c6xabi_fltllif");
359 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__c6xabi_fltullf");
360
361 /* Long long. */
362 set_optab_libfunc (smul_optab, DImode, "__c6xabi_mpyll");
363 set_optab_libfunc (ashl_optab, DImode, "__c6xabi_llshl");
364 set_optab_libfunc (lshr_optab, DImode, "__c6xabi_llshru");
365 set_optab_libfunc (ashr_optab, DImode, "__c6xabi_llshr");
366
367 set_optab_libfunc (sdiv_optab, SImode, "__c6xabi_divi");
368 set_optab_libfunc (udiv_optab, SImode, "__c6xabi_divu");
369 set_optab_libfunc (smod_optab, SImode, "__c6xabi_remi");
370 set_optab_libfunc (umod_optab, SImode, "__c6xabi_remu");
371 set_optab_libfunc (sdivmod_optab, SImode, "__c6xabi_divremi");
372 set_optab_libfunc (udivmod_optab, SImode, "__c6xabi_divremu");
373 set_optab_libfunc (sdiv_optab, DImode, "__c6xabi_divlli");
374 set_optab_libfunc (udiv_optab, DImode, "__c6xabi_divull");
375 set_optab_libfunc (smod_optab, DImode, "__c6xabi_remlli");
376 set_optab_libfunc (umod_optab, DImode, "__c6xabi_remull");
377 set_optab_libfunc (udivmod_optab, DImode, "__c6xabi_divremull");
378
379 /* Block move. */
380 strasgi_libfunc = init_one_libfunc ("__c6xabi_strasgi");
381 strasgi64p_libfunc = init_one_libfunc ("__c6xabi_strasgi_64plus");
382 }
383
384 /* Begin the assembly file. */
385
386 static void
387 c6x_file_start (void)
388 {
389 /* Variable tracking should be run after all optimizations which change order
390 of insns. It also needs a valid CFG. This can't be done in
391 c6x_override_options, because flag_var_tracking is finalized after
392 that. */
393 c6x_flag_var_tracking = flag_var_tracking;
394 flag_var_tracking = 0;
395
396 done_cfi_sections = false;
397 default_file_start ();
398
399 /* Arrays are aligned to 8-byte boundaries. */
400 asm_fprintf (asm_out_file,
401 "\t.c6xabi_attribute Tag_ABI_array_object_alignment, 0\n");
402 asm_fprintf (asm_out_file,
403 "\t.c6xabi_attribute Tag_ABI_array_object_align_expected, 0\n");
404
405 /* Stack alignment is 8 bytes. */
406 asm_fprintf (asm_out_file,
407 "\t.c6xabi_attribute Tag_ABI_stack_align_needed, 0\n");
408 asm_fprintf (asm_out_file,
409 "\t.c6xabi_attribute Tag_ABI_stack_align_preserved, 0\n");
410
411 #if 0 /* FIXME: Reenable when TI's tools are fixed. */
412 /* ??? Ideally we'd check flag_short_wchar somehow. */
413 asm_fprintf (asm_out_file, "\t.c6xabi_attribute Tag_ABI_wchar_t, %d\n", 2);
414 #endif
415
416 /* We conform to version 1.0 of the ABI. */
417 asm_fprintf (asm_out_file,
418 "\t.c6xabi_attribute Tag_ABI_conformance, \"1.0\"\n");
419
420 }
421
422 /* The LTO frontend only enables exceptions when it sees a function that
423 uses them. This changes the return value of dwarf2out_do_frame, so we
424 have to check before every function. */
425
426 void
427 c6x_output_file_unwind (FILE * f)
428 {
429 if (done_cfi_sections)
430 return;
431
432 /* Output a .cfi_sections directive. */
433 if (dwarf2out_do_frame ())
434 {
435 if (flag_unwind_tables || flag_exceptions)
436 {
437 if (write_symbols == DWARF2_DEBUG
438 || write_symbols == VMS_AND_DWARF2_DEBUG)
439 asm_fprintf (f, "\t.cfi_sections .debug_frame, .c6xabi.exidx\n");
440 else
441 asm_fprintf (f, "\t.cfi_sections .c6xabi.exidx\n");
442 }
443 else
444 asm_fprintf (f, "\t.cfi_sections .debug_frame\n");
445 done_cfi_sections = true;
446 }
447 }
448
449 /* Output unwind directives at the end of a function. */
450
451 static void
452 c6x_output_fn_unwind (FILE * f)
453 {
454 /* Return immediately if we are not generating unwinding tables. */
455 if (! (flag_unwind_tables || flag_exceptions))
456 return;
457
458 /* If this function will never be unwound, then mark it as such. */
459 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
460 && (TREE_NOTHROW (current_function_decl)
461 || crtl->all_throwers_are_sibcalls))
462 fputs("\t.cantunwind\n", f);
463
464 fputs ("\t.endp\n", f);
465 }
466
467 \f
468 /* Stack and Calling. */
469
470 int argument_registers[10] =
471 {
472 REG_A4, REG_B4,
473 REG_A6, REG_B6,
474 REG_A8, REG_B8,
475 REG_A10, REG_B10,
476 REG_A12, REG_B12
477 };
478
479 /* Implements the macro INIT_CUMULATIVE_ARGS defined in c6x.h. */
480
481 void
482 c6x_init_cumulative_args (CUMULATIVE_ARGS *cum, const_tree fntype, rtx libname,
483 int n_named_args ATTRIBUTE_UNUSED)
484 {
485 cum->count = 0;
486 cum->nregs = 10;
487 if (!libname && fntype)
488 {
489 /* We need to find out the number of named arguments. Unfortunately,
490 for incoming arguments, N_NAMED_ARGS is set to -1. */
491 if (stdarg_p (fntype))
492 cum->nregs = type_num_arguments (fntype) - 1;
493 if (cum->nregs > 10)
494 cum->nregs = 10;
495 }
496 }
497
498 /* Implements the macro FUNCTION_ARG defined in c6x.h. */
499
500 static rtx
501 c6x_function_arg (cumulative_args_t cum_v, machine_mode mode,
502 const_tree type, bool named ATTRIBUTE_UNUSED)
503 {
504 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
505 if (cum->count >= cum->nregs)
506 return NULL_RTX;
507 if (type)
508 {
509 HOST_WIDE_INT size = int_size_in_bytes (type);
510 if (TARGET_BIG_ENDIAN && AGGREGATE_TYPE_P (type))
511 {
512 if (size > 4)
513 {
514 rtx reg1 = gen_rtx_REG (SImode, argument_registers[cum->count] + 1);
515 rtx reg2 = gen_rtx_REG (SImode, argument_registers[cum->count]);
516 rtvec vec = gen_rtvec (2, gen_rtx_EXPR_LIST (VOIDmode, reg1, const0_rtx),
517 gen_rtx_EXPR_LIST (VOIDmode, reg2, GEN_INT (4)));
518 return gen_rtx_PARALLEL (mode, vec);
519 }
520 }
521 }
522 return gen_rtx_REG (mode, argument_registers[cum->count]);
523 }
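/* In the big-endian aggregate case above, the PARALLEL places bytes 0-3
   (the most significant word) in argument_registers[cum->count] + 1 and
   bytes 4-7 in argument_registers[cum->count] itself, so the most
   significant word ends up in the higher register of the pair.  */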
524
525 static void
526 c6x_function_arg_advance (cumulative_args_t cum_v,
527 machine_mode mode ATTRIBUTE_UNUSED,
528 const_tree type ATTRIBUTE_UNUSED,
529 bool named ATTRIBUTE_UNUSED)
530 {
531 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
532 cum->count++;
533 }
534
535
536 /* Return true if BLOCK_REG_PADDING (MODE, TYPE, FIRST) should return
537 upward rather than downward. */
538
539 bool
540 c6x_block_reg_pad_upward (machine_mode mode ATTRIBUTE_UNUSED,
541 const_tree type, bool first)
542 {
543 HOST_WIDE_INT size;
544
545 if (!TARGET_BIG_ENDIAN)
546 return true;
547 if (!first)
548 return true;
549 if (!type)
550 return true;
551 size = int_size_in_bytes (type);
552 return size == 3;
553 }
554
555 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. */
556
557 static unsigned int
558 c6x_function_arg_boundary (machine_mode mode, const_tree type)
559 {
560 unsigned int boundary = type ? TYPE_ALIGN (type) : GET_MODE_BITSIZE (mode);
561
562 if (boundary > BITS_PER_WORD)
563 return 2 * BITS_PER_WORD;
564
565 if (mode == BLKmode)
566 {
567 HOST_WIDE_INT size = int_size_in_bytes (type);
568 if (size > 4)
569 return 2 * BITS_PER_WORD;
570 if (boundary < BITS_PER_WORD)
571 {
572 if (size >= 3)
573 return BITS_PER_WORD;
574 if (size >= 2)
575 return 2 * BITS_PER_UNIT;
576 }
577 }
578 return boundary;
579 }
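/* For example, a BLKmode argument larger than 4 bytes is passed with
   2 * BITS_PER_WORD alignment; a 3- or 4-byte BLKmode argument whose type
   alignment is below a word is rounded up to BITS_PER_WORD, and a 2-byte
   one to 2 * BITS_PER_UNIT.  */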
580
581 /* Implement TARGET_FUNCTION_ARG_ROUND_BOUNDARY. */
582 static unsigned int
583 c6x_function_arg_round_boundary (machine_mode mode, const_tree type)
584 {
585 return c6x_function_arg_boundary (mode, type);
586 }
587
588 /* TARGET_FUNCTION_VALUE implementation. Returns an RTX representing the place
589 where function FUNC returns or receives a value of data type TYPE. */
590
591 static rtx
592 c6x_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED,
593 bool outgoing ATTRIBUTE_UNUSED)
594 {
595 /* Functions return values in register A4. When returning aggregates, we may
596 have to adjust for endianness. */
597 if (TARGET_BIG_ENDIAN && type && AGGREGATE_TYPE_P (type))
598 {
599 HOST_WIDE_INT size = int_size_in_bytes (type);
600 if (size > 4)
601 {
602
603 rtx reg1 = gen_rtx_REG (SImode, REG_A4 + 1);
604 rtx reg2 = gen_rtx_REG (SImode, REG_A4);
605 rtvec vec = gen_rtvec (2, gen_rtx_EXPR_LIST (VOIDmode, reg1, const0_rtx),
606 gen_rtx_EXPR_LIST (VOIDmode, reg2, GEN_INT (4)));
607 return gen_rtx_PARALLEL (TYPE_MODE (type), vec);
608 }
609 }
610 return gen_rtx_REG (TYPE_MODE (type), REG_A4);
611 }
612
613 /* Implement TARGET_LIBCALL_VALUE. */
614
615 static rtx
616 c6x_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
617 {
618 return gen_rtx_REG (mode, REG_A4);
619 }
620
621 /* TARGET_STRUCT_VALUE_RTX implementation. */
622
623 static rtx
624 c6x_struct_value_rtx (tree type ATTRIBUTE_UNUSED, int incoming ATTRIBUTE_UNUSED)
625 {
626 return gen_rtx_REG (Pmode, REG_A3);
627 }
628
629 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
630
631 static bool
632 c6x_function_value_regno_p (const unsigned int regno)
633 {
634 return regno == REG_A4;
635 }
636
637 /* Types larger than 64 bits, and variable-sized types, are passed by
638 reference. The callee must copy them; see c6x_callee_copies. */
639
640 static bool
641 c6x_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
642 machine_mode mode, const_tree type,
643 bool named ATTRIBUTE_UNUSED)
644 {
645 int size = -1;
646 if (type)
647 size = int_size_in_bytes (type);
648 else if (mode != VOIDmode)
649 size = GET_MODE_SIZE (mode);
650 return size > 2 * UNITS_PER_WORD || size == -1;
651 }
652
653 /* Decide whether a type should be returned in memory (true)
654 or in a register (false). This is called by the macro
655 TARGET_RETURN_IN_MEMORY. */
656
657 static bool
658 c6x_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
659 {
660 int size = int_size_in_bytes (type);
661 return size > 2 * UNITS_PER_WORD || size == -1;
662 }
663
664 /* Values which must be returned in the most-significant end of the return
665 register. */
666
667 static bool
668 c6x_return_in_msb (const_tree valtype)
669 {
670 HOST_WIDE_INT size = int_size_in_bytes (valtype);
671 return TARGET_BIG_ENDIAN && AGGREGATE_TYPE_P (valtype) && size == 3;
672 }
673
674 /* Implement TARGET_CALLEE_COPIES. */
675
676 static bool
677 c6x_callee_copies (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
678 machine_mode mode ATTRIBUTE_UNUSED,
679 const_tree type ATTRIBUTE_UNUSED,
680 bool named ATTRIBUTE_UNUSED)
681 {
682 return true;
683 }
684
685 /* Return the type to use as __builtin_va_list. */
686 static tree
687 c6x_build_builtin_va_list (void)
688 {
689 return build_pointer_type (char_type_node);
690 }
691 \f
692 static void
693 c6x_asm_trampoline_template (FILE *f)
694 {
695 fprintf (f, "\t.long\t0x0000002b\n"); /* mvkl .s2 fnlow,B0 */
696 fprintf (f, "\t.long\t0x01000028\n"); /* || mvkl .s1 sclow,A2 */
697 fprintf (f, "\t.long\t0x0000006b\n"); /* mvkh .s2 fnhigh,B0 */
698 fprintf (f, "\t.long\t0x01000068\n"); /* || mvkh .s1 schigh,A2 */
699 fprintf (f, "\t.long\t0x00000362\n"); /* b .s2 B0 */
700 fprintf (f, "\t.long\t0x00008000\n"); /* nop 5 */
701 fprintf (f, "\t.long\t0x00000000\n"); /* nop */
702 fprintf (f, "\t.long\t0x00000000\n"); /* nop */
703 }
704
705 /* Emit RTL insns to initialize the variable parts of a trampoline at
706 TRAMP. FNADDR is an RTX for the address of the function's pure
707 code. CXT is an RTX for the static chain value for the function. */
708
709 static void
710 c6x_initialize_trampoline (rtx tramp, tree fndecl, rtx cxt)
711 {
712 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
713 rtx t1 = copy_to_reg (fnaddr);
714 rtx t2 = copy_to_reg (cxt);
715 rtx mask = gen_reg_rtx (SImode);
716 int i;
717
718 emit_block_move (tramp, assemble_trampoline_template (),
719 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
720
721 emit_move_insn (mask, GEN_INT (0xffff << 7));
722
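  /* Each of the four template words is patched in place: the 16-bit
     constant field of the mvkl/mvkh instructions occupies bits 7..22, so
     the low halves of FNADDR and CXT are shifted left by 7 and the high
     halves right by 9 before being masked and OR'd into the template
     words below.  */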
723 for (i = 0; i < 4; i++)
724 {
725 rtx mem = adjust_address (tramp, SImode, i * 4);
726 rtx t = (i & 1) ? t2 : t1;
727 rtx v1 = gen_reg_rtx (SImode);
728 rtx v2 = gen_reg_rtx (SImode);
729 emit_move_insn (v1, mem);
730 if (i < 2)
731 emit_insn (gen_ashlsi3 (v2, t, GEN_INT (7)));
732 else
733 emit_insn (gen_lshrsi3 (v2, t, GEN_INT (9)));
734 emit_insn (gen_andsi3 (v2, v2, mask));
735 emit_insn (gen_iorsi3 (v2, v2, v1));
736 emit_move_insn (mem, v2);
737 }
738 #ifdef CLEAR_INSN_CACHE
739 tramp = XEXP (tramp, 0);
740 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__gnu_clear_cache"),
741 LCT_NORMAL, VOIDmode, 2, tramp, Pmode,
742 plus_constant (Pmode, tramp, TRAMPOLINE_SIZE),
743 Pmode);
744 #endif
745 }
746 \f
747 /* Determine whether c6x_output_mi_thunk can succeed. */
748
749 static bool
750 c6x_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
751 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
752 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
753 const_tree function ATTRIBUTE_UNUSED)
754 {
755 return !TARGET_LONG_CALLS;
756 }
757
758 /* Output the assembler code for a thunk function. THUNK is the
759 declaration for the thunk function itself, FUNCTION is the decl for
760 the target function. DELTA is an immediate constant offset to be
761 added to THIS. If VCALL_OFFSET is nonzero, the word at
762 *(*this + vcall_offset) should be added to THIS. */
763
764 static void
765 c6x_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
766 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
767 HOST_WIDE_INT vcall_offset, tree function)
768 {
769 rtx xops[5];
770 /* The this parameter is passed as the first argument. */
771 rtx this_rtx = gen_rtx_REG (Pmode, REG_A4);
772
773 c6x_current_insn = NULL;
774
775 xops[4] = XEXP (DECL_RTL (function), 0);
776 if (!vcall_offset)
777 {
778 output_asm_insn ("b .s2 \t%4", xops);
779 if (!delta)
780 output_asm_insn ("nop 5", xops);
781 }
782
783 /* Adjust the this parameter by a fixed constant. */
784 if (delta)
785 {
786 xops[0] = GEN_INT (delta);
787 xops[1] = this_rtx;
788 if (delta >= -16 && delta <= 15)
789 {
790 output_asm_insn ("add .s1 %0, %1, %1", xops);
791 if (!vcall_offset)
792 output_asm_insn ("nop 4", xops);
793 }
794 else if (delta >= 16 && delta < 32)
795 {
796 output_asm_insn ("add .d1 %0, %1, %1", xops);
797 if (!vcall_offset)
798 output_asm_insn ("nop 4", xops);
799 }
800 else if (delta >= -32768 && delta < 32768)
801 {
802 output_asm_insn ("mvk .s1 %0, A0", xops);
803 output_asm_insn ("add .d1 %1, A0, %1", xops);
804 if (!vcall_offset)
805 output_asm_insn ("nop 3", xops);
806 }
807 else
808 {
809 output_asm_insn ("mvkl .s1 %0, A0", xops);
810 output_asm_insn ("mvkh .s1 %0, A0", xops);
811 output_asm_insn ("add .d1 %1, A0, %1", xops);
812 if (!vcall_offset)
813 output_asm_insn ("nop 3", xops);
814 }
815 }
816
817 /* Adjust the this parameter by a value stored in the vtable. */
818 if (vcall_offset)
819 {
820 rtx a0tmp = gen_rtx_REG (Pmode, REG_A0);
821 rtx a3tmp = gen_rtx_REG (Pmode, REG_A3);
822
823 xops[1] = a3tmp;
824 xops[2] = a0tmp;
825 xops[3] = gen_rtx_MEM (Pmode, a0tmp);
826 output_asm_insn ("mv .s1 a4, %2", xops);
827 output_asm_insn ("ldw .d1t1 %3, %2", xops);
828
829 /* Adjust the this parameter. */
830 xops[0] = gen_rtx_MEM (Pmode, plus_constant (Pmode, a0tmp,
831 vcall_offset));
832 if (!memory_operand (xops[0], Pmode))
833 {
834 rtx tmp2 = gen_rtx_REG (Pmode, REG_A1);
835 xops[0] = GEN_INT (vcall_offset);
836 xops[1] = tmp2;
837 output_asm_insn ("mvkl .s1 %0, %1", xops);
838 output_asm_insn ("mvkh .s1 %0, %1", xops);
839 output_asm_insn ("nop 2", xops);
840 output_asm_insn ("add .d1 %2, %1, %2", xops);
841 xops[0] = gen_rtx_MEM (Pmode, a0tmp);
842 }
843 else
844 output_asm_insn ("nop 4", xops);
845 xops[2] = this_rtx;
846 output_asm_insn ("ldw .d1t1 %0, %1", xops);
847 output_asm_insn ("|| b .s2 \t%4", xops);
848 output_asm_insn ("nop 4", xops);
849 output_asm_insn ("add .d1 %2, %1, %2", xops);
850 }
851 }
852 \f
853 /* Return true if EXP goes in small data/bss. */
854
855 static bool
856 c6x_in_small_data_p (const_tree exp)
857 {
858 /* We want to merge strings, so we never consider them small data. */
859 if (TREE_CODE (exp) == STRING_CST)
860 return false;
861
862 /* Functions are never small data. */
863 if (TREE_CODE (exp) == FUNCTION_DECL)
864 return false;
865
866 if (TREE_CODE (exp) == VAR_DECL && DECL_WEAK (exp))
867 return false;
868
869 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
870 {
871 const char *section = DECL_SECTION_NAME (exp);
872
873 if (strcmp (section, ".neardata") == 0
874 || strncmp (section, ".neardata.", 10) == 0
875 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
876 || strcmp (section, ".bss") == 0
877 || strncmp (section, ".bss.", 5) == 0
878 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0
879 || strcmp (section, ".rodata") == 0
880 || strncmp (section, ".rodata.", 8) == 0
881 || strncmp (section, ".gnu.linkonce.s2.", 17) == 0)
882 return true;
883 }
884 else
885 return PLACE_IN_SDATA_P (exp);
886
887 return false;
888 }
889
890 /* Return a section for X. The only special thing we do here is to
891 honor small data. We don't have a tree type, so we can't use the
892 PLACE_IN_SDATA_P macro we use everywhere else; we choose to place
893 everything sized 8 bytes or smaller into small data. */
894
895 static section *
896 c6x_select_rtx_section (machine_mode mode, rtx x,
897 unsigned HOST_WIDE_INT align)
898 {
899 if (c6x_sdata_mode == C6X_SDATA_ALL
900 || (c6x_sdata_mode != C6X_SDATA_NONE && GET_MODE_SIZE (mode) <= 8))
901 /* ??? Consider using mergeable sdata sections. */
902 return sdata_section;
903 else
904 return default_elf_select_rtx_section (mode, x, align);
905 }
906
907 static section *
908 c6x_elf_select_section (tree decl, int reloc,
909 unsigned HOST_WIDE_INT align)
910 {
911 const char *sname = NULL;
912 unsigned int flags = SECTION_WRITE;
913 if (c6x_in_small_data_p (decl))
914 {
915 switch (categorize_decl_for_section (decl, reloc))
916 {
917 case SECCAT_SRODATA:
918 sname = ".rodata";
919 flags = 0;
920 break;
921 case SECCAT_SDATA:
922 sname = ".neardata";
923 break;
924 case SECCAT_SBSS:
925 sname = ".bss";
926 flags |= SECTION_BSS;
927 default:
928 break;
929 }
930 }
931 else
932 {
933 switch (categorize_decl_for_section (decl, reloc))
934 {
935 case SECCAT_DATA:
936 sname = ".fardata";
937 break;
938 case SECCAT_DATA_REL:
939 sname = ".fardata.rel";
940 break;
941 case SECCAT_DATA_REL_LOCAL:
942 sname = ".fardata.rel.local";
943 break;
944 case SECCAT_DATA_REL_RO:
945 sname = ".fardata.rel.ro";
946 break;
947 case SECCAT_DATA_REL_RO_LOCAL:
948 sname = ".fardata.rel.ro.local";
949 break;
950 case SECCAT_BSS:
951 sname = ".far";
952 flags |= SECTION_BSS;
953 break;
954 case SECCAT_RODATA:
955 sname = ".const";
956 flags = 0;
957 break;
958 case SECCAT_SRODATA:
959 case SECCAT_SDATA:
960 case SECCAT_SBSS:
961 gcc_unreachable ();
962 default:
963 break;
964 }
965 }
966 if (sname)
967 {
968 /* We might get called with string constants, but get_named_section
969 doesn't like them as they are not DECLs. Also, we need to set
970 flags in that case. */
971 if (!DECL_P (decl))
972 return get_section (sname, flags, NULL);
973 return get_named_section (decl, sname, reloc);
974 }
975
976 return default_elf_select_section (decl, reloc, align);
977 }
978
979 /* Build up a unique section name, expressed as a
980 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
981 RELOC indicates whether the initial value of EXP requires
982 link-time relocations. */
983
984 static void ATTRIBUTE_UNUSED
985 c6x_elf_unique_section (tree decl, int reloc)
986 {
987 const char *prefix = NULL;
988 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
989 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
990
991 if (c6x_in_small_data_p (decl))
992 {
993 switch (categorize_decl_for_section (decl, reloc))
994 {
995 case SECCAT_SDATA:
996 prefix = one_only ? ".s" : ".neardata";
997 break;
998 case SECCAT_SBSS:
999 prefix = one_only ? ".sb" : ".bss";
1000 break;
1001 case SECCAT_SRODATA:
1002 prefix = one_only ? ".s2" : ".rodata";
1003 break;
1004 case SECCAT_RODATA_MERGE_STR:
1005 case SECCAT_RODATA_MERGE_STR_INIT:
1006 case SECCAT_RODATA_MERGE_CONST:
1007 case SECCAT_RODATA:
1008 case SECCAT_DATA:
1009 case SECCAT_DATA_REL:
1010 case SECCAT_DATA_REL_LOCAL:
1011 case SECCAT_DATA_REL_RO:
1012 case SECCAT_DATA_REL_RO_LOCAL:
1013 gcc_unreachable ();
1014 default:
1015 /* Everything else we place into default sections and hope for the
1016 best. */
1017 break;
1018 }
1019 }
1020 else
1021 {
1022 switch (categorize_decl_for_section (decl, reloc))
1023 {
1024 case SECCAT_DATA:
1025 case SECCAT_DATA_REL:
1026 case SECCAT_DATA_REL_LOCAL:
1027 case SECCAT_DATA_REL_RO:
1028 case SECCAT_DATA_REL_RO_LOCAL:
1029 prefix = one_only ? ".fd" : ".fardata";
1030 break;
1031 case SECCAT_BSS:
1032 prefix = one_only ? ".fb" : ".far";
1033 break;
1034 case SECCAT_RODATA:
1035 case SECCAT_RODATA_MERGE_STR:
1036 case SECCAT_RODATA_MERGE_STR_INIT:
1037 case SECCAT_RODATA_MERGE_CONST:
1038 prefix = one_only ? ".fr" : ".const";
1039 break;
1040 case SECCAT_SRODATA:
1041 case SECCAT_SDATA:
1042 case SECCAT_SBSS:
1043 gcc_unreachable ();
1044 default:
1045 break;
1046 }
1047 }
1048
1049 if (prefix)
1050 {
1051 const char *name, *linkonce;
1052 char *string;
1053
1054 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
1055 name = targetm.strip_name_encoding (name);
1056
1057 /* If we're using one_only, then there needs to be a .gnu.linkonce
1058 prefix to the section name. */
1059 linkonce = one_only ? ".gnu.linkonce" : "";
1060
1061 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
1062
1063 set_decl_section_name (decl, string);
1064 return;
1065 }
1066 default_unique_section (decl, reloc);
1067 }
1068
1069 static unsigned int
1070 c6x_section_type_flags (tree decl, const char *name, int reloc)
1071 {
1072 unsigned int flags = 0;
1073
1074 if (strcmp (name, ".far") == 0
1075 || strncmp (name, ".far.", 5) == 0)
1076 flags |= SECTION_BSS;
1077
1078 flags |= default_section_type_flags (decl, name, reloc);
1079
1080 return flags;
1081 }
1082 \f
1083 /* Checks whether the given CALL_EXPR would use a call-saved
1084 register. This is used to decide whether sibling call optimization
1085 could be performed on the respective function call. */
1086
1087 static bool
1088 c6x_call_saved_register_used (tree call_expr)
1089 {
1090 CUMULATIVE_ARGS cum_v;
1091 cumulative_args_t cum;
1092 HARD_REG_SET call_saved_regset;
1093 tree parameter;
1094 machine_mode mode;
1095 tree type;
1096 rtx parm_rtx;
1097 int i;
1098
1099 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
1100 cum = pack_cumulative_args (&cum_v);
1101
1102 COMPL_HARD_REG_SET (call_saved_regset, call_used_reg_set);
1103 for (i = 0; i < call_expr_nargs (call_expr); i++)
1104 {
1105 parameter = CALL_EXPR_ARG (call_expr, i);
1106 gcc_assert (parameter);
1107
1108 /* For an undeclared variable passed as parameter we will get
1109 an ERROR_MARK node here. */
1110 if (TREE_CODE (parameter) == ERROR_MARK)
1111 return true;
1112
1113 type = TREE_TYPE (parameter);
1114 gcc_assert (type);
1115
1116 mode = TYPE_MODE (type);
1117 gcc_assert (mode);
1118
1119 if (pass_by_reference (&cum_v, mode, type, true))
1120 {
1121 mode = Pmode;
1122 type = build_pointer_type (type);
1123 }
1124
1125 parm_rtx = c6x_function_arg (cum, mode, type, 0);
1126
1127 c6x_function_arg_advance (cum, mode, type, 0);
1128
1129 if (!parm_rtx)
1130 continue;
1131
1132 if (REG_P (parm_rtx)
1133 && overlaps_hard_reg_set_p (call_saved_regset, GET_MODE (parm_rtx),
1134 REGNO (parm_rtx)))
1135 return true;
1136 if (GET_CODE (parm_rtx) == PARALLEL)
1137 {
1138 int n = XVECLEN (parm_rtx, 0);
1139 while (n-- > 0)
1140 {
1141 rtx x = XEXP (XVECEXP (parm_rtx, 0, n), 0);
1142 if (REG_P (x)
1143 && overlaps_hard_reg_set_p (call_saved_regset,
1144 GET_MODE (x), REGNO (x)))
1145 return true;
1146 }
1147 }
1148 }
1149 return false;
1150 }
1151
1152 /* Decide whether we can make a sibling call to a function. DECL is the
1153 declaration of the function being targeted by the call and EXP is the
1154 CALL_EXPR representing the call. */
1155
1156 static bool
1157 c6x_function_ok_for_sibcall (tree decl, tree exp)
1158 {
1159 /* Registers A10, A12, B10 and B12 are available as argument
1160 registers but are unfortunately call-saved. This makes functions
1161 needing these registers for arguments unsuitable for
1162 sibcalls. */
1163 if (c6x_call_saved_register_used (exp))
1164 return false;
1165
1166 if (!flag_pic)
1167 return true;
1168
1169 if (TARGET_DSBT)
1170 {
1171 /* When compiling for DSBT, the calling function must be local,
1172 so that when we reload B14 in the sibcall epilogue, it will
1173 not change its value. */
1174 struct cgraph_local_info *this_func;
1175
1176 if (!decl)
1177 /* Not enough information. */
1178 return false;
1179
1180 this_func = cgraph_node::local_info (current_function_decl);
1181 return this_func->local;
1182 }
1183
1184 return true;
1185 }
1186
1187 /* Return true if DECL is known to be linked into section SECTION. */
1188
1189 static bool
1190 c6x_function_in_section_p (tree decl, section *section)
1191 {
1192 /* We can only be certain about functions defined in the same
1193 compilation unit. */
1194 if (!TREE_STATIC (decl))
1195 return false;
1196
1197 /* Make sure that SYMBOL always binds to the definition in this
1198 compilation unit. */
1199 if (!targetm.binds_local_p (decl))
1200 return false;
1201
1202 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
1203 if (!DECL_SECTION_NAME (decl))
1204 {
1205 /* Make sure that we will not create a unique section for DECL. */
1206 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
1207 return false;
1208 }
1209
1210 return function_section (decl) == section;
1211 }
1212
1213 /* Return true if a call to OP, which is a SYMBOL_REF, must be expanded
1214 as a long call. */
1215 bool
1216 c6x_long_call_p (rtx op)
1217 {
1218 tree decl;
1219
1220 if (!TARGET_LONG_CALLS)
1221 return false;
1222
1223 decl = SYMBOL_REF_DECL (op);
1224
1225 /* Try to determine whether the symbol is in the same section as the current
1226 function. Be conservative, and only cater for cases in which the
1227 whole of the current function is placed in the same section. */
1228 if (decl != NULL_TREE
1229 && !flag_reorder_blocks_and_partition
1230 && TREE_CODE (decl) == FUNCTION_DECL
1231 && c6x_function_in_section_p (decl, current_function_section ()))
1232 return false;
1233
1234 return true;
1235 }
1236
1237 /* Emit the sequence for a call. */
1238 void
1239 c6x_expand_call (rtx retval, rtx address, bool sibcall)
1240 {
1241 rtx callee = XEXP (address, 0);
1242 rtx call_insn;
1243
1244 if (!c6x_call_operand (callee, Pmode))
1245 {
1246 callee = force_reg (Pmode, callee);
1247 address = change_address (address, Pmode, callee);
1248 }
1249 call_insn = gen_rtx_CALL (VOIDmode, address, const0_rtx);
1250 if (sibcall)
1251 {
1252 call_insn = emit_call_insn (call_insn);
1253 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
1254 gen_rtx_REG (Pmode, REG_B3));
1255 }
1256 else
1257 {
1258 if (retval == NULL_RTX)
1259 call_insn = emit_call_insn (call_insn);
1260 else
1261 call_insn = emit_call_insn (gen_rtx_SET (retval, call_insn));
1262 }
1263 if (flag_pic)
1264 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
1265 }
1266
1267 /* Legitimize PIC addresses. If the address is already position-independent,
1268 we return ORIG. Newly generated position-independent addresses go into a
1269 reg. This is REG if nonzero, otherwise we allocate register(s) as
1270 necessary. PICREG is the register holding the pointer to the PIC offset
1271 table. */
1272
1273 static rtx
1274 legitimize_pic_address (rtx orig, rtx reg, rtx picreg)
1275 {
1276 rtx addr = orig;
1277 rtx new_rtx = orig;
1278
1279 if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == LABEL_REF)
1280 {
1281 int unspec = UNSPEC_LOAD_GOT;
1282 rtx tmp;
1283
1284 if (reg == 0)
1285 {
1286 gcc_assert (can_create_pseudo_p ());
1287 reg = gen_reg_rtx (Pmode);
1288 }
1289 if (flag_pic == 2)
1290 {
1291 if (can_create_pseudo_p ())
1292 tmp = gen_reg_rtx (Pmode);
1293 else
1294 tmp = reg;
1295 emit_insn (gen_movsi_gotoff_high (tmp, addr));
1296 emit_insn (gen_movsi_gotoff_lo_sum (tmp, tmp, addr));
1297 emit_insn (gen_load_got_gotoff (reg, picreg, tmp));
1298 }
1299 else
1300 {
1301 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), unspec);
1302 new_rtx = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, picreg, tmp));
1303
1304 emit_move_insn (reg, new_rtx);
1305 }
1306 if (picreg == pic_offset_table_rtx)
1307 crtl->uses_pic_offset_table = 1;
1308 return reg;
1309 }
1310
1311 else if (GET_CODE (addr) == CONST || GET_CODE (addr) == PLUS)
1312 {
1313 rtx base;
1314
1315 if (GET_CODE (addr) == CONST)
1316 {
1317 addr = XEXP (addr, 0);
1318 gcc_assert (GET_CODE (addr) == PLUS);
1319 }
1320
1321 if (XEXP (addr, 0) == picreg)
1322 return orig;
1323
1324 if (reg == 0)
1325 {
1326 gcc_assert (can_create_pseudo_p ());
1327 reg = gen_reg_rtx (Pmode);
1328 }
1329
1330 base = legitimize_pic_address (XEXP (addr, 0), reg, picreg);
1331 addr = legitimize_pic_address (XEXP (addr, 1),
1332 base == reg ? NULL_RTX : reg,
1333 picreg);
1334
1335 if (GET_CODE (addr) == CONST_INT)
1336 {
1337 gcc_assert (! reload_in_progress && ! reload_completed);
1338 addr = force_reg (Pmode, addr);
1339 }
1340
1341 if (GET_CODE (addr) == PLUS && CONSTANT_P (XEXP (addr, 1)))
1342 {
1343 base = gen_rtx_PLUS (Pmode, base, XEXP (addr, 0));
1344 addr = XEXP (addr, 1);
1345 }
1346
1347 return gen_rtx_PLUS (Pmode, base, addr);
1348 }
1349
1350 return new_rtx;
1351 }
1352
1353 /* Expand a move operation in mode MODE. The operands are in OPERANDS.
1354 Returns true if no further code must be generated, false if the caller
1355 should generate an insn to move OPERANDS[1] to OPERANDS[0]. */
1356
1357 bool
1358 expand_move (rtx *operands, machine_mode mode)
1359 {
1360 rtx dest = operands[0];
1361 rtx op = operands[1];
1362
1363 if ((reload_in_progress | reload_completed) == 0
1364 && GET_CODE (dest) == MEM && GET_CODE (op) != REG)
1365 operands[1] = force_reg (mode, op);
1366 else if (mode == SImode && symbolic_operand (op, SImode))
1367 {
1368 if (flag_pic)
1369 {
1370 if (sdata_symbolic_operand (op, SImode))
1371 {
1372 emit_insn (gen_load_sdata_pic (dest, pic_offset_table_rtx, op));
1373 crtl->uses_pic_offset_table = 1;
1374 return true;
1375 }
1376 else
1377 {
1378 rtx temp = (reload_completed || reload_in_progress
1379 ? dest : gen_reg_rtx (Pmode));
1380
1381 operands[1] = legitimize_pic_address (op, temp,
1382 pic_offset_table_rtx);
1383 }
1384 }
1385 else if (reload_completed
1386 && !sdata_symbolic_operand (op, SImode))
1387 {
1388 emit_insn (gen_movsi_high (dest, op));
1389 emit_insn (gen_movsi_lo_sum (dest, dest, op));
1390 return true;
1391 }
1392 }
1393 return false;
1394 }
1395
1396 /* This function is called when we're about to expand an integer compare
1397 operation which performs COMPARISON. It examines the second operand,
1398 and if it is an integer constant that cannot be used directly on the
1399 current machine in a comparison insn, it returns true. */
1400 bool
1401 c6x_force_op_for_comparison_p (enum rtx_code code, rtx op)
1402 {
1403 if (!CONST_INT_P (op) || satisfies_constraint_Iu4 (op))
1404 return false;
1405
1406 if ((code == EQ || code == LT || code == GT)
1407 && !satisfies_constraint_Is5 (op))
1408 return true;
1409 if ((code == GTU || code == LTU)
1410 && (!TARGET_INSNS_64 || !satisfies_constraint_Iu5 (op)))
1411 return true;
1412
1413 return false;
1414 }
1415
1416 /* Emit comparison instruction if necessary, returning the expression
1417 that holds the compare result in the proper mode. Return the comparison
1418 that should be used in the jump insn. */
1419
1420 rtx
1421 c6x_expand_compare (rtx comparison, machine_mode mode)
1422 {
1423 enum rtx_code code = GET_CODE (comparison);
1424 rtx op0 = XEXP (comparison, 0);
1425 rtx op1 = XEXP (comparison, 1);
1426 rtx cmp;
1427 enum rtx_code jump_code = code;
1428 machine_mode op_mode = GET_MODE (op0);
1429
1430 if (op_mode == DImode && (code == NE || code == EQ) && op1 == const0_rtx)
1431 {
1432 rtx t = gen_reg_rtx (SImode);
1433 emit_insn (gen_iorsi3 (t, gen_lowpart (SImode, op0),
1434 gen_highpart (SImode, op0)));
1435 op_mode = SImode;
1436 cmp = t;
1437 }
1438 else if (op_mode == DImode)
1439 {
1440 rtx lo[2], high[2];
1441 rtx cmp1, cmp2;
1442
1443 if (code == NE || code == GEU || code == LEU || code == GE || code == LE)
1444 {
1445 code = reverse_condition (code);
1446 jump_code = EQ;
1447 }
1448 else
1449 jump_code = NE;
1450
1451 split_di (&op0, 1, lo, high);
1452 split_di (&op1, 1, lo + 1, high + 1);
1453
1454 if (c6x_force_op_for_comparison_p (code, high[1])
1455 || c6x_force_op_for_comparison_p (EQ, high[1]))
1456 high[1] = force_reg (SImode, high[1]);
1457
1458 cmp1 = gen_reg_rtx (SImode);
1459 cmp2 = gen_reg_rtx (SImode);
1460 emit_insn (gen_rtx_SET (cmp1, gen_rtx_fmt_ee (code, SImode,
1461 high[0], high[1])));
1462 if (code == EQ)
1463 {
1464 if (c6x_force_op_for_comparison_p (code, lo[1]))
1465 lo[1] = force_reg (SImode, lo[1]);
1466 emit_insn (gen_rtx_SET (cmp2, gen_rtx_fmt_ee (code, SImode,
1467 lo[0], lo[1])));
1468 emit_insn (gen_andsi3 (cmp1, cmp1, cmp2));
1469 }
1470 else
1471 {
1472 emit_insn (gen_rtx_SET (cmp2, gen_rtx_EQ (SImode, high[0],
1473 high[1])));
1474 if (code == GT)
1475 code = GTU;
1476 else if (code == LT)
1477 code = LTU;
1478 if (c6x_force_op_for_comparison_p (code, lo[1]))
1479 lo[1] = force_reg (SImode, lo[1]);
1480 emit_insn (gen_cmpsi_and (cmp2, gen_rtx_fmt_ee (code, SImode,
1481 lo[0], lo[1]),
1482 lo[0], lo[1], cmp2));
1483 emit_insn (gen_iorsi3 (cmp1, cmp1, cmp2));
1484 }
1485 cmp = cmp1;
1486 }
1487 else if (TARGET_FP && !flag_finite_math_only
1488 && (op_mode == DFmode || op_mode == SFmode)
1489 && code != EQ && code != NE && code != LT && code != GT
1490 && code != UNLE && code != UNGE)
1491 {
1492 enum rtx_code code1, code2, code3;
1493 rtx (*fn) (rtx, rtx, rtx, rtx, rtx);
1494
1495 jump_code = NE;
1496 code3 = UNKNOWN;
1497 switch (code)
1498 {
1499 case UNLT:
1500 case UNGT:
1501 jump_code = EQ;
1502 /* fall through */
1503 case LE:
1504 case GE:
1505 code1 = code == LE || code == UNGT ? LT : GT;
1506 code2 = EQ;
1507 break;
1508
1509 case UNORDERED:
1510 jump_code = EQ;
1511 /* fall through */
1512 case ORDERED:
1513 code3 = EQ;
1514 /* fall through */
1515 case LTGT:
1516 code1 = LT;
1517 code2 = GT;
1518 break;
1519
1520 case UNEQ:
1521 code1 = LT;
1522 code2 = GT;
1523 jump_code = EQ;
1524 break;
1525
1526 default:
1527 gcc_unreachable ();
1528 }
1529
1530 cmp = gen_reg_rtx (SImode);
1531 emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code1, SImode, op0, op1)));
1532 fn = op_mode == DFmode ? gen_cmpdf_ior : gen_cmpsf_ior;
1533 emit_insn (fn (cmp, gen_rtx_fmt_ee (code2, SImode, op0, op1),
1534 op0, op1, cmp));
1535 if (code3 != UNKNOWN)
1536 emit_insn (fn (cmp, gen_rtx_fmt_ee (code3, SImode, op0, op1),
1537 op0, op1, cmp));
1538 }
1539 else if (op_mode == SImode && (code == NE || code == EQ) && op1 == const0_rtx)
1540 cmp = op0;
1541 else
1542 {
1543 bool is_fp_libfunc;
1544 is_fp_libfunc = !TARGET_FP && (op_mode == DFmode || op_mode == SFmode);
1545
1546 if ((code == NE || code == GEU || code == LEU || code == GE || code == LE)
1547 && !is_fp_libfunc)
1548 {
1549 code = reverse_condition (code);
1550 jump_code = EQ;
1551 }
1552 else if (code == UNGE)
1553 {
1554 code = LT;
1555 jump_code = EQ;
1556 }
1557 else if (code == UNLE)
1558 {
1559 code = GT;
1560 jump_code = EQ;
1561 }
1562 else
1563 jump_code = NE;
1564
1565 if (is_fp_libfunc)
1566 {
1567 rtx_insn *insns;
1568 rtx libfunc;
1569 switch (code)
1570 {
1571 case EQ:
1572 libfunc = op_mode == DFmode ? eqdf_libfunc : eqsf_libfunc;
1573 break;
1574 case NE:
1575 libfunc = op_mode == DFmode ? nedf_libfunc : nesf_libfunc;
1576 break;
1577 case GT:
1578 libfunc = op_mode == DFmode ? gtdf_libfunc : gtsf_libfunc;
1579 break;
1580 case GE:
1581 libfunc = op_mode == DFmode ? gedf_libfunc : gesf_libfunc;
1582 break;
1583 case LT:
1584 libfunc = op_mode == DFmode ? ltdf_libfunc : ltsf_libfunc;
1585 break;
1586 case LE:
1587 libfunc = op_mode == DFmode ? ledf_libfunc : lesf_libfunc;
1588 break;
1589 default:
1590 gcc_unreachable ();
1591 }
1592 start_sequence ();
1593
1594 cmp = emit_library_call_value (libfunc, 0, LCT_CONST, SImode, 2,
1595 op0, op_mode, op1, op_mode);
1596 insns = get_insns ();
1597 end_sequence ();
1598
1599 emit_libcall_block (insns, cmp, cmp,
1600 gen_rtx_fmt_ee (code, SImode, op0, op1));
1601 }
1602 else
1603 {
1604 cmp = gen_reg_rtx (SImode);
1605 if (c6x_force_op_for_comparison_p (code, op1))
1606 op1 = force_reg (SImode, op1);
1607 emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, SImode,
1608 op0, op1)));
1609 }
1610 }
1611
1612 return gen_rtx_fmt_ee (jump_code, mode, cmp, const0_rtx);
1613 }
1614
1615 /* Return one word of double-word value OP. HIGH_P is true to select the
1616 high part, false to select the low part. When encountering auto-increment
1617 addressing, we make the assumption that the low part is going to be accessed
1618 first. */
1619
1620 rtx
1621 c6x_subword (rtx op, bool high_p)
1622 {
1623 unsigned int byte;
1624 machine_mode mode;
1625
1626 mode = GET_MODE (op);
1627 if (mode == VOIDmode)
1628 mode = DImode;
1629
1630 if (TARGET_BIG_ENDIAN ? !high_p : high_p)
1631 byte = UNITS_PER_WORD;
1632 else
1633 byte = 0;
1634
1635 if (MEM_P (op))
1636 {
1637 rtx addr = XEXP (op, 0);
1638 if (GET_CODE (addr) == PLUS || REG_P (addr))
1639 return adjust_address (op, word_mode, byte);
1640 /* FIXME: should really support autoincrement addressing for
1641 multi-word modes. */
1642 gcc_unreachable ();
1643 }
1644
1645 return simplify_gen_subreg (word_mode, op, mode, byte);
1646 }
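/* For a DImode value this means that on a little-endian target the high
   word is at byte offset UNITS_PER_WORD and the low word at offset 0,
   with the offsets swapped on a big-endian target.  */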
1647
1648 /* Split one or more DImode RTL references into pairs of SImode
1649 references. The RTL can be REG, offsettable MEM, integer constant, or
1650 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
1651 split and "num" is its length. lo_half and hi_half are output arrays
1652 that parallel "operands". */
1653
1654 void
1655 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
1656 {
1657 while (num--)
1658 {
1659 rtx op = operands[num];
1660
1661 lo_half[num] = c6x_subword (op, false);
1662 hi_half[num] = c6x_subword (op, true);
1663 }
1664 }
1665
1666 /* Return true if VAL is a mask valid for a clr instruction. */
1667 bool
1668 c6x_valid_mask_p (HOST_WIDE_INT val)
1669 {
1670 int i;
1671 for (i = 0; i < 32; i++)
1672 if (!(val & ((unsigned HOST_WIDE_INT)1 << i)))
1673 break;
1674 for (; i < 32; i++)
1675 if (val & ((unsigned HOST_WIDE_INT)1 << i))
1676 break;
1677 for (; i < 32; i++)
1678 if (!(val & ((unsigned HOST_WIDE_INT)1 << i)))
1679 return false;
1680 return true;
1681 }
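/* For instance, 0xffff00ff (whose clear bits form a single contiguous
   field) passes the check above, while 0xff00ff00 does not, since its
   clear bits are split into two separate runs.  */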
1682
1683 /* Expand a block move for a movmemM pattern. */
1684
1685 bool
1686 c6x_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
1687 rtx expected_align_exp ATTRIBUTE_UNUSED,
1688 rtx expected_size_exp ATTRIBUTE_UNUSED)
1689 {
1690 unsigned HOST_WIDE_INT align = 1;
1691 unsigned HOST_WIDE_INT src_mem_align, dst_mem_align, min_mem_align;
1692 unsigned HOST_WIDE_INT count = 0, offset = 0;
1693 unsigned int biggest_move = TARGET_STDW ? 8 : 4;
1694
1695 if (CONST_INT_P (align_exp))
1696 align = INTVAL (align_exp);
1697
1698 src_mem_align = MEM_ALIGN (src) / BITS_PER_UNIT;
1699 dst_mem_align = MEM_ALIGN (dst) / BITS_PER_UNIT;
1700 min_mem_align = MIN (src_mem_align, dst_mem_align);
1701
1702 if (min_mem_align > align)
1703 align = min_mem_align / BITS_PER_UNIT;
1704 if (src_mem_align < align)
1705 src_mem_align = align;
1706 if (dst_mem_align < align)
1707 dst_mem_align = align;
1708
1709 if (CONST_INT_P (count_exp))
1710 count = INTVAL (count_exp);
1711 else
1712 return false;
1713
1714 /* Make sure we don't need to care about overflow later on. */
1715 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
1716 return false;
1717
1718 if (count >= 28 && (count & 3) == 0 && align >= 4)
1719 {
1720 tree dst_expr = MEM_EXPR (dst);
1721 tree src_expr = MEM_EXPR (src);
1722 rtx fn = TARGET_INSNS_64PLUS ? strasgi64p_libfunc : strasgi_libfunc;
1723 rtx srcreg = force_reg (Pmode, XEXP (src, 0));
1724 rtx dstreg = force_reg (Pmode, XEXP (dst, 0));
1725
1726 if (src_expr)
1727 mark_addressable (src_expr);
1728 if (dst_expr)
1729 mark_addressable (dst_expr);
1730 emit_library_call (fn, LCT_NORMAL, VOIDmode, 3,
1731 dstreg, Pmode, srcreg, Pmode, count_exp, SImode);
1732 return true;
1733 }
1734
1735 if (biggest_move > align && !TARGET_INSNS_64)
1736 biggest_move = align;
1737
1738 if (count / biggest_move > 7)
1739 return false;
1740
1741 while (count > 0)
1742 {
1743 rtx reg, reg_lowpart;
1744 machine_mode srcmode, dstmode;
1745 unsigned HOST_WIDE_INT src_size, dst_size, src_left;
1746 int shift;
1747 rtx srcmem, dstmem;
1748
1749 while (biggest_move > count)
1750 biggest_move /= 2;
1751
1752 src_size = dst_size = biggest_move;
1753 if (src_size > src_mem_align && src_size == 2)
1754 src_size = 1;
1755 if (dst_size > dst_mem_align && dst_size == 2)
1756 dst_size = 1;
1757
1758 if (dst_size > src_size)
1759 dst_size = src_size;
1760
1761 srcmode = mode_for_size (src_size * BITS_PER_UNIT, MODE_INT, 0);
1762 dstmode = mode_for_size (dst_size * BITS_PER_UNIT, MODE_INT, 0);
1763 if (src_size >= 4)
1764 reg_lowpart = reg = gen_reg_rtx (srcmode);
1765 else
1766 {
1767 reg = gen_reg_rtx (SImode);
1768 reg_lowpart = gen_lowpart (srcmode, reg);
1769 }
1770
1771 srcmem = adjust_address (copy_rtx (src), srcmode, offset);
1772
1773 if (src_size > src_mem_align)
1774 {
1775 enum insn_code icode = (srcmode == SImode ? CODE_FOR_movmisalignsi
1776 : CODE_FOR_movmisaligndi);
1777 emit_insn (GEN_FCN (icode) (reg_lowpart, srcmem));
1778 }
1779 else
1780 emit_move_insn (reg_lowpart, srcmem);
1781
1782 src_left = src_size;
1783 shift = TARGET_BIG_ENDIAN ? (src_size - dst_size) * BITS_PER_UNIT : 0;
1784 while (src_left > 0)
1785 {
1786 rtx dstreg = reg_lowpart;
1787
1788 if (src_size > dst_size)
1789 {
1790 rtx srcword = reg;
1791 int shift_amount = shift & (BITS_PER_WORD - 1);
1792 if (src_size > 4)
1793 srcword = operand_subword_force (srcword, src_left >= 4 ? 0 : 4,
1794 SImode);
1795 if (shift_amount > 0)
1796 {
1797 dstreg = gen_reg_rtx (SImode);
1798 emit_insn (gen_lshrsi3 (dstreg, srcword,
1799 GEN_INT (shift_amount)));
1800 }
1801 else
1802 dstreg = srcword;
1803 dstreg = gen_lowpart (dstmode, dstreg);
1804 }
1805
1806 dstmem = adjust_address (copy_rtx (dst), dstmode, offset);
1807 if (dst_size > dst_mem_align)
1808 {
1809 enum insn_code icode = (dstmode == SImode ? CODE_FOR_movmisalignsi
1810 : CODE_FOR_movmisaligndi);
1811 emit_insn (GEN_FCN (icode) (dstmem, dstreg));
1812 }
1813 else
1814 emit_move_insn (dstmem, dstreg);
1815
1816 if (TARGET_BIG_ENDIAN)
1817 shift -= dst_size * BITS_PER_UNIT;
1818 else
1819 shift += dst_size * BITS_PER_UNIT;
1820 offset += dst_size;
1821 src_left -= dst_size;
1822 }
1823 count -= src_size;
1824 }
1825 return true;
1826 }
1827 \f
1828 /* Subroutine of print_address_operand, print a single address offset OFF for
1829 a memory access of mode MEM_MODE, choosing between normal form and scaled
1830 form depending on the type of the insn. Misaligned memory references must
1831 use the scaled form. */
1832
1833 static void
1834 print_address_offset (FILE *file, rtx off, machine_mode mem_mode)
1835 {
1836 rtx pat;
1837
1838 if (c6x_current_insn != NULL_RTX)
1839 {
1840 pat = PATTERN (c6x_current_insn);
1841 if (GET_CODE (pat) == COND_EXEC)
1842 pat = COND_EXEC_CODE (pat);
1843 if (GET_CODE (pat) == PARALLEL)
1844 pat = XVECEXP (pat, 0, 0);
1845
1846 if (GET_CODE (pat) == SET
1847 && GET_CODE (SET_SRC (pat)) == UNSPEC
1848 && XINT (SET_SRC (pat), 1) == UNSPEC_MISALIGNED_ACCESS)
1849 {
1850 gcc_assert (CONST_INT_P (off)
1851 && (INTVAL (off) & (GET_MODE_SIZE (mem_mode) - 1)) == 0);
1852 fprintf (file, "[" HOST_WIDE_INT_PRINT_DEC "]",
1853 INTVAL (off) / GET_MODE_SIZE (mem_mode));
1854 return;
1855 }
1856 }
1857 fputs ("(", file);
1858 output_address (mem_mode, off);
1859 fputs (")", file);
1860 }
1861
1862 static bool
1863 c6x_print_operand_punct_valid_p (unsigned char c)
1864 {
1865 return c == '$' || c == '.' || c == '|';
1866 }
1867
1868 static void c6x_print_operand (FILE *, rtx, int);
1869
1870 /* Subroutine of c6x_print_operand; used to print a memory reference X to FILE. */
1871
1872 static void
1873 c6x_print_address_operand (FILE *file, rtx x, machine_mode mem_mode)
1874 {
1875 rtx off;
1876 switch (GET_CODE (x))
1877 {
1878 case PRE_MODIFY:
1879 case POST_MODIFY:
1880 if (GET_CODE (x) == POST_MODIFY)
1881 output_address (mem_mode, XEXP (x, 0));
1882 off = XEXP (XEXP (x, 1), 1);
1883 if (XEXP (x, 0) == stack_pointer_rtx)
1884 {
1885 if (GET_CODE (x) == PRE_MODIFY)
1886 gcc_assert (INTVAL (off) > 0);
1887 else
1888 gcc_assert (INTVAL (off) < 0);
1889 }
1890 if (CONST_INT_P (off) && INTVAL (off) < 0)
1891 {
1892 fprintf (file, "--");
1893 off = GEN_INT (-INTVAL (off));
1894 }
1895 else
1896 fprintf (file, "++");
1897 if (GET_CODE (x) == PRE_MODIFY)
1898 output_address (mem_mode, XEXP (x, 0));
1899 print_address_offset (file, off, mem_mode);
1900 break;
1901
1902 case PLUS:
1903 off = XEXP (x, 1);
1904 if (CONST_INT_P (off) && INTVAL (off) < 0)
1905 {
1906 fprintf (file, "-");
1907 off = GEN_INT (-INTVAL (off));
1908 }
1909 else
1910 fprintf (file, "+");
1911 output_address (mem_mode, XEXP (x, 0));
1912 print_address_offset (file, off, mem_mode);
1913 break;
1914
1915 case PRE_DEC:
1916 gcc_assert (XEXP (x, 0) != stack_pointer_rtx);
1917 fprintf (file, "--");
1918 output_address (mem_mode, XEXP (x, 0));
1919 fprintf (file, "[1]");
1920 break;
1921 case PRE_INC:
1922 fprintf (file, "++");
1923 output_address (mem_mode, XEXP (x, 0));
1924 fprintf (file, "[1]");
1925 break;
1926 case POST_INC:
1927 gcc_assert (XEXP (x, 0) != stack_pointer_rtx);
1928 output_address (mem_mode, XEXP (x, 0));
1929 fprintf (file, "++[1]");
1930 break;
1931 case POST_DEC:
1932 output_address (mem_mode, XEXP (x, 0));
1933 fprintf (file, "--[1]");
1934 break;
1935
1936 case SYMBOL_REF:
1937 case CONST:
1938 case LABEL_REF:
1939 gcc_assert (sdata_symbolic_operand (x, Pmode));
1940 fprintf (file, "+B14(");
1941 output_addr_const (file, x);
1942 fprintf (file, ")");
1943 break;
1944
1945 case UNSPEC:
1946 switch (XINT (x, 1))
1947 {
1948 case UNSPEC_LOAD_GOT:
1949 fputs ("$GOT(", file);
1950 output_addr_const (file, XVECEXP (x, 0, 0));
1951 fputs (")", file);
1952 break;
1953 case UNSPEC_LOAD_SDATA:
1954 output_addr_const (file, XVECEXP (x, 0, 0));
1955 break;
1956 default:
1957 gcc_unreachable ();
1958 }
1959 break;
1960
1961 default:
1962 gcc_assert (GET_CODE (x) != MEM);
1963 c6x_print_operand (file, x, 0);
1964 break;
1965 }
1966 }
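/* Together with the '*' prefix emitted by the MEM case of c6x_print_operand
   below, the cases above yield operands such as *+A4(8) for a reg+offset
   access, *++A5[1] for a pre-increment, or *+B14(sym) for a small-data
   reference. These examples are traced from the code above rather than
   taken from TI documentation. */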
1967
1968 /* Return a single character, which is either 'l', 's', 'd' or 'm', which
1969 specifies the functional unit used by INSN. */
1970
1971 char
1972 c6x_get_unit_specifier (rtx_insn *insn)
1973 {
1974 enum attr_units units;
1975
1976 if (insn_info.exists ())
1977 {
1978 int unit = INSN_INFO_ENTRY (INSN_UID (insn)).reservation;
1979 return c6x_unit_names[unit][0];
1980 }
1981
1982 units = get_attr_units (insn);
1983 switch (units)
1984 {
1985 case UNITS_D:
1986 case UNITS_DL:
1987 case UNITS_DS:
1988 case UNITS_DLS:
1989 case UNITS_D_ADDR:
1990 return 'd';
1991 case UNITS_L:
1992 case UNITS_LS:
1993 return 'l';
1994 case UNITS_S:
1995 return 's';
1996 case UNITS_M:
1997 return 'm';
1998 default:
1999 gcc_unreachable ();
2000 }
2001 }
2002
2003 /* Prints the unit specifier field. */
2004 static void
2005 c6x_print_unit_specifier_field (FILE *file, rtx_insn *insn)
2006 {
2007 enum attr_units units = get_attr_units (insn);
2008 enum attr_cross cross = get_attr_cross (insn);
2009 enum attr_dest_regfile rf = get_attr_dest_regfile (insn);
2010 int half;
2011 char unitspec;
2012
2013 if (units == UNITS_D_ADDR)
2014 {
2015 enum attr_addr_regfile arf = get_attr_addr_regfile (insn);
2016 int t_half;
2017 gcc_assert (arf != ADDR_REGFILE_UNKNOWN);
2018 half = arf == ADDR_REGFILE_A ? 1 : 2;
2019 t_half = rf == DEST_REGFILE_A ? 1 : 2;
2020 fprintf (file, ".d%dt%d", half, t_half);
2021 return;
2022 }
2023
2024 if (insn_info.exists ())
2025 {
2026 int unit = INSN_INFO_ENTRY (INSN_UID (insn)).reservation;
2027 fputs (".", file);
2028 fputs (c6x_unit_names[unit], file);
2029 if (cross == CROSS_Y)
2030 fputs ("x", file);
2031 return;
2032 }
2033
2034 gcc_assert (rf != DEST_REGFILE_UNKNOWN);
2035 unitspec = c6x_get_unit_specifier (insn);
2036 half = rf == DEST_REGFILE_A ? 1 : 2;
2037 fprintf (file, ".%c%d%s", unitspec, half, cross == CROSS_Y ? "x" : "");
2038 }
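/* The printed field looks like, e.g., ".d1t2" for a D-unit address
   computation whose address register is on side A and whose data register
   is on side B, or ".s2x" for an S-unit insn on side B using the cross
   path. Examples are illustrative, derived from the format strings above. */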
2039
2040 /* Output assembly language for the address ADDR to FILE. */
2041 static void
2042 c6x_print_operand_address (FILE *file, machine_mode mode, rtx addr)
2043 {
2044 c6x_print_address_operand (file, addr, mode);
2045 }
2046
2047 /* Print an operand, X, to FILE, with an optional modifier in CODE.
2048
2049 Meaning of CODE:
2050 $ -- print the unit specifier field for the instruction.
2051 . -- print the predicate for the instruction or an empty string for an
2052 unconditional one.
2053 | -- print "||" if the insn should be issued in parallel with the previous
2054 one.
2055
2056 C -- print an opcode suffix for a reversed condition
2057 d -- H, W or D as a suffix for ADDA, based on the factor given by the
2058 operand
2059 D -- print either B, H, W or D as a suffix for ADDA, based on the size of
2060 the operand
2061 J -- print a predicate
2062 j -- like J, but use reverse predicate
2063 k -- treat a CONST_INT as a register number and print it as a register
2064 K -- like k, but print out a doubleword register pair
2065 n -- print an integer operand, negated
2066 p -- print the low part of a DImode register
2067 P -- print the high part of a DImode register
2068 r -- print the absolute value of an integer operand, shifted right by 1
2069 R -- print the absolute value of an integer operand, shifted right by 2
2070 f -- the first clear bit in an integer operand assumed to be a mask for
2071 a clr instruction
2072 F -- the last clear bit in such a mask
2073 s -- the first set bit in an integer operand assumed to be a mask for
2074 a set instruction
2075 S -- the last set bit in such a mask
2076 U -- print either 1 or 2, depending on the side of the machine used by
2077 the operand */
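/* As a worked example of the mask modifiers (traced from the scanning loops
   in c6x_print_operand below): for the clr-style mask 0xFFFFF00F, in which
   bits 4..11 are clear, %f prints 4 and %F prints 11; for the set-style
   mask 0x00000FF0, in which bits 4..11 are set, %s prints 4 and %S
   prints 11. */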
2078
2079 static void
2080 c6x_print_operand (FILE *file, rtx x, int code)
2081 {
2082 int i;
2083 HOST_WIDE_INT v;
2084 tree t;
2085 machine_mode mode;
2086
2087 if (code == '|')
2088 {
2089 if (GET_MODE (c6x_current_insn) != TImode)
2090 fputs ("||", file);
2091 return;
2092 }
2093 if (code == '$')
2094 {
2095 c6x_print_unit_specifier_field (file, c6x_current_insn);
2096 return;
2097 }
2098
2099 if (code == '.')
2100 {
2101 x = current_insn_predicate;
2102 if (x)
2103 {
2104 unsigned int regno = REGNO (XEXP (x, 0));
2105 fputs ("[", file);
2106 if (GET_CODE (x) == EQ)
2107 fputs ("!", file);
2108 fputs (reg_names [regno], file);
2109 fputs ("]", file);
2110 }
2111 return;
2112 }
2113
2114 mode = GET_MODE (x);
2115
2116 switch (code)
2117 {
2118 case 'C':
2119 case 'c':
2120 {
2121 enum rtx_code c = GET_CODE (x);
2122 if (code == 'C')
2123 c = swap_condition (c);
2124 fputs (GET_RTX_NAME (c), file);
2125 }
2126 return;
2127
2128 case 'J':
2129 case 'j':
2130 {
2131 unsigned int regno = REGNO (XEXP (x, 0));
2132 if ((GET_CODE (x) == EQ) == (code == 'J'))
2133 fputs ("!", file);
2134 fputs (reg_names [regno], file);
2135 }
2136 return;
2137
2138 case 'k':
2139 gcc_assert (GET_CODE (x) == CONST_INT);
2140 v = INTVAL (x);
2141 fprintf (file, "%s", reg_names[v]);
2142 return;
2143 case 'K':
2144 gcc_assert (GET_CODE (x) == CONST_INT);
2145 v = INTVAL (x);
2146 gcc_assert ((v & 1) == 0);
2147 fprintf (file, "%s:%s", reg_names[v + 1], reg_names[v]);
2148 return;
2149
2150 case 's':
2151 case 'S':
2152 case 'f':
2153 case 'F':
2154 gcc_assert (GET_CODE (x) == CONST_INT);
2155 v = INTVAL (x);
2156 for (i = 0; i < 32; i++)
2157 {
2158 HOST_WIDE_INT tst = v & 1;
2159 if (((code == 'f' || code == 'F') && !tst)
2160 || ((code == 's' || code == 'S') && tst))
2161 break;
2162 v >>= 1;
2163 }
2164 if (code == 'f' || code == 's')
2165 {
2166 fprintf (file, "%d", i);
2167 return;
2168 }
2169 for (;i < 32; i++)
2170 {
2171 HOST_WIDE_INT tst = v & 1;
2172 if ((code == 'F' && tst) || (code == 'S' && !tst))
2173 break;
2174 v >>= 1;
2175 }
2176 fprintf (file, "%d", i - 1);
2177 return;
2178
2179 case 'n':
2180 gcc_assert (GET_CODE (x) == CONST_INT);
2181 output_addr_const (file, GEN_INT (-INTVAL (x)));
2182 return;
2183
2184 case 'r':
2185 gcc_assert (GET_CODE (x) == CONST_INT);
2186 v = INTVAL (x);
2187 if (v < 0)
2188 v = -v;
2189 output_addr_const (file, GEN_INT (v >> 1));
2190 return;
2191
2192 case 'R':
2193 gcc_assert (GET_CODE (x) == CONST_INT);
2194 v = INTVAL (x);
2195 if (v < 0)
2196 v = -v;
2197 output_addr_const (file, GEN_INT (v >> 2));
2198 return;
2199
2200 case 'd':
2201 gcc_assert (GET_CODE (x) == CONST_INT);
2202 v = INTVAL (x);
2203 fputs (v == 2 ? "h" : v == 4 ? "w" : "d", file);
2204 return;
2205
2206 case 'p':
2207 case 'P':
2208 gcc_assert (GET_CODE (x) == REG);
2209 v = REGNO (x);
2210 if (code == 'P')
2211 v++;
2212 fputs (reg_names[v], file);
2213 return;
2214
2215 case 'D':
2216 v = 0;
2217 if (GET_CODE (x) == CONST)
2218 {
2219 x = XEXP (x, 0);
2220 gcc_assert (GET_CODE (x) == PLUS);
2221 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
2222 v = INTVAL (XEXP (x, 1));
2223 x = XEXP (x, 0);
2224
2225 }
2226 gcc_assert (GET_CODE (x) == SYMBOL_REF);
2227
2228 t = SYMBOL_REF_DECL (x);
2229 if (DECL_P (t))
2230 v |= DECL_ALIGN_UNIT (t);
2231 else
2232 v |= TYPE_ALIGN_UNIT (TREE_TYPE (t));
2233 if (v & 1)
2234 fputs ("b", file);
2235 else if (v & 2)
2236 fputs ("h", file);
2237 else
2238 fputs ("w", file);
2239 return;
2240
2241 case 'U':
2242 if (MEM_P (x))
2243 {
2244 x = XEXP (x, 0);
2245 if (GET_CODE (x) == PLUS
2246 || GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC)
2247 x = XEXP (x, 0);
2248 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
2249 {
2250 gcc_assert (sdata_symbolic_operand (x, Pmode));
2251 fputs ("2", file);
2252 return;
2253 }
2254 }
2255 gcc_assert (REG_P (x));
2256 if (A_REGNO_P (REGNO (x)))
2257 fputs ("1", file);
2258 if (B_REGNO_P (REGNO (x)))
2259 fputs ("2", file);
2260 return;
2261
2262 default:
2263 switch (GET_CODE (x))
2264 {
2265 case REG:
2266 if (GET_MODE_SIZE (mode) == 8)
2267 fprintf (file, "%s:%s", reg_names[REGNO (x) + 1],
2268 reg_names[REGNO (x)]);
2269 else
2270 fprintf (file, "%s", reg_names[REGNO (x)]);
2271 break;
2272
2273 case MEM:
2274 fputc ('*', file);
2275 gcc_assert (XEXP (x, 0) != stack_pointer_rtx);
2276 c6x_print_address_operand (file, XEXP (x, 0), GET_MODE (x));
2277 break;
2278
2279 case SYMBOL_REF:
2280 fputc ('(', file);
2281 output_addr_const (file, x);
2282 fputc (')', file);
2283 break;
2284
2285 case CONST_INT:
2286 output_addr_const (file, x);
2287 break;
2288
2289 case CONST_DOUBLE:
2290 output_operand_lossage ("invalid const_double operand");
2291 break;
2292
2293 default:
2294 output_addr_const (file, x);
2295 }
2296 }
2297 }
2298 \f
2299 /* Return TRUE if OP is a valid memory address with a base register of
2300 class C. If SMALL_OFFSET is true, we disallow memory references which would
2301 require a long offset with B14/B15. */
2302
2303 bool
2304 c6x_mem_operand (rtx op, enum reg_class c, bool small_offset)
2305 {
2306 machine_mode mode = GET_MODE (op);
2307 rtx base = XEXP (op, 0);
2308 switch (GET_CODE (base))
2309 {
2310 case REG:
2311 break;
2312 case PLUS:
2313 if (small_offset
2314 && (XEXP (base, 0) == stack_pointer_rtx
2315 || XEXP (base, 0) == pic_offset_table_rtx))
2316 {
2317 if (!c6x_legitimate_address_p_1 (mode, base, true, true))
2318 return false;
2319 }
2320
2321 /* fall through */
2322 case PRE_INC:
2323 case PRE_DEC:
2324 case PRE_MODIFY:
2325 case POST_INC:
2326 case POST_DEC:
2327 case POST_MODIFY:
2328 base = XEXP (base, 0);
2329 break;
2330
2331 case CONST:
2332 case LABEL_REF:
2333 case SYMBOL_REF:
2334 gcc_assert (sdata_symbolic_operand (base, Pmode));
2335 return !small_offset && c == B_REGS;
2336
2337 default:
2338 return false;
2339 }
2340 return TEST_HARD_REG_BIT (reg_class_contents[ (int) (c)], REGNO (base));
2341 }
2342
2343 /* Returns true if X is a valid address for use in a memory reference
2344 of mode MODE. If STRICT is true, we do not allow pseudo registers
2345 in the address. NO_LARGE_OFFSET is true if we are examining an
2346 address for use in a load or store misaligned instruction, or
2347 recursively examining an operand inside a PRE/POST_MODIFY. */
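/* As a rough illustration of the rules below (derived from the code, not
   from the TI manuals): a REG+CONST address is accepted when the constant
   is a multiple of the access size and the scaled offset lies in [-31, 31]
   (or [-31, 27] for a doubleword access that must be split into words);
   scaled offsets in [0, 32767] are additionally allowed when the base is
   the stack pointer, the access is no wider than a word, and a long-offset
   form is permitted. */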
2348
2349 bool
2350 c6x_legitimate_address_p_1 (machine_mode mode, rtx x, bool strict,
2351 bool no_large_offset)
2352 {
2353 int size, size1;
2354 HOST_WIDE_INT off;
2355 enum rtx_code code = GET_CODE (x);
2356
2357 switch (code)
2358 {
2359 case PRE_MODIFY:
2360 case POST_MODIFY:
2361 /* We can't split these into word-sized pieces yet. */
2362 if (!TARGET_STDW && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
2363 return false;
2364 if (GET_CODE (XEXP (x, 1)) != PLUS)
2365 return false;
2366 if (!c6x_legitimate_address_p_1 (mode, XEXP (x, 1), strict, true))
2367 return false;
2368 if (!rtx_equal_p (XEXP (x, 0), XEXP (XEXP (x, 1), 0)))
2369 return false;
2370
2371 /* fall through */
2372 case PRE_INC:
2373 case PRE_DEC:
2374 case POST_INC:
2375 case POST_DEC:
2376 /* We can't split these into word-sized pieces yet. */
2377 if (!TARGET_STDW && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
2378 return false;
2379 x = XEXP (x, 0);
2380 if (!REG_P (x))
2381 return false;
2382
2383 /* fall through */
2384 case REG:
2385 if (strict)
2386 return REGNO_OK_FOR_BASE_STRICT_P (REGNO (x));
2387 else
2388 return REGNO_OK_FOR_BASE_NONSTRICT_P (REGNO (x));
2389
2390 case PLUS:
2391 if (!REG_P (XEXP (x, 0))
2392 || !c6x_legitimate_address_p_1 (mode, XEXP (x, 0), strict, false))
2393 return false;
2394 /* We cannot ensure currently that both registers end up in the
2395 same register file. */
2396 if (REG_P (XEXP (x, 1)))
2397 return false;
2398
2399 if (mode == BLKmode)
2400 size = 4;
2401 else if (mode == VOIDmode)
2402 /* ??? This can happen during ivopts. */
2403 size = 1;
2404 else
2405 size = GET_MODE_SIZE (mode);
2406
2407 if (flag_pic
2408 && GET_CODE (XEXP (x, 1)) == UNSPEC
2409 && XINT (XEXP (x, 1), 1) == UNSPEC_LOAD_SDATA
2410 && XEXP (x, 0) == pic_offset_table_rtx
2411 && sdata_symbolic_operand (XVECEXP (XEXP (x, 1), 0, 0), SImode))
2412 return !no_large_offset && size <= 4;
2413 if (flag_pic == 1
2414 && mode == Pmode
2415 && GET_CODE (XEXP (x, 1)) == UNSPEC
2416 && XINT (XEXP (x, 1), 1) == UNSPEC_LOAD_GOT
2417 && XEXP (x, 0) == pic_offset_table_rtx
2418 && (GET_CODE (XVECEXP (XEXP (x, 1), 0, 0)) == SYMBOL_REF
2419 || GET_CODE (XVECEXP (XEXP (x, 1), 0, 0)) == LABEL_REF))
2420 return !no_large_offset;
2421 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2422 return false;
2423
2424 off = INTVAL (XEXP (x, 1));
2425
2426 /* If the machine does not have doubleword load/stores, we'll use
2427 word size accesses. */
2428 size1 = size;
2429 if (size == 2 * UNITS_PER_WORD && !TARGET_STDW)
2430 size = UNITS_PER_WORD;
2431
2432 if (((HOST_WIDE_INT)size1 - 1) & off)
2433 return false;
2434 off /= size;
2435 if (off > -32 && off < (size1 == size ? 32 : 28))
2436 return true;
2437 if (no_large_offset || code != PLUS || XEXP (x, 0) != stack_pointer_rtx
2438 || size1 > UNITS_PER_WORD)
2439 return false;
2440 return off >= 0 && off < 32768;
2441
2442 case CONST:
2443 case SYMBOL_REF:
2444 case LABEL_REF:
2445 return (!no_large_offset
2446 /* With -fpic, we must wrap it in an unspec to show the B14
2447 dependency. */
2448 && !flag_pic
2449 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
2450 && sdata_symbolic_operand (x, Pmode));
2451
2452 default:
2453 return false;
2454 }
2455 }
2456
2457 static bool
2458 c6x_legitimate_address_p (machine_mode mode, rtx x, bool strict)
2459 {
2460 return c6x_legitimate_address_p_1 (mode, x, strict, false);
2461 }
2462
2463 static bool
2464 c6x_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
2465 rtx x ATTRIBUTE_UNUSED)
2466 {
2467 return true;
2468 }
2469 \f
2470 /* Implements TARGET_PREFERRED_RENAME_CLASS. */
2471 static reg_class_t
2472 c6x_preferred_rename_class (reg_class_t cl)
2473 {
2474 if (cl == A_REGS)
2475 return NONPREDICATE_A_REGS;
2476 if (cl == B_REGS)
2477 return NONPREDICATE_B_REGS;
2478 if (cl == ALL_REGS || cl == GENERAL_REGS)
2479 return NONPREDICATE_REGS;
2480 return NO_REGS;
2481 }
2482 \f
2483 /* Implements FINAL_PRESCAN_INSN. */
2484 void
2485 c6x_final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
2486 int noperands ATTRIBUTE_UNUSED)
2487 {
2488 c6x_current_insn = insn;
2489 }
2490 \f
2491 /* A structure to describe the stack layout of a function. The layout is
2492 as follows:
2493
2494 [saved frame pointer (or possibly padding0)]
2495 --> incoming stack pointer, new hard frame pointer
2496 [saved call-used regs]
2497 [optional padding1]
2498 --> soft frame pointer
2499 [frame]
2500 [outgoing arguments]
2501 [optional padding2]
2502
2503 The structure members are laid out in this order. */
2504
2505 struct c6x_frame
2506 {
2507 int padding0;
2508 /* Number of registers to save. */
2509 int nregs;
2510 int padding1;
2511 HOST_WIDE_INT frame;
2512 int outgoing_arguments_size;
2513 int padding2;
2514
2515 HOST_WIDE_INT to_allocate;
2516 /* The offsets relative to the incoming stack pointer (which
2517 becomes HARD_FRAME_POINTER). */
2518 HOST_WIDE_INT frame_pointer_offset;
2519 HOST_WIDE_INT b3_offset;
2520
2521 /* True if we should call push_rts/pop_rts to save and restore
2522 registers. */
2523 bool push_rts;
2524 };
2525
2526 /* Return true if we need to save and modify the PIC register in the
2527 prologue. */
2528
2529 static bool
2530 must_reload_pic_reg_p (void)
2531 {
2532 struct cgraph_local_info *i = NULL;
2533
2534 if (!TARGET_DSBT)
2535 return false;
2536
2537 i = cgraph_node::local_info (current_function_decl);
2538
2539 if ((crtl->uses_pic_offset_table || !crtl->is_leaf) && !i->local)
2540 return true;
2541 return false;
2542 }
2543
2544 /* Return 1 if we need to save REGNO. */
2545 static int
2546 c6x_save_reg (unsigned int regno)
2547 {
2548 return ((df_regs_ever_live_p (regno)
2549 && !call_used_regs[regno]
2550 && !fixed_regs[regno])
2551 || (regno == RETURN_ADDR_REGNO
2552 && (df_regs_ever_live_p (regno)
2553 || !crtl->is_leaf))
2554 || (regno == PIC_OFFSET_TABLE_REGNUM && must_reload_pic_reg_p ()));
2555 }
2556
2557 /* Examine the number of regs NREGS we've determined we must save.
2558 Return true if we should use __c6xabi_push_rts/__c6xabi_pop_rts for
2559 prologue and epilogue. */
2560
2561 static bool
2562 use_push_rts_p (int nregs)
2563 {
2564 if (TARGET_INSNS_64PLUS && optimize_function_for_size_p (cfun)
2565 && !cfun->machine->contains_sibcall
2566 && !cfun->returns_struct
2567 && !TARGET_LONG_CALLS
2568 && nregs >= 6 && !frame_pointer_needed)
2569 return true;
2570 return false;
2571 }
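/* Note that when push_rts is chosen, c6x_compute_frame_layout below forces
   the saved-register count to 14, since the __c6xabi_push_rts helper stores
   a fixed block of registers; the nregs >= 6 test above is presumably the
   point at which the shorter call sequence starts to pay off under -Os. */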
2572
2573 /* Return the number of saved general purpose registers. */
2574
2575 int
2576 c6x_nsaved_regs (void)
2577 {
2578 int nregs = 0;
2579 int regno;
2580
2581 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2582 if (c6x_save_reg (regno))
2583 nregs++;
2584 return nregs;
2585 }
2586
2587 /* The safe debug order mandated by the ABI. */
2588 static unsigned reg_save_order[] =
2589 {
2590 REG_A10, REG_A11, REG_A12, REG_A13,
2591 REG_A14, REG_B3,
2592 REG_B10, REG_B11, REG_B12, REG_B13,
2593 REG_B14, REG_A15
2594 };
2595
2596 #define N_SAVE_ORDER (sizeof reg_save_order / sizeof *reg_save_order)
2597
2598 /* Compute the layout of the stack frame and store it in FRAME. */
2599
2600 static void
2601 c6x_compute_frame_layout (struct c6x_frame *frame)
2602 {
2603 HOST_WIDE_INT size = get_frame_size ();
2604 HOST_WIDE_INT offset;
2605 int nregs;
2606
2607 /* We use the four bytes which are technically inside the caller's frame,
2608 usually to save the frame pointer. */
2609 offset = -4;
2610 frame->padding0 = 0;
2611 nregs = c6x_nsaved_regs ();
2612 frame->push_rts = false;
2613 frame->b3_offset = 0;
2614 if (use_push_rts_p (nregs))
2615 {
2616 frame->push_rts = true;
2617 frame->b3_offset = (TARGET_BIG_ENDIAN ? -12 : -13) * 4;
2618 nregs = 14;
2619 }
2620 else if (c6x_save_reg (REG_B3))
2621 {
2622 int idx;
2623 for (idx = N_SAVE_ORDER - 1; reg_save_order[idx] != REG_B3; idx--)
2624 {
2625 if (c6x_save_reg (reg_save_order[idx]))
2626 frame->b3_offset -= 4;
2627 }
2628 }
2629 frame->nregs = nregs;
2630
2631 if (size == 0 && nregs == 0)
2632 {
2633 frame->padding0 = 4;
2634 frame->padding1 = frame->padding2 = 0;
2635 frame->frame_pointer_offset = frame->to_allocate = 0;
2636 frame->outgoing_arguments_size = 0;
2637 return;
2638 }
2639
2640 if (!frame->push_rts)
2641 offset += frame->nregs * 4;
2642
2643 if (offset == 0 && size == 0 && crtl->outgoing_args_size == 0
2644 && !crtl->is_leaf)
2645 /* Don't use the bottom of the caller's frame if we have no
2646 allocation of our own and call other functions. */
2647 frame->padding0 = frame->padding1 = 4;
2648 else if (offset & 4)
2649 frame->padding1 = 4;
2650 else
2651 frame->padding1 = 0;
2652
2653 offset += frame->padding0 + frame->padding1;
2654 frame->frame_pointer_offset = offset;
2655 offset += size;
2656
2657 frame->outgoing_arguments_size = crtl->outgoing_args_size;
2658 offset += frame->outgoing_arguments_size;
2659
2660 if ((offset & 4) == 0)
2661 frame->padding2 = 8;
2662 else
2663 frame->padding2 = 4;
2664 frame->to_allocate = offset + frame->padding2;
2665 }
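/* A small worked example, traced by hand from the code above and intended
   only as an illustration: with two call-saved registers, an 8-byte local
   frame, no outgoing arguments and no push_rts, we get offset = -4 + 8 = 4,
   padding1 = 4, frame_pointer_offset = 8, offset = 16 after the locals,
   padding2 = 8, and finally to_allocate = 24. */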
2666
2667 /* Return the offset between two registers, one to be eliminated, and the other
2668 its replacement, at the start of a routine. */
2669
2670 HOST_WIDE_INT
2671 c6x_initial_elimination_offset (int from, int to)
2672 {
2673 struct c6x_frame frame;
2674 c6x_compute_frame_layout (&frame);
2675
2676 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2677 return 0;
2678 else if (from == FRAME_POINTER_REGNUM
2679 && to == HARD_FRAME_POINTER_REGNUM)
2680 return -frame.frame_pointer_offset;
2681 else
2682 {
2683 gcc_assert (to == STACK_POINTER_REGNUM);
2684
2685 if (from == ARG_POINTER_REGNUM)
2686 return frame.to_allocate + (frame.push_rts ? 56 : 0);
2687
2688 gcc_assert (from == FRAME_POINTER_REGNUM);
2689 return frame.to_allocate - frame.frame_pointer_offset;
2690 }
2691 }
2692
2693 /* Given FROM and TO register numbers, say whether this elimination is
2694 allowed. Frame pointer elimination is automatically handled. */
2695
2696 static bool
2697 c6x_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2698 {
2699 if (to == STACK_POINTER_REGNUM)
2700 return !frame_pointer_needed;
2701 return true;
2702 }
2703
2704 /* Emit insns to increment the stack pointer by OFFSET. If
2705 FRAME_RELATED_P, set the RTX_FRAME_RELATED_P flag on the insns.
2706 Does nothing if the offset is zero. */
2707
2708 static void
2709 emit_add_sp_const (HOST_WIDE_INT offset, bool frame_related_p)
2710 {
2711 rtx to_add = GEN_INT (offset);
2712 rtx orig_to_add = to_add;
2713 rtx_insn *insn;
2714
2715 if (offset == 0)
2716 return;
2717
2718 if (offset < -32768 || offset > 32767)
2719 {
2720 rtx reg = gen_rtx_REG (SImode, REG_A0);
2721 rtx low = GEN_INT (trunc_int_for_mode (offset, HImode));
2722
2723 insn = emit_insn (gen_movsi_high (reg, low));
2724 if (frame_related_p)
2725 RTX_FRAME_RELATED_P (insn) = 1;
2726 insn = emit_insn (gen_movsi_lo_sum (reg, reg, to_add));
2727 if (frame_related_p)
2728 RTX_FRAME_RELATED_P (insn) = 1;
2729 to_add = reg;
2730 }
2731 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2732 to_add));
2733 if (frame_related_p)
2734 {
2735 if (REG_P (to_add))
2736 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
2737 gen_rtx_SET (stack_pointer_rtx,
2738 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
2739 orig_to_add)));
2740
2741 RTX_FRAME_RELATED_P (insn) = 1;
2742 }
2743 }
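/* For example, an adjustment of -40000 does not fit the 16-bit immediate
   range checked above, so it is built in A0 with a high-part move followed
   by a lo_sum and then added to the stack pointer; the REG_FRAME_RELATED_EXPR
   note records the net SP adjustment for the unwinder, since the individual
   insns no longer express it directly. */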
2744
2745 /* Prologue and epilogue. */
2746 void
2747 c6x_expand_prologue (void)
2748 {
2749 struct c6x_frame frame;
2750 rtx_insn *insn;
2751 rtx mem;
2752 int nsaved = 0;
2753 HOST_WIDE_INT initial_offset, off, added_already;
2754
2755 c6x_compute_frame_layout (&frame);
2756
2757 if (flag_stack_usage_info)
2758 current_function_static_stack_size = frame.to_allocate;
2759
2760 initial_offset = -frame.to_allocate;
2761 if (frame.push_rts)
2762 {
2763 emit_insn (gen_push_rts ());
2764 nsaved = frame.nregs;
2765 }
2766
2767 /* If the offsets would be too large for the memory references we will
2768 create to save registers, do the stack allocation in two parts.
2769 Ensure by subtracting 8 that we don't store to the word pointed to
2770 by the stack pointer. */
2771 if (initial_offset < -32768)
2772 initial_offset = -frame.frame_pointer_offset - 8;
2773
2774 if (frame.to_allocate > 0)
2775 gcc_assert (initial_offset != 0);
2776
2777 off = -initial_offset + 4 - frame.padding0;
2778
2779 mem = gen_frame_mem (Pmode, stack_pointer_rtx);
2780
2781 added_already = 0;
2782 if (frame_pointer_needed)
2783 {
2784 rtx fp_reg = gen_rtx_REG (SImode, REG_A15);
2785 /* We go through some contortions here to both follow the ABI's
2786 recommendation that FP == incoming SP, and to avoid writing or
2787 reading the word pointed to by the stack pointer. */
2788 rtx addr = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx,
2789 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
2790 GEN_INT (-8)));
2791 insn = emit_move_insn (gen_frame_mem (Pmode, addr), fp_reg);
2792 RTX_FRAME_RELATED_P (insn) = 1;
2793 nsaved++;
2794 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, stack_pointer_rtx,
2795 GEN_INT (8)));
2796 RTX_FRAME_RELATED_P (insn) = 1;
2797 off -= 4;
2798 added_already = -8;
2799 }
2800
2801 emit_add_sp_const (initial_offset - added_already, true);
2802
2803 if (nsaved < frame.nregs)
2804 {
2805 unsigned i;
2806
2807 for (i = 0; i < N_SAVE_ORDER; i++)
2808 {
2809 int idx = N_SAVE_ORDER - i - 1;
2810 unsigned regno = reg_save_order[idx];
2811 rtx reg;
2812 machine_mode save_mode = SImode;
2813
2814 if (regno == REG_A15 && frame_pointer_needed)
2815 /* Already saved. */
2816 continue;
2817 if (!c6x_save_reg (regno))
2818 continue;
2819
2820 if (TARGET_STDW && (off & 4) == 0 && off <= 256
2821 && (regno & 1) == 1
2822 && i + 1 < N_SAVE_ORDER
2823 && reg_save_order[idx - 1] == regno - 1
2824 && c6x_save_reg (regno - 1))
2825 {
2826 save_mode = DImode;
2827 regno--;
2828 i++;
2829 }
2830 reg = gen_rtx_REG (save_mode, regno);
2831 off -= GET_MODE_SIZE (save_mode);
2832
2833 insn = emit_move_insn (adjust_address (mem, save_mode, off),
2834 reg);
2835 RTX_FRAME_RELATED_P (insn) = 1;
2836
2837 nsaved += HARD_REGNO_NREGS (regno, save_mode);
2838 }
2839 }
2840 gcc_assert (nsaved == frame.nregs);
2841 emit_add_sp_const (-frame.to_allocate - initial_offset, true);
2842 if (must_reload_pic_reg_p ())
2843 {
2844 if (dsbt_decl == NULL)
2845 {
2846 tree t;
2847
2848 t = build_index_type (integer_one_node);
2849 t = build_array_type (integer_type_node, t);
2850 t = build_decl (BUILTINS_LOCATION, VAR_DECL,
2851 get_identifier ("__c6xabi_DSBT_BASE"), t);
2852 DECL_ARTIFICIAL (t) = 1;
2853 DECL_IGNORED_P (t) = 1;
2854 DECL_EXTERNAL (t) = 1;
2855 TREE_STATIC (t) = 1;
2856 TREE_PUBLIC (t) = 1;
2857 TREE_USED (t) = 1;
2858
2859 dsbt_decl = t;
2860 }
2861 emit_insn (gen_setup_dsbt (pic_offset_table_rtx,
2862 XEXP (DECL_RTL (dsbt_decl), 0)));
2863 }
2864 }
2865
2866 void
2867 c6x_expand_epilogue (bool sibcall)
2868 {
2869 unsigned i;
2870 struct c6x_frame frame;
2871 rtx mem;
2872 HOST_WIDE_INT off;
2873 int nsaved = 0;
2874
2875 c6x_compute_frame_layout (&frame);
2876
2877 mem = gen_frame_mem (Pmode, stack_pointer_rtx);
2878
2879 /* Insert a dummy set/use of the stack pointer. This creates a
2880 scheduler barrier between the prologue saves and epilogue restores. */
2881 emit_insn (gen_epilogue_barrier (stack_pointer_rtx, stack_pointer_rtx));
2882
2883 /* If the offsets would be too large for the memory references we will
2884 create to restore registers, do a preliminary stack adjustment here. */
2885 off = frame.to_allocate - frame.frame_pointer_offset + frame.padding1;
2886 if (frame.push_rts)
2887 {
2888 nsaved = frame.nregs;
2889 }
2890 else
2891 {
2892 if (frame.to_allocate > 32768)
2893 {
2894 /* Don't add the entire offset so that we leave an unused word
2895 above the stack pointer. */
2896 emit_add_sp_const ((off - 16) & ~7, false);
2897 off &= 7;
2898 off += 16;
2899 }
2900 for (i = 0; i < N_SAVE_ORDER; i++)
2901 {
2902 unsigned regno = reg_save_order[i];
2903 rtx reg;
2904 machine_mode save_mode = SImode;
2905
2906 if (!c6x_save_reg (regno))
2907 continue;
2908 if (regno == REG_A15 && frame_pointer_needed)
2909 continue;
2910
2911 if (TARGET_STDW && (off & 4) == 0 && off < 256
2912 && (regno & 1) == 0
2913 && i + 1 < N_SAVE_ORDER
2914 && reg_save_order[i + 1] == regno + 1
2915 && c6x_save_reg (regno + 1))
2916 {
2917 save_mode = DImode;
2918 i++;
2919 }
2920 reg = gen_rtx_REG (save_mode, regno);
2921
2922 emit_move_insn (reg, adjust_address (mem, save_mode, off));
2923
2924 off += GET_MODE_SIZE (save_mode);
2925 nsaved += HARD_REGNO_NREGS (regno, save_mode);
2926 }
2927 }
2928 if (!frame_pointer_needed)
2929 emit_add_sp_const (off + frame.padding0 - 4, false);
2930 else
2931 {
2932 rtx fp_reg = gen_rtx_REG (SImode, REG_A15);
2933 rtx addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
2934 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
2935 GEN_INT (8)));
2936 emit_insn (gen_addsi3 (stack_pointer_rtx, hard_frame_pointer_rtx,
2937 GEN_INT (-8)));
2938 emit_move_insn (fp_reg, gen_frame_mem (Pmode, addr));
2939 nsaved++;
2940 }
2941 gcc_assert (nsaved == frame.nregs);
2942 if (!sibcall)
2943 {
2944 if (frame.push_rts)
2945 emit_jump_insn (gen_pop_rts ());
2946 else
2947 emit_jump_insn (gen_return_internal (gen_rtx_REG (SImode,
2948 RETURN_ADDR_REGNO)));
2949 }
2950 }
2951
2952 /* Return the value of the return address for the frame COUNT steps up
2953 from the current frame, after the prologue.
2954 We punt for everything but the current frame by returning const0_rtx. */
2955
2956 rtx
2957 c6x_return_addr_rtx (int count)
2958 {
2959 if (count != 0)
2960 return const0_rtx;
2961
2962 return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNO);
2963 }
2964 \f
2965 /* Return true iff TYPE is one of the shadow types. */
2966 static bool
2967 shadow_type_p (enum attr_type type)
2968 {
2969 return (type == TYPE_SHADOW || type == TYPE_LOAD_SHADOW
2970 || type == TYPE_MULT_SHADOW);
2971 }
2972
2973 /* Return true iff INSN is a shadow pattern. */
2974 static bool
2975 shadow_p (rtx_insn *insn)
2976 {
2977 if (!NONDEBUG_INSN_P (insn) || recog_memoized (insn) < 0)
2978 return false;
2979 return shadow_type_p (get_attr_type (insn));
2980 }
2981
2982 /* Return true iff INSN is a shadow or blockage pattern. */
2983 static bool
2984 shadow_or_blockage_p (rtx_insn *insn)
2985 {
2986 enum attr_type type;
2987 if (!NONDEBUG_INSN_P (insn) || recog_memoized (insn) < 0)
2988 return false;
2989 type = get_attr_type (insn);
2990 return shadow_type_p (type) || type == TYPE_BLOCKAGE;
2991 }
2992 \f
2993 /* Translate UNITS into a bitmask of units we can reserve for this
2994 insn. */
2995 static int
2996 get_reservation_flags (enum attr_units units)
2997 {
2998 switch (units)
2999 {
3000 case UNITS_D:
3001 case UNITS_D_ADDR:
3002 return RESERVATION_FLAG_D;
3003 case UNITS_L:
3004 return RESERVATION_FLAG_L;
3005 case UNITS_S:
3006 return RESERVATION_FLAG_S;
3007 case UNITS_M:
3008 return RESERVATION_FLAG_M;
3009 case UNITS_LS:
3010 return RESERVATION_FLAG_LS;
3011 case UNITS_DL:
3012 return RESERVATION_FLAG_DL;
3013 case UNITS_DS:
3014 return RESERVATION_FLAG_DS;
3015 case UNITS_DLS:
3016 return RESERVATION_FLAG_DLS;
3017 default:
3018 return 0;
3019 }
3020 }
3021
3022 /* Compute the side of the machine used by INSN, which reserves UNITS.
3023 This must match the reservations in the scheduling description. */
3024 static int
3025 get_insn_side (rtx_insn *insn, enum attr_units units)
3026 {
3027 if (units == UNITS_D_ADDR)
3028 return (get_attr_addr_regfile (insn) == ADDR_REGFILE_A ? 0 : 1);
3029 else
3030 {
3031 enum attr_dest_regfile rf = get_attr_dest_regfile (insn);
3032 if (rf == DEST_REGFILE_ANY)
3033 return get_attr_type (insn) == TYPE_BRANCH ? 0 : 1;
3034 else
3035 return rf == DEST_REGFILE_A ? 0 : 1;
3036 }
3037 }
3038
3039 /* After scheduling, walk the insns between HEAD and END and assign unit
3040 reservations. */
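/* Roughly, the loop below makes four passes over each execute packet:
   insns whose reservation names a single unit are assigned as soon as they
   are seen; pass 1 only counts how many contenders each unit has; pass 2
   assigns insns that have a choice of units (other than the fully flexible
   DLS class) to the least-contended unit; pass 3 finally places the DLS
   insns. */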
3041 static void
3042 assign_reservations (rtx_insn *head, rtx_insn *end)
3043 {
3044 rtx_insn *insn;
3045 for (insn = head; insn != NEXT_INSN (end); insn = NEXT_INSN (insn))
3046 {
3047 unsigned int sched_mask, reserved;
3048 rtx_insn *within, *last;
3049 int pass;
3050 int rsrv[2];
3051 int rsrv_count[2][4];
3052 int i;
3053
3054 if (GET_MODE (insn) != TImode)
3055 continue;
3056
3057 reserved = 0;
3058 last = NULL;
3059 /* Find the last insn in the packet. It has a state recorded for it,
3060 which we can use to determine the units we should be using. */
3061 for (within = insn;
3062 (within != NEXT_INSN (end)
3063 && (within == insn || GET_MODE (within) != TImode));
3064 within = NEXT_INSN (within))
3065 {
3066 int icode;
3067 if (!NONDEBUG_INSN_P (within))
3068 continue;
3069 icode = recog_memoized (within);
3070 if (icode < 0)
3071 continue;
3072 if (shadow_p (within))
3073 continue;
3074 if (INSN_INFO_ENTRY (INSN_UID (within)).reservation != 0)
3075 reserved |= 1 << INSN_INFO_ENTRY (INSN_UID (within)).reservation;
3076 last = within;
3077 }
3078 if (last == NULL_RTX)
3079 continue;
3080
3081 sched_mask = INSN_INFO_ENTRY (INSN_UID (last)).unit_mask;
3082 sched_mask &= ~reserved;
3083
3084 memset (rsrv_count, 0, sizeof rsrv_count);
3085 rsrv[0] = rsrv[1] = ~0;
3086 for (i = 0; i < 8; i++)
3087 {
3088 int side = i / 4;
3089 int unit = i & 3;
3090 unsigned unit_bit = 1 << (unit + side * UNIT_QID_SIDE_OFFSET);
3091 /* Clear the bits which we expect to reserve in the following loop,
3092 leaving the ones set which aren't present in the scheduler's
3093 state and shouldn't be reserved. */
3094 if (sched_mask & unit_bit)
3095 rsrv[i / 4] &= ~(1 << unit);
3096 }
3097
3098 /* Walk through the insns that occur in the same cycle. We use multiple
3099 passes to assign units, assigning for insns with the most specific
3100 requirements first. */
3101 for (pass = 0; pass < 4; pass++)
3102 for (within = insn;
3103 (within != NEXT_INSN (end)
3104 && (within == insn || GET_MODE (within) != TImode));
3105 within = NEXT_INSN (within))
3106 {
3107 int uid = INSN_UID (within);
3108 int this_rsrv, side;
3109 int icode;
3110 enum attr_units units;
3111 enum attr_type type;
3112 int j;
3113
3114 if (!NONDEBUG_INSN_P (within))
3115 continue;
3116 icode = recog_memoized (within);
3117 if (icode < 0)
3118 continue;
3119 if (INSN_INFO_ENTRY (uid).reservation != 0)
3120 continue;
3121 units = get_attr_units (within);
3122 type = get_attr_type (within);
3123 this_rsrv = get_reservation_flags (units);
3124 if (this_rsrv == 0)
3125 continue;
3126 side = get_insn_side (within, units);
3127
3128 /* Certain floating point instructions are treated specially. If
3129 an insn can choose between units it can reserve, and its
3130 reservation spans more than one cycle, the reservation contains
3131 special markers in the first cycle to help us reconstruct what
3132 the automaton chose. */
3133 if ((type == TYPE_ADDDP || type == TYPE_FP4)
3134 && units == UNITS_LS)
3135 {
3136 int test1_code = ((type == TYPE_FP4 ? UNIT_QID_FPL1 : UNIT_QID_ADDDPL1)
3137 + side * UNIT_QID_SIDE_OFFSET);
3138 int test2_code = ((type == TYPE_FP4 ? UNIT_QID_FPS1 : UNIT_QID_ADDDPS1)
3139 + side * UNIT_QID_SIDE_OFFSET);
3140 if ((sched_mask & (1 << test1_code)) != 0)
3141 {
3142 this_rsrv = RESERVATION_FLAG_L;
3143 sched_mask &= ~(1 << test1_code);
3144 }
3145 else if ((sched_mask & (1 << test2_code)) != 0)
3146 {
3147 this_rsrv = RESERVATION_FLAG_S;
3148 sched_mask &= ~(1 << test2_code);
3149 }
3150 }
3151
3152 if ((this_rsrv & (this_rsrv - 1)) == 0)
3153 {
3154 int t = exact_log2 (this_rsrv) + side * UNIT_QID_SIDE_OFFSET;
3155 rsrv[side] |= this_rsrv;
3156 INSN_INFO_ENTRY (uid).reservation = t;
3157 continue;
3158 }
3159
3160 if (pass == 1)
3161 {
3162 for (j = 0; j < 4; j++)
3163 if (this_rsrv & (1 << j))
3164 rsrv_count[side][j]++;
3165 continue;
3166 }
3167 if ((pass == 2 && this_rsrv != RESERVATION_FLAG_DLS)
3168 || (pass == 3 && this_rsrv == RESERVATION_FLAG_DLS))
3169 {
3170 int best = -1, best_cost = INT_MAX;
3171 for (j = 0; j < 4; j++)
3172 if ((this_rsrv & (1 << j))
3173 && !(rsrv[side] & (1 << j))
3174 && rsrv_count[side][j] < best_cost)
3175 {
3176 best_cost = rsrv_count[side][j];
3177 best = j;
3178 }
3179 gcc_assert (best != -1);
3180 rsrv[side] |= 1 << best;
3181 for (j = 0; j < 4; j++)
3182 if ((this_rsrv & (1 << j)) && j != best)
3183 rsrv_count[side][j]--;
3184
3185 INSN_INFO_ENTRY (uid).reservation
3186 = best + side * UNIT_QID_SIDE_OFFSET;
3187 }
3188 }
3189 }
3190 }
3191
3192 /* Return a factor by which to weight unit imbalances for a reservation
3193 R. */
3194 static int
3195 unit_req_factor (enum unitreqs r)
3196 {
3197 switch (r)
3198 {
3199 case UNIT_REQ_D:
3200 case UNIT_REQ_L:
3201 case UNIT_REQ_S:
3202 case UNIT_REQ_M:
3203 case UNIT_REQ_X:
3204 case UNIT_REQ_T:
3205 return 1;
3206 case UNIT_REQ_DL:
3207 case UNIT_REQ_LS:
3208 case UNIT_REQ_DS:
3209 return 2;
3210 case UNIT_REQ_DLS:
3211 return 3;
3212 default:
3213 gcc_unreachable ();
3214 }
3215 }
3216
3217 /* Examine INSN, and store in REQ1/SIDE1 and REQ2/SIDE2 the unit
3218 requirements. Returns zero if INSN can't be handled, otherwise
3219 either one or two to show how many of the two pairs are in use.
3220 REQ1 is always used; it holds what is normally thought of as the
3221 instruction's reservation, e.g. UNIT_REQ_DL. REQ2 is used to either
3222 describe a cross path, or for loads/stores, the T unit. */
3223 static int
3224 get_unit_reqs (rtx_insn *insn, int *req1, int *side1, int *req2, int *side2)
3225 {
3226 enum attr_units units;
3227 enum attr_cross cross;
3228 int side, req;
3229
3230 if (!NONDEBUG_INSN_P (insn) || recog_memoized (insn) < 0)
3231 return 0;
3232 units = get_attr_units (insn);
3233 if (units == UNITS_UNKNOWN)
3234 return 0;
3235 side = get_insn_side (insn, units);
3236 cross = get_attr_cross (insn);
3237
3238 req = (units == UNITS_D ? UNIT_REQ_D
3239 : units == UNITS_D_ADDR ? UNIT_REQ_D
3240 : units == UNITS_DL ? UNIT_REQ_DL
3241 : units == UNITS_DS ? UNIT_REQ_DS
3242 : units == UNITS_L ? UNIT_REQ_L
3243 : units == UNITS_LS ? UNIT_REQ_LS
3244 : units == UNITS_S ? UNIT_REQ_S
3245 : units == UNITS_M ? UNIT_REQ_M
3246 : units == UNITS_DLS ? UNIT_REQ_DLS
3247 : -1);
3248 gcc_assert (req != -1);
3249 *req1 = req;
3250 *side1 = side;
3251 if (units == UNITS_D_ADDR)
3252 {
3253 *req2 = UNIT_REQ_T;
3254 *side2 = side ^ (cross == CROSS_Y ? 1 : 0);
3255 return 2;
3256 }
3257 else if (cross == CROSS_Y)
3258 {
3259 *req2 = UNIT_REQ_X;
3260 *side2 = side;
3261 return 2;
3262 }
3263 return 1;
3264 }
3265
3266 /* Walk the insns between and including HEAD and TAIL, and mark the
3267 resource requirements in the unit_reqs table. */
3268 static void
3269 count_unit_reqs (unit_req_table reqs, rtx_insn *head, rtx_insn *tail)
3270 {
3271 rtx_insn *insn;
3272
3273 memset (reqs, 0, sizeof (unit_req_table));
3274
3275 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
3276 {
3277 int side1, side2, req1, req2;
3278
3279 switch (get_unit_reqs (insn, &req1, &side1, &req2, &side2))
3280 {
3281 case 2:
3282 reqs[side2][req2]++;
3283 /* fall through */
3284 case 1:
3285 reqs[side1][req1]++;
3286 break;
3287 }
3288 }
3289 }
3290
3291 /* Update the table REQS by merging more specific unit reservations into
3292 more general ones, i.e. counting (for example) UNIT_REQ_D also in
3293 UNIT_REQ_DL, DS, and DLS. */
3294 static void
3295 merge_unit_reqs (unit_req_table reqs)
3296 {
3297 int side;
3298 for (side = 0; side < 2; side++)
3299 {
3300 int d = reqs[side][UNIT_REQ_D];
3301 int l = reqs[side][UNIT_REQ_L];
3302 int s = reqs[side][UNIT_REQ_S];
3303 int dl = reqs[side][UNIT_REQ_DL];
3304 int ls = reqs[side][UNIT_REQ_LS];
3305 int ds = reqs[side][UNIT_REQ_DS];
3306
3307 reqs[side][UNIT_REQ_DL] += d;
3308 reqs[side][UNIT_REQ_DL] += l;
3309 reqs[side][UNIT_REQ_DS] += d;
3310 reqs[side][UNIT_REQ_DS] += s;
3311 reqs[side][UNIT_REQ_LS] += l;
3312 reqs[side][UNIT_REQ_LS] += s;
3313 reqs[side][UNIT_REQ_DLS] += ds + dl + ls + d + l + s;
3314 }
3315 }
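/* For example, if one side has 3 D-only insns, 1 L-only insn and 1 insn
   that can use either D or L, the merged counts become DL = 5, DS = 3 and
   LS = 1, and DLS grows by 5. This is traced from the additions above and
   is meant only as an illustration. */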
3316
3317 /* Examine the table REQS and return a measure of unit imbalance by comparing
3318 the two sides of the machine. If, for example, D1 is used twice and D2
3319 used not at all, the return value should be 1 in the absence of other
3320 imbalances. */
3321 static int
3322 unit_req_imbalance (unit_req_table reqs)
3323 {
3324 int val = 0;
3325 int i;
3326
3327 for (i = 0; i < UNIT_REQ_MAX; i++)
3328 {
3329 int factor = unit_req_factor ((enum unitreqs) i);
3330 int diff = abs (reqs[0][i] - reqs[1][i]);
3331 val += (diff + factor - 1) / factor / 2;
3332 }
3333 return val;
3334 }
3335
3336 /* Return the resource-constrained minimum iteration interval given the
3337 data in the REQS table. This must have been processed with
3338 merge_unit_reqs already. */
3339 static int
3340 res_mii (unit_req_table reqs)
3341 {
3342 int side, req;
3343 int worst = 1;
3344 for (side = 0; side < 2; side++)
3345 for (req = 0; req < UNIT_REQ_MAX; req++)
3346 {
3347 int factor = unit_req_factor ((enum unitreqs) req);
3348 worst = MAX ((reqs[side][req] + factor - 1) / factor, worst);
3349 }
3350
3351 return worst;
3352 }
3353
3354 /* Examine INSN, and store in PMASK1 and PMASK2 bitmasks that represent
3355 the operands that are involved in the (up to) two reservations, as
3356 found by get_unit_reqs. Return true if we did this successfully, false
3357 if we couldn't identify what to do with INSN. */
3358 static bool
3359 get_unit_operand_masks (rtx_insn *insn, unsigned int *pmask1,
3360 unsigned int *pmask2)
3361 {
3362 enum attr_op_pattern op_pat;
3363
3364 if (recog_memoized (insn) < 0)
3365 return false;
3366 if (GET_CODE (PATTERN (insn)) == COND_EXEC)
3367 return false;
3368 extract_insn (insn);
3369 op_pat = get_attr_op_pattern (insn);
3370 if (op_pat == OP_PATTERN_DT)
3371 {
3372 gcc_assert (recog_data.n_operands == 2);
3373 *pmask1 = 1 << 0;
3374 *pmask2 = 1 << 1;
3375 return true;
3376 }
3377 else if (op_pat == OP_PATTERN_TD)
3378 {
3379 gcc_assert (recog_data.n_operands == 2);
3380 *pmask1 = 1 << 1;
3381 *pmask2 = 1 << 0;
3382 return true;
3383 }
3384 else if (op_pat == OP_PATTERN_SXS)
3385 {
3386 gcc_assert (recog_data.n_operands == 3);
3387 *pmask1 = (1 << 0) | (1 << 2);
3388 *pmask2 = 1 << 1;
3389 return true;
3390 }
3391 else if (op_pat == OP_PATTERN_SX)
3392 {
3393 gcc_assert (recog_data.n_operands == 2);
3394 *pmask1 = 1 << 0;
3395 *pmask2 = 1 << 1;
3396 return true;
3397 }
3398 else if (op_pat == OP_PATTERN_SSX)
3399 {
3400 gcc_assert (recog_data.n_operands == 3);
3401 *pmask1 = (1 << 0) | (1 << 1);
3402 *pmask2 = 1 << 2;
3403 return true;
3404 }
3405 return false;
3406 }
3407
3408 /* Try to replace a register in INSN, which has corresponding rename info
3409 from regrename_analyze in INFO. OP_MASK and ORIG_SIDE provide information
3410 about the operands that must be renamed and the side they are on.
3411 REQS is the table of unit reservations in the loop between HEAD and TAIL.
3412 We recompute this information locally after our transformation, and keep
3413 it only if we managed to improve the balance. */
3414 static void
3415 try_rename_operands (rtx_insn *head, rtx_insn *tail, unit_req_table reqs,
3416 rtx insn,
3417 insn_rr_info *info, unsigned int op_mask, int orig_side)
3418 {
3419 enum reg_class super_class = orig_side == 0 ? B_REGS : A_REGS;
3420 HARD_REG_SET unavailable;
3421 du_head_p this_head;
3422 struct du_chain *chain;
3423 int i;
3424 unsigned tmp_mask;
3425 int best_reg, old_reg;
3426 vec<du_head_p> involved_chains = vNULL;
3427 unit_req_table new_reqs;
3428 bool ok;
3429
3430 for (i = 0, tmp_mask = op_mask; tmp_mask; i++)
3431 {
3432 du_head_p op_chain;
3433 if ((tmp_mask & (1 << i)) == 0)
3434 continue;
3435 if (info->op_info[i].n_chains != 1)
3436 goto out_fail;
3437 op_chain = regrename_chain_from_id (info->op_info[i].heads[0]->id);
3438 involved_chains.safe_push (op_chain);
3439 tmp_mask &= ~(1 << i);
3440 }
3441
3442 if (involved_chains.length () > 1)
3443 goto out_fail;
3444
3445 this_head = involved_chains[0];
3446 if (this_head->cannot_rename)
3447 goto out_fail;
3448
3449 for (chain = this_head->first; chain; chain = chain->next_use)
3450 {
3451 unsigned int mask1, mask2, mask_changed;
3452 int count, side1, side2, req1, req2;
3453 insn_rr_info *this_rr = &insn_rr[INSN_UID (chain->insn)];
3454
3455 count = get_unit_reqs (chain->insn, &req1, &side1, &req2, &side2);
3456
3457 if (count == 0)
3458 goto out_fail;
3459
3460 if (!get_unit_operand_masks (chain->insn, &mask1, &mask2))
3461 goto out_fail;
3462
3463 extract_insn (chain->insn);
3464
3465 mask_changed = 0;
3466 for (i = 0; i < recog_data.n_operands; i++)
3467 {
3468 int j;
3469 int n_this_op = this_rr->op_info[i].n_chains;
3470 for (j = 0; j < n_this_op; j++)
3471 {
3472 du_head_p other = this_rr->op_info[i].heads[j];
3473 if (regrename_chain_from_id (other->id) == this_head)
3474 break;
3475 }
3476 if (j == n_this_op)
3477 continue;
3478
3479 if (n_this_op != 1)
3480 goto out_fail;
3481 mask_changed |= 1 << i;
3482 }
3483 gcc_assert (mask_changed != 0);
3484 if (mask_changed != mask1 && mask_changed != mask2)
3485 goto out_fail;
3486 }
3487
3488 /* If we get here, we can do the renaming. */
3489 COMPL_HARD_REG_SET (unavailable, reg_class_contents[(int) super_class]);
3490
3491 old_reg = this_head->regno;
3492 best_reg =
3493 find_rename_reg (this_head, super_class, &unavailable, old_reg, true);
3494
3495 ok = regrename_do_replace (this_head, best_reg);
3496 gcc_assert (ok);
3497
3498 count_unit_reqs (new_reqs, head, PREV_INSN (tail));
3499 merge_unit_reqs (new_reqs);
3500 if (dump_file)
3501 {
3502 fprintf (dump_file, "reshuffle for insn %d, op_mask %x, "
3503 "original side %d, new reg %d\n",
3504 INSN_UID (insn), op_mask, orig_side, best_reg);
3505 fprintf (dump_file, " imbalance %d -> %d\n",
3506 unit_req_imbalance (reqs), unit_req_imbalance (new_reqs));
3507 }
3508 if (unit_req_imbalance (new_reqs) > unit_req_imbalance (reqs))
3509 {
3510 ok = regrename_do_replace (this_head, old_reg);
3511 gcc_assert (ok);
3512 }
3513 else
3514 memcpy (reqs, new_reqs, sizeof (unit_req_table));
3515
3516 out_fail:
3517 involved_chains.release ();
3518 }
3519
3520 /* Find insns in LOOP which would, if shifted to the other side
3521 of the machine, reduce an imbalance in the unit reservations. */
3522 static void
3523 reshuffle_units (basic_block loop)
3524 {
3525 rtx_insn *head = BB_HEAD (loop);
3526 rtx_insn *tail = BB_END (loop);
3527 rtx_insn *insn;
3528 unit_req_table reqs;
3529 edge e;
3530 edge_iterator ei;
3531 bitmap_head bbs;
3532
3533 count_unit_reqs (reqs, head, PREV_INSN (tail));
3534 merge_unit_reqs (reqs);
3535
3536 regrename_init (true);
3537
3538 bitmap_initialize (&bbs, &bitmap_default_obstack);
3539
3540 FOR_EACH_EDGE (e, ei, loop->preds)
3541 bitmap_set_bit (&bbs, e->src->index);
3542
3543 bitmap_set_bit (&bbs, loop->index);
3544 regrename_analyze (&bbs);
3545
3546 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
3547 {
3548 enum attr_units units;
3549 int count, side1, side2, req1, req2;
3550 unsigned int mask1, mask2;
3551 insn_rr_info *info;
3552
3553 if (!NONDEBUG_INSN_P (insn))
3554 continue;
3555
3556 count = get_unit_reqs (insn, &req1, &side1, &req2, &side2);
3557
3558 if (count == 0)
3559 continue;
3560
3561 if (!get_unit_operand_masks (insn, &mask1, &mask2))
3562 continue;
3563
3564 info = &insn_rr[INSN_UID (insn)];
3565 if (info->op_info == NULL)
3566 continue;
3567
3568 if (reqs[side1][req1] > 1
3569 && reqs[side1][req1] > 2 * reqs[side1 ^ 1][req1])
3570 {
3571 try_rename_operands (head, tail, reqs, insn, info, mask1, side1);
3572 }
3573
3574 units = get_attr_units (insn);
3575 if (units == UNITS_D_ADDR)
3576 {
3577 gcc_assert (count == 2);
3578 if (reqs[side2][req2] > 1
3579 && reqs[side2][req2] > 2 * reqs[side2 ^ 1][req2])
3580 {
3581 try_rename_operands (head, tail, reqs, insn, info, mask2, side2);
3582 }
3583 }
3584 }
3585 regrename_finish ();
3586 }
3587 \f
3588 /* Backend scheduling state. */
3589 typedef struct c6x_sched_context
3590 {
3591 /* The current scheduler clock, saved in the sched_reorder hook. */
3592 int curr_sched_clock;
3593
3594 /* Number of insns issued so far in this cycle. */
3595 int issued_this_cycle;
3596
3597 /* We record the time at which each jump occurs in JUMP_CYCLES. The
3598 theoretical maximum for number of jumps in flight is 12: 2 every
3599 cycle, with a latency of 6 cycles each. This is a circular
3600 buffer; JUMP_CYCLE_INDEX is the pointer to the start. Earlier
3601 jumps have a higher index. This array should be accessed through
3602 the jump_cycle function. */
3603 int jump_cycles[12];
3604 int jump_cycle_index;
3605
3606 /* In parallel with jump_cycles, this array records the opposite of
3607 the condition used in each pending jump. This is used to
3608 predicate insns that are scheduled in the jump's delay slots. If
3609 this is NULL_RTX no such predication happens. */
3610 rtx jump_cond[12];
3611
3612 /* Similar to the jump_cycles mechanism, but here we take into
3613 account all insns with delay slots, to avoid scheduling asms into
3614 the delay slots. */
3615 int delays_finished_at;
3616
3617 /* The following variable value is the last issued insn. */
3618 rtx_insn *last_scheduled_insn;
3619 /* The last issued insn that isn't a shadow of another. */
3620 rtx_insn *last_scheduled_iter0;
3621
3622 /* The following variable value is DFA state before issuing the
3623 first insn in the current clock cycle. We do not use this member
3624 of the structure directly; we copy the data in and out of
3625 prev_cycle_state. */
3626 state_t prev_cycle_state_ctx;
3627
3628 int reg_n_accesses[FIRST_PSEUDO_REGISTER];
3629 int reg_n_xaccesses[FIRST_PSEUDO_REGISTER];
3630 int reg_set_in_cycle[FIRST_PSEUDO_REGISTER];
3631
3632 int tmp_reg_n_accesses[FIRST_PSEUDO_REGISTER];
3633 int tmp_reg_n_xaccesses[FIRST_PSEUDO_REGISTER];
3634 } *c6x_sched_context_t;
3635
3636 /* The current scheduling state. */
3637 static struct c6x_sched_context ss;
3638
3639 /* The following variable value is DFA state before issuing the first insn
3640 in the current clock cycle. This is used in c6x_variable_issue for
3641 comparison with the state after issuing the last insn in a cycle. */
3642 static state_t prev_cycle_state;
3643
3644 /* Set when we discover while processing an insn that it would lead to too
3645 many accesses of the same register. */
3646 static bool reg_access_stall;
3647
3648 /* The highest insn uid after delayed insns were split, but before loop bodies
3649 were copied by the modulo scheduling code. */
3650 static int sploop_max_uid_iter0;
3651
3652 /* Look up the jump cycle with index N. For an out-of-bounds N, we return 0,
3653 so the caller does not specifically have to test for it. */
3654 static int
3655 get_jump_cycle (int n)
3656 {
3657 if (n >= 12)
3658 return 0;
3659 n += ss.jump_cycle_index;
3660 if (n >= 12)
3661 n -= 12;
3662 return ss.jump_cycles[n];
3663 }
3664
3665 /* Look up the jump condition with index N. */
3666 static rtx
3667 get_jump_cond (int n)
3668 {
3669 if (n >= 12)
3670 return NULL_RTX;
3671 n += ss.jump_cycle_index;
3672 if (n >= 12)
3673 n -= 12;
3674 return ss.jump_cond[n];
3675 }
3676
3677 /* Return the index of the first jump that occurs after CLOCK_VAR. If no jump
3678 has delay slots beyond CLOCK_VAR, return -1. */
3679 static int
3680 first_jump_index (int clock_var)
3681 {
3682 int retval = -1;
3683 int n = 0;
3684 for (;;)
3685 {
3686 int t = get_jump_cycle (n);
3687 if (t <= clock_var)
3688 break;
3689 retval = n;
3690 n++;
3691 }
3692 return retval;
3693 }
3694
3695 /* Add a new entry in our scheduling state for a jump that occurs in CYCLE
3696 and has the opposite condition of COND. */
3697 static void
3698 record_jump (int cycle, rtx cond)
3699 {
3700 if (ss.jump_cycle_index == 0)
3701 ss.jump_cycle_index = 11;
3702 else
3703 ss.jump_cycle_index--;
3704 ss.jump_cycles[ss.jump_cycle_index] = cycle;
3705 ss.jump_cond[ss.jump_cycle_index] = cond;
3706 }
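/* Since record_jump decrements the start index before storing, the most
   recently recorded jump is always found at index 0 by get_jump_cycle and
   get_jump_cond, with older jumps at higher indices, matching the comment
   on the jump_cycles field above. */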
3707
3708 /* Set the clock cycle of INSN to CYCLE. Also clears the insn's entry in
3709 new_conditions. */
3710 static void
3711 insn_set_clock (rtx insn, int cycle)
3712 {
3713 unsigned uid = INSN_UID (insn);
3714
3715 if (uid >= INSN_INFO_LENGTH)
3716 insn_info.safe_grow (uid * 5 / 4 + 10);
3717
3718 INSN_INFO_ENTRY (uid).clock = cycle;
3719 INSN_INFO_ENTRY (uid).new_cond = NULL;
3720 INSN_INFO_ENTRY (uid).reservation = 0;
3721 INSN_INFO_ENTRY (uid).ebb_start = false;
3722 }
3723
3724 /* Return the clock cycle we set for the insn with uid UID. */
3725 static int
3726 insn_uid_get_clock (int uid)
3727 {
3728 return INSN_INFO_ENTRY (uid).clock;
3729 }
3730
3731 /* Return the clock cycle we set for INSN. */
3732 static int
3733 insn_get_clock (rtx insn)
3734 {
3735 return insn_uid_get_clock (INSN_UID (insn));
3736 }
3737
3738 /* Examine INSN, and if it is a conditional jump of any kind, return
3739 the opposite of the condition in which it branches. Otherwise,
3740 return NULL_RTX. */
3741 static rtx
3742 condjump_opposite_condition (rtx insn)
3743 {
3744 rtx pat = PATTERN (insn);
3745 int icode = INSN_CODE (insn);
3746 rtx x = NULL;
3747
3748 if (icode == CODE_FOR_br_true || icode == CODE_FOR_br_false)
3749 {
3750 x = XEXP (SET_SRC (pat), 0);
3751 if (icode == CODE_FOR_br_false)
3752 return x;
3753 }
3754 if (GET_CODE (pat) == COND_EXEC)
3755 {
3756 rtx t = COND_EXEC_CODE (pat);
3757 if ((GET_CODE (t) == PARALLEL
3758 && GET_CODE (XVECEXP (t, 0, 0)) == RETURN)
3759 || (GET_CODE (t) == UNSPEC && XINT (t, 1) == UNSPEC_REAL_JUMP)
3760 || (GET_CODE (t) == SET && SET_DEST (t) == pc_rtx))
3761 x = COND_EXEC_TEST (pat);
3762 }
3763
3764 if (x != NULL_RTX)
3765 {
3766 enum rtx_code code = GET_CODE (x);
3767 x = gen_rtx_fmt_ee (code == EQ ? NE : EQ,
3768 GET_MODE (x), XEXP (x, 0),
3769 XEXP (x, 1));
3770 }
3771 return x;
3772 }
3773
3774 /* Return true iff COND1 and COND2 are exactly opposite conditions,
3775 one of them NE and the other EQ. */
3776 static bool
3777 conditions_opposite_p (rtx cond1, rtx cond2)
3778 {
3779 return (rtx_equal_p (XEXP (cond1, 0), XEXP (cond2, 0))
3780 && rtx_equal_p (XEXP (cond1, 1), XEXP (cond2, 1))
3781 && GET_CODE (cond1) == reverse_condition (GET_CODE (cond2)));
3782 }
3783
3784 /* Return true if we can add a predicate COND to INSN, or if INSN
3785 already has that predicate. If DOIT is true, also perform the
3786 modification. */
3787 static bool
3788 predicate_insn (rtx_insn *insn, rtx cond, bool doit)
3789 {
3790 int icode;
3791 if (cond == NULL_RTX)
3792 {
3793 gcc_assert (!doit);
3794 return false;
3795 }
3796
3797 if (get_attr_predicable (insn) == PREDICABLE_YES
3798 && GET_CODE (PATTERN (insn)) != COND_EXEC)
3799 {
3800 if (doit)
3801 {
3802 rtx newpat = gen_rtx_COND_EXEC (VOIDmode, cond, PATTERN (insn));
3803 PATTERN (insn) = newpat;
3804 INSN_CODE (insn) = -1;
3805 }
3806 return true;
3807 }
3808 if (GET_CODE (PATTERN (insn)) == COND_EXEC
3809 && rtx_equal_p (COND_EXEC_TEST (PATTERN (insn)), cond))
3810 return true;
3811 icode = INSN_CODE (insn);
3812 if (icode == CODE_FOR_real_jump
3813 || icode == CODE_FOR_jump
3814 || icode == CODE_FOR_indirect_jump)
3815 {
3816 rtx pat = PATTERN (insn);
3817 rtx dest = (icode == CODE_FOR_real_jump ? XVECEXP (pat, 0, 0)
3818 : icode == CODE_FOR_jump ? XEXP (SET_SRC (pat), 0)
3819 : SET_SRC (pat));
3820 if (doit)
3821 {
3822 rtx newpat;
3823 if (REG_P (dest))
3824 newpat = gen_rtx_COND_EXEC (VOIDmode, cond, PATTERN (insn));
3825 else
3826 newpat = gen_br_true (cond, XEXP (cond, 0), dest);
3827 PATTERN (insn) = newpat;
3828 INSN_CODE (insn) = -1;
3829 }
3830 return true;
3831 }
3832 if (INSN_CODE (insn) == CODE_FOR_br_true)
3833 {
3834 rtx br_cond = XEXP (SET_SRC (PATTERN (insn)), 0);
3835 return rtx_equal_p (br_cond, cond);
3836 }
3837 if (INSN_CODE (insn) == CODE_FOR_br_false)
3838 {
3839 rtx br_cond = XEXP (SET_SRC (PATTERN (insn)), 0);
3840 return conditions_opposite_p (br_cond, cond);
3841 }
3842 return false;
3843 }
3844
3845 /* Initialize SC. Used by c6x_init_sched_context and c6x_sched_init. */
3846 static void
3847 init_sched_state (c6x_sched_context_t sc)
3848 {
3849 sc->last_scheduled_insn = NULL;
3850 sc->last_scheduled_iter0 = NULL;
3851 sc->issued_this_cycle = 0;
3852 memset (sc->jump_cycles, 0, sizeof sc->jump_cycles);
3853 memset (sc->jump_cond, 0, sizeof sc->jump_cond);
3854 sc->jump_cycle_index = 0;
3855 sc->delays_finished_at = 0;
3856 sc->curr_sched_clock = 0;
3857
3858 sc->prev_cycle_state_ctx = xmalloc (dfa_state_size);
3859
3860 memset (sc->reg_n_accesses, 0, sizeof sc->reg_n_accesses);
3861 memset (sc->reg_n_xaccesses, 0, sizeof sc->reg_n_xaccesses);
3862 memset (sc->reg_set_in_cycle, 0, sizeof sc->reg_set_in_cycle);
3863
3864 state_reset (sc->prev_cycle_state_ctx);
3865 }
3866
3867 /* Allocate store for new scheduling context. */
3868 static void *
3869 c6x_alloc_sched_context (void)
3870 {
3871 return xmalloc (sizeof (struct c6x_sched_context));
3872 }
3873
3874 /* If CLEAN_P is true, initialize _SC with clean data; otherwise
3875 initialize it from the global context. */
3876 static void
3877 c6x_init_sched_context (void *_sc, bool clean_p)
3878 {
3879 c6x_sched_context_t sc = (c6x_sched_context_t) _sc;
3880
3881 if (clean_p)
3882 {
3883 init_sched_state (sc);
3884 }
3885 else
3886 {
3887 *sc = ss;
3888 sc->prev_cycle_state_ctx = xmalloc (dfa_state_size);
3889 memcpy (sc->prev_cycle_state_ctx, prev_cycle_state, dfa_state_size);
3890 }
3891 }
3892
3893 /* Sets the global scheduling context to the one pointed to by _SC. */
3894 static void
3895 c6x_set_sched_context (void *_sc)
3896 {
3897 c6x_sched_context_t sc = (c6x_sched_context_t) _sc;
3898
3899 gcc_assert (sc != NULL);
3900 ss = *sc;
3901 memcpy (prev_cycle_state, sc->prev_cycle_state_ctx, dfa_state_size);
3902 }
3903
3904 /* Clear data in _SC. */
3905 static void
3906 c6x_clear_sched_context (void *_sc)
3907 {
3908 c6x_sched_context_t sc = (c6x_sched_context_t) _sc;
3909 gcc_assert (_sc != NULL);
3910
3911 free (sc->prev_cycle_state_ctx);
3912 }
3913
3914 /* Free _SC. */
3915 static void
3916 c6x_free_sched_context (void *_sc)
3917 {
3918 free (_sc);
3919 }
3920
3921 /* True if we are currently performing a preliminary scheduling
3922 pass before modulo scheduling; we can't allow the scheduler to
3923 modify instruction patterns using packetization assumptions,
3924 since there will be another scheduling pass later if modulo
3925 scheduling fails. */
3926 static bool in_hwloop;
3927
3928 /* Provide information about speculation capabilities, and set the
3929 DO_BACKTRACKING and DO_PREDICATION flags. */
3930 static void
3931 c6x_set_sched_flags (spec_info_t spec_info)
3932 {
3933 unsigned int *flags = &(current_sched_info->flags);
3934
3935 if (*flags & SCHED_EBB)
3936 {
3937 *flags |= DO_BACKTRACKING | DO_PREDICATION;
3938 }
3939 if (in_hwloop)
3940 *flags |= DONT_BREAK_DEPENDENCIES;
3941
3942 spec_info->mask = 0;
3943 }
3944
3945 /* Implement the TARGET_SCHED_ISSUE_RATE hook. */
3946
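/* The value 8 reflects the width of a C6X execute packet: each register
   file side (A and B) has a .D, .L, .S and .M functional unit, so up to
   eight instructions can issue in parallel in one cycle.  */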
3947 static int
3948 c6x_issue_rate (void)
3949 {
3950 return 8;
3951 }
3952
3953 /* Used together with the collapse_ndfa option, this ensures that we reach a
3954 deterministic automaton state before trying to advance a cycle.
3955 With collapse_ndfa, genautomata creates advance cycle arcs only for
3956 such deterministic states. */
3957
3958 static rtx
3959 c6x_sched_dfa_pre_cycle_insn (void)
3960 {
3961 return const0_rtx;
3962 }
3963
3964 /* We're beginning a new block. Initialize data structures as necessary. */
3965
3966 static void
3967 c6x_sched_init (FILE *dump ATTRIBUTE_UNUSED,
3968 int sched_verbose ATTRIBUTE_UNUSED,
3969 int max_ready ATTRIBUTE_UNUSED)
3970 {
3971 if (prev_cycle_state == NULL)
3972 {
3973 prev_cycle_state = xmalloc (dfa_state_size);
3974 }
3975 init_sched_state (&ss);
3976 state_reset (prev_cycle_state);
3977 }
3978
3979 /* We are about to begin issuing INSN. Return nonzero if we cannot
3980 issue it in the given cycle CLOCK, and zero if we should not sort
3981 the ready queue on the next clock start.
3982 For C6X, we use this function just to copy the previous DFA state
3983 for comparison purposes. */
3984
3985 static int
3986 c6x_dfa_new_cycle (FILE *dump ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3987 rtx_insn *insn ATTRIBUTE_UNUSED,
3988 int last_clock ATTRIBUTE_UNUSED,
3989 int clock ATTRIBUTE_UNUSED, int *sort_p ATTRIBUTE_UNUSED)
3990 {
3991 if (clock != last_clock)
3992 memcpy (prev_cycle_state, curr_state, dfa_state_size);
3993 return 0;
3994 }
3995
3996 static void
3997 c6x_mark_regno_read (int regno, bool cross)
3998 {
3999 int t = ++ss.tmp_reg_n_accesses[regno];
4000
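/* A single register may be read at most four times in one C6X execute
   packet, so a fifth read in this cycle forces the insn to be delayed.  */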
4001 if (t > 4)
4002 reg_access_stall = true;
4003
4004 if (cross)
4005 {
4006 int set_cycle = ss.reg_set_in_cycle[regno];
4007 /* This must be done in this way rather than by tweaking things in
4008 adjust_cost, since the stall occurs even for insns with opposite
4009 predicates, and the scheduler may not even see a dependency. */
4010 if (set_cycle > 0 && set_cycle == ss.curr_sched_clock)
4011 reg_access_stall = true;
4012 /* This doesn't quite do anything yet as we're only modeling one
4013 x unit. */
4014 ++ss.tmp_reg_n_xaccesses[regno];
4015 }
4016 }
4017
4018 /* Note that REG is read in the insn being examined. If CROSS, it
4019 means the access is through a cross path. Update the temporary reg
4020 access arrays, and set REG_ACCESS_STALL if the insn can't be issued
4021 in the current cycle. */
4022
4023 static void
4024 c6x_mark_reg_read (rtx reg, bool cross)
4025 {
4026 unsigned regno = REGNO (reg);
4027 unsigned nregs = hard_regno_nregs[regno][GET_MODE (reg)];
4028
4029 while (nregs-- > 0)
4030 c6x_mark_regno_read (regno + nregs, cross);
4031 }
4032
4033 /* Note that register REG is written in cycle CYCLES. */
4034
4035 static void
4036 c6x_mark_reg_written (rtx reg, int cycles)
4037 {
4038 unsigned regno = REGNO (reg);
4039 unsigned nregs = hard_regno_nregs[regno][GET_MODE (reg)];
4040
4041 while (nregs-- > 0)
4042 ss.reg_set_in_cycle[regno + nregs] = cycles;
4043 }
4044
4045 /* Update the register state information for the instruction INSN.
4046 Return true if the instruction has to be delayed until the
4047 next cycle. */
4048
4049 static bool
4050 c6x_registers_update (rtx_insn *insn)
4051 {
4052 enum attr_cross cross;
4053 enum attr_dest_regfile destrf;
4054 int i, nops;
4055 rtx x;
4056
4057 if (!reload_completed || recog_memoized (insn) < 0)
4058 return false;
4059
4060 reg_access_stall = false;
4061 memcpy (ss.tmp_reg_n_accesses, ss.reg_n_accesses,
4062 sizeof ss.tmp_reg_n_accesses);
4063 memcpy (ss.tmp_reg_n_xaccesses, ss.reg_n_xaccesses,
4064 sizeof ss.tmp_reg_n_xaccesses);
4065
4066 extract_insn (insn);
4067
4068 cross = get_attr_cross (insn);
4069 destrf = get_attr_dest_regfile (insn);
4070
4071 nops = recog_data.n_operands;
4072 x = PATTERN (insn);
4073 if (GET_CODE (x) == COND_EXEC)
4074 {
4075 c6x_mark_reg_read (XEXP (XEXP (x, 0), 0), false);
4076 nops -= 2;
4077 }
4078
4079 for (i = 0; i < nops; i++)
4080 {
4081 rtx op = recog_data.operand[i];
4082 if (recog_data.operand_type[i] == OP_OUT)
4083 continue;
4084 if (REG_P (op))
4085 {
4086 bool this_cross = cross;
4087 if (destrf == DEST_REGFILE_A && A_REGNO_P (REGNO (op)))
4088 this_cross = false;
4089 if (destrf == DEST_REGFILE_B && B_REGNO_P (REGNO (op)))
4090 this_cross = false;
4091 c6x_mark_reg_read (op, this_cross);
4092 }
4093 else if (MEM_P (op))
4094 {
4095 op = XEXP (op, 0);
4096 switch (GET_CODE (op))
4097 {
4098 case POST_INC:
4099 case PRE_INC:
4100 case POST_DEC:
4101 case PRE_DEC:
4102 op = XEXP (op, 0);
4103 /* fall through */
4104 case REG:
4105 c6x_mark_reg_read (op, false);
4106 break;
4107 case POST_MODIFY:
4108 case PRE_MODIFY:
4109 op = XEXP (op, 1);
4110 gcc_assert (GET_CODE (op) == PLUS);
4111 /* fall through */
4112 case PLUS:
4113 c6x_mark_reg_read (XEXP (op, 0), false);
4114 if (REG_P (XEXP (op, 1)))
4115 c6x_mark_reg_read (XEXP (op, 1), false);
4116 break;
4117 case SYMBOL_REF:
4118 case LABEL_REF:
4119 case CONST:
4120 c6x_mark_regno_read (REG_B14, false);
4121 break;
4122 default:
4123 gcc_unreachable ();
4124 }
4125 }
4126 else if (!CONSTANT_P (op) && strlen (recog_data.constraints[i]) > 0)
4127 gcc_unreachable ();
4128 }
4129 return reg_access_stall;
4130 }
4131
4132 /* Helper function for the TARGET_SCHED_REORDER and
4133 TARGET_SCHED_REORDER2 hooks. If scheduling an insn would be unsafe
4134 in the current cycle, move it down in the ready list and return the
4135 number of non-unsafe insns. */
4136
4137 static int
4138 c6x_sched_reorder_1 (rtx_insn **ready, int *pn_ready, int clock_var)
4139 {
4140 int n_ready = *pn_ready;
4141 rtx_insn **e_ready = ready + n_ready;
4142 rtx_insn **insnp;
4143 int first_jump;
4144
4145 /* Keep track of conflicts due to a limited number of register accesses,
4146 and due to stalls incurred by too early accesses of registers using
4147 cross paths. */
4148
4149 for (insnp = ready; insnp < e_ready; insnp++)
4150 {
4151 rtx_insn *insn = *insnp;
4152 int icode = recog_memoized (insn);
4153 bool is_asm = (icode < 0
4154 && (GET_CODE (PATTERN (insn)) == ASM_INPUT
4155 || asm_noperands (PATTERN (insn)) >= 0));
4156 bool no_parallel = (is_asm || icode == CODE_FOR_sploop
4157 || (icode >= 0
4158 && get_attr_type (insn) == TYPE_ATOMIC));
4159
4160 /* We delay asm insns until all delay slots are exhausted. We can't
4161 accurately tell how many cycles an asm takes, and the main scheduling
4162 code always assumes at least 1 cycle, which may be wrong. */
4163 if ((no_parallel
4164 && (ss.issued_this_cycle > 0 || clock_var < ss.delays_finished_at))
4165 || c6x_registers_update (insn)
4166 || (ss.issued_this_cycle > 0 && icode == CODE_FOR_sploop))
4167 {
4168 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
4169 *ready = insn;
4170 n_ready--;
4171 ready++;
4172 }
4173 else if (shadow_p (insn))
4174 {
4175 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
4176 *ready = insn;
4177 }
4178 }
4179
4180 /* Ensure that no other jump is scheduled in jump delay slots, since
4181 it would put the machine into the wrong state. Also, we must
4182 avoid scheduling insns that have a latency longer than the
4183 remaining jump delay slots, as the code at the jump destination
4184 won't be prepared for it.
4185
4186 However, we can relax this condition somewhat. The rest of the
4187 scheduler will automatically avoid scheduling an insn on which
4188 the jump shadow depends so late that its side effect happens
4189 after the jump. This means that if we see an insn with a longer
4190 latency here, it can safely be scheduled if we can ensure that it
4191 has a predicate opposite of the previous jump: the side effect
4192 will happen in what we think of as the same basic block. In
4193 c6x_variable_issue, we will record the necessary predicate in
4194 new_conditions, and after scheduling is finished, we will modify
4195 the insn.
4196
4197 Special care must be taken whenever there is more than one jump
4198 in flight. */
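/* For example, with the five delay slots of a C6X branch, a jump issued in
   cycle N has its side effect in cycle N + 5; an insn with a two-cycle
   latency issued in cycle N + 4 would take effect after the jump, so we
   only allow it here if it can be predicated with the condition opposite
   to that of the jump.  */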
4199
4200 first_jump = first_jump_index (clock_var);
4201 if (first_jump != -1)
4202 {
4203 int first_cycle = get_jump_cycle (first_jump);
4204 rtx first_cond = get_jump_cond (first_jump);
4205 int second_cycle = 0;
4206
4207 if (first_jump > 0)
4208 second_cycle = get_jump_cycle (first_jump - 1);
4209
4210 for (insnp = ready; insnp < e_ready; insnp++)
4211 {
4212 rtx_insn *insn = *insnp;
4213 int icode = recog_memoized (insn);
4214 bool is_asm = (icode < 0
4215 && (GET_CODE (PATTERN (insn)) == ASM_INPUT
4216 || asm_noperands (PATTERN (insn)) >= 0));
4217 int this_cycles, rsrv_cycles;
4218 enum attr_type type;
4219
4220 gcc_assert (!is_asm);
4221 if (icode < 0)
4222 continue;
4223 this_cycles = get_attr_cycles (insn);
4224 rsrv_cycles = get_attr_reserve_cycles (insn);
4225 type = get_attr_type (insn);
4226 /* Treat branches specially; there is also a hazard if two jumps
4227 end at the same cycle. */
4228 if (type == TYPE_BRANCH || type == TYPE_CALL)
4229 this_cycles++;
4230 if (clock_var + this_cycles <= first_cycle)
4231 continue;
4232 if ((first_jump > 0 && clock_var + this_cycles > second_cycle)
4233 || clock_var + rsrv_cycles > first_cycle
4234 || !predicate_insn (insn, first_cond, false))
4235 {
4236 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
4237 *ready = insn;
4238 n_ready--;
4239 ready++;
4240 }
4241 }
4242 }
4243
4244 return n_ready;
4245 }
4246
4247 /* Implement the TARGET_SCHED_REORDER hook. We save the current clock
4248 for later and clear the register access information for the new
4249 cycle. We also move asm statements out of the way if they would be
4250 scheduled in a delay slot. */
4251
4252 static int
4253 c6x_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
4254 int sched_verbose ATTRIBUTE_UNUSED,
4255 rtx_insn **ready ATTRIBUTE_UNUSED,
4256 int *pn_ready ATTRIBUTE_UNUSED, int clock_var)
4257 {
4258 ss.curr_sched_clock = clock_var;
4259 ss.issued_this_cycle = 0;
4260 memset (ss.reg_n_accesses, 0, sizeof ss.reg_n_accesses);
4261 memset (ss.reg_n_xaccesses, 0, sizeof ss.reg_n_xaccesses);
4262
4263 if (ready == NULL)
4264 return 0;
4265
4266 return c6x_sched_reorder_1 (ready, pn_ready, clock_var);
4267 }
4268
4269 /* Implement the TARGET_SCHED_REORDER2 hook. We use this to record the clock
4270 cycle for every insn. */
4271
4272 static int
4273 c6x_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
4274 int sched_verbose ATTRIBUTE_UNUSED,
4275 rtx_insn **ready ATTRIBUTE_UNUSED,
4276 int *pn_ready ATTRIBUTE_UNUSED, int clock_var)
4277 {
4278 /* FIXME: the assembler rejects labels inside an execute packet.
4279 This can occur if prologue insns are scheduled in parallel with
4280 others, so we avoid this here. Also make sure that nothing is
4281 scheduled in parallel with a TYPE_ATOMIC insn or after a jump. */
4282 if (RTX_FRAME_RELATED_P (ss.last_scheduled_insn)
4283 || JUMP_P (ss.last_scheduled_insn)
4284 || (recog_memoized (ss.last_scheduled_insn) >= 0
4285 && get_attr_type (ss.last_scheduled_insn) == TYPE_ATOMIC))
4286 {
4287 int n_ready = *pn_ready;
4288 rtx_insn **e_ready = ready + n_ready;
4289 rtx_insn **insnp;
4290
4291 for (insnp = ready; insnp < e_ready; insnp++)
4292 {
4293 rtx_insn *insn = *insnp;
4294 if (!shadow_p (insn))
4295 {
4296 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
4297 *ready = insn;
4298 n_ready--;
4299 ready++;
4300 }
4301 }
4302 return n_ready;
4303 }
4304
4305 return c6x_sched_reorder_1 (ready, pn_ready, clock_var);
4306 }
4307
4308 /* Subroutine of maybe_clobber_cond, called through note_stores. */
4309
4310 static void
4311 clobber_cond_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data1)
4312 {
4313 rtx *cond = (rtx *)data1;
4314 if (*cond != NULL_RTX && reg_overlap_mentioned_p (x, *cond))
4315 *cond = NULL_RTX;
4316 }
4317
4318 /* Examine INSN, and if it destroys the conditions we have recorded for
4319 any of the jumps in flight, clear that condition so that we don't
4320 predicate any more insns. CLOCK_VAR helps us limit the search to
4321 only those jumps which are still in flight. */
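/* The jump_cond and jump_cycles arrays form a twelve-entry ring buffer
   indexed by jump_cycle_index; entries whose recorded cycle is not later
   than CLOCK_VAR belong to jumps that are no longer in flight.  */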
4322
4323 static void
4324 maybe_clobber_cond (rtx insn, int clock_var)
4325 {
4326 int n, idx;
4327 idx = ss.jump_cycle_index;
4328 for (n = 0; n < 12; n++, idx++)
4329 {
4330 rtx cond, link;
4331 int cycle;
4332
4333 if (idx >= 12)
4334 idx -= 12;
4335 cycle = ss.jump_cycles[idx];
4336 if (cycle <= clock_var)
4337 return;
4338
4339 cond = ss.jump_cond[idx];
4340 if (cond == NULL_RTX)
4341 continue;
4342
4343 if (CALL_P (insn))
4344 {
4345 ss.jump_cond[idx] = NULL_RTX;
4346 continue;
4347 }
4348
4349 note_stores (PATTERN (insn), clobber_cond_1, ss.jump_cond + idx);
4350 for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
4351 if (REG_NOTE_KIND (link) == REG_INC)
4352 clobber_cond_1 (XEXP (link, 0), NULL_RTX, ss.jump_cond + idx);
4353 }
4354 }
4355
4356 /* Implement the TARGET_SCHED_VARIABLE_ISSUE hook. We are about to
4357 issue INSN. Return the number of insns left on the ready queue
4358 that can be issued this cycle.
4359 We use this hook to record clock cycles and reservations for every insn. */
4360
4361 static int
4362 c6x_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
4363 int sched_verbose ATTRIBUTE_UNUSED,
4364 rtx_insn *insn, int can_issue_more ATTRIBUTE_UNUSED)
4365 {
4366 ss.last_scheduled_insn = insn;
4367 if (INSN_UID (insn) < sploop_max_uid_iter0 && !JUMP_P (insn))
4368 ss.last_scheduled_iter0 = insn;
4369 if (GET_CODE (PATTERN (insn)) != USE && GET_CODE (PATTERN (insn)) != CLOBBER)
4370 ss.issued_this_cycle++;
4371 if (insn_info.exists ())
4372 {
4373 state_t st_after = alloca (dfa_state_size);
4374 int curr_clock = ss.curr_sched_clock;
4375 int uid = INSN_UID (insn);
4376 int icode = recog_memoized (insn);
4377 rtx first_cond;
4378 int first, first_cycle;
4379 unsigned int mask;
4380 int i;
4381
4382 insn_set_clock (insn, curr_clock);
4383 INSN_INFO_ENTRY (uid).ebb_start
4384 = curr_clock == 0 && ss.issued_this_cycle == 1;
4385
4386 first = first_jump_index (ss.curr_sched_clock);
4387 if (first == -1)
4388 {
4389 first_cycle = 0;
4390 first_cond = NULL_RTX;
4391 }
4392 else
4393 {
4394 first_cycle = get_jump_cycle (first);
4395 first_cond = get_jump_cond (first);
4396 }
4397 if (icode >= 0
4398 && first_cycle > curr_clock
4399 && first_cond != NULL_RTX
4400 && (curr_clock + get_attr_cycles (insn) > first_cycle
4401 || get_attr_type (insn) == TYPE_BRANCH
4402 || get_attr_type (insn) == TYPE_CALL))
4403 INSN_INFO_ENTRY (uid).new_cond = first_cond;
4404
4405 memcpy (st_after, curr_state, dfa_state_size);
4406 state_transition (st_after, const0_rtx);
4407
4408 mask = 0;
4409 for (i = 0; i < 2 * UNIT_QID_SIDE_OFFSET; i++)
4410 if (cpu_unit_reservation_p (st_after, c6x_unit_codes[i])
4411 && !cpu_unit_reservation_p (prev_cycle_state, c6x_unit_codes[i]))
4412 mask |= 1 << i;
4413 INSN_INFO_ENTRY (uid).unit_mask = mask;
4414
4415 maybe_clobber_cond (insn, curr_clock);
4416
4417 if (icode >= 0)
4418 {
4419 int i, cycles;
4420
4421 c6x_registers_update (insn);
4422 memcpy (ss.reg_n_accesses, ss.tmp_reg_n_accesses,
4423 sizeof ss.reg_n_accesses);
4424 memcpy (ss.reg_n_xaccesses, ss.tmp_reg_n_xaccesses,
4425 sizeof ss.reg_n_xaccesses);
4426
4427 cycles = get_attr_cycles (insn);
4428 if (ss.delays_finished_at < ss.curr_sched_clock + cycles)
4429 ss.delays_finished_at = ss.curr_sched_clock + cycles;
4430 if (get_attr_type (insn) == TYPE_BRANCH
4431 || get_attr_type (insn) == TYPE_CALL)
4432 {
4433 rtx opposite = condjump_opposite_condition (insn);
4434 record_jump (ss.curr_sched_clock + cycles, opposite);
4435 }
4436
4437 /* Mark the cycles in which the destination registers are written.
4438 This is used for calculating stalls when using cross units. */
4439 extract_insn (insn);
4440 /* Cross-path stalls don't apply to results of load insns. */
4441 if (get_attr_type (insn) == TYPE_LOAD
4442 || get_attr_type (insn) == TYPE_LOADN
4443 || get_attr_type (insn) == TYPE_LOAD_SHADOW)
4444 cycles--;
4445 for (i = 0; i < recog_data.n_operands; i++)
4446 {
4447 rtx op = recog_data.operand[i];
4448 if (MEM_P (op))
4449 {
4450 rtx addr = XEXP (op, 0);
4451 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
4452 c6x_mark_reg_written (XEXP (addr, 0),
4453 insn_uid_get_clock (uid) + 1);
4454 }
4455 if (recog_data.operand_type[i] != OP_IN
4456 && REG_P (op))
4457 {
4458 c6x_mark_reg_written (op,
4459 insn_uid_get_clock (uid) + cycles);
4460 }
4461 }
4462 }
4463 }
4464 return can_issue_more;
4465 }
4466
4467 /* Implement the TARGET_SCHED_ADJUST_COST hook. We need special handling for
4468 anti- and output dependencies. */
4469
4470 static int
4471 c6x_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4472 unsigned int)
4473 {
4474 enum attr_type insn_type = TYPE_UNKNOWN, dep_insn_type = TYPE_UNKNOWN;
4475 int dep_insn_code_number, insn_code_number;
4476 int shadow_bonus = 0;
4477 enum reg_note kind;
4478 dep_insn_code_number = recog_memoized (dep_insn);
4479 insn_code_number = recog_memoized (insn);
4480
4481 if (dep_insn_code_number >= 0)
4482 dep_insn_type = get_attr_type (dep_insn);
4483
4484 if (insn_code_number >= 0)
4485 insn_type = get_attr_type (insn);
4486
4487 kind = (reg_note) dep_type;
4488 if (kind == 0)
4489 {
4490 /* If we have a dependency on a load, and it's not for the result of
4491 the load, it must be for an autoincrement. Reduce the cost in that
4492 case. */
4493 if (dep_insn_type == TYPE_LOAD)
4494 {
4495 rtx set = PATTERN (dep_insn);
4496 if (GET_CODE (set) == COND_EXEC)
4497 set = COND_EXEC_CODE (set);
4498 if (GET_CODE (set) == UNSPEC)
4499 cost = 1;
4500 else
4501 {
4502 gcc_assert (GET_CODE (set) == SET);
4503 if (!reg_overlap_mentioned_p (SET_DEST (set), PATTERN (insn)))
4504 cost = 1;
4505 }
4506 }
4507 }
4508
4509 /* A jump shadow needs to have its latency decreased by one. Conceptually,
4510 it occurs in between two cycles, but we schedule it at the end of the
4511 first cycle. */
4512 if (shadow_type_p (insn_type))
4513 shadow_bonus = 1;
4514
4515 /* Anti and output dependencies usually have zero cost, but we want
4516 to insert a stall after a jump, and after certain floating point
4517 insns that take more than one cycle to read their inputs. In the
4518 future, we should try to find a better algorithm for scheduling
4519 jumps. */
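/* The constants below roughly mirror C6X latencies: for example, a load
   result becomes available after five cycles and a four-cycle multiply
   after four, so an output dependence must wait that long minus any jump
   shadow bonus.  */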
4520 if (kind != 0)
4521 {
4522 /* We can get anti-dependencies against shadow insns. Treat these
4523 like output dependencies, so that the insn is entirely finished
4524 before the branch takes place. */
4525 if (kind == REG_DEP_ANTI && insn_type == TYPE_SHADOW)
4526 kind = REG_DEP_OUTPUT;
4527 switch (dep_insn_type)
4528 {
4529 case TYPE_CALLP:
4530 return 1;
4531 case TYPE_BRANCH:
4532 case TYPE_CALL:
4533 if (get_attr_has_shadow (dep_insn) == HAS_SHADOW_Y)
4534 /* This is a real_jump/real_call insn. These don't have
4535 outputs, and ensuring the validity of scheduling things
4536 in the delay slot is the job of
4537 c6x_sched_reorder_1. */
4538 return 0;
4539 /* Unsplit calls can happen - e.g. for divide insns. */
4540 return 6;
4541 case TYPE_LOAD:
4542 case TYPE_LOADN:
4543 case TYPE_INTDP:
4544 if (kind == REG_DEP_OUTPUT)
4545 return 5 - shadow_bonus;
4546 return 0;
4547 case TYPE_MPY4:
4548 case TYPE_FP4:
4549 if (kind == REG_DEP_OUTPUT)
4550 return 4 - shadow_bonus;
4551 return 0;
4552 case TYPE_MPY2:
4553 if (kind == REG_DEP_OUTPUT)
4554 return 2 - shadow_bonus;
4555 return 0;
4556 case TYPE_CMPDP:
4557 if (kind == REG_DEP_OUTPUT)
4558 return 2 - shadow_bonus;
4559 return 2;
4560 case TYPE_ADDDP:
4561 case TYPE_MPYSPDP:
4562 if (kind == REG_DEP_OUTPUT)
4563 return 7 - shadow_bonus;
4564 return 2;
4565 case TYPE_MPYSP2DP:
4566 if (kind == REG_DEP_OUTPUT)
4567 return 5 - shadow_bonus;
4568 return 2;
4569 case TYPE_MPYI:
4570 if (kind == REG_DEP_OUTPUT)
4571 return 9 - shadow_bonus;
4572 return 4;
4573 case TYPE_MPYID:
4574 case TYPE_MPYDP:
4575 if (kind == REG_DEP_OUTPUT)
4576 return 10 - shadow_bonus;
4577 return 4;
4578
4579 default:
4580 if (insn_type == TYPE_SPKERNEL)
4581 return 0;
4582 if (kind == REG_DEP_OUTPUT)
4583 return 1 - shadow_bonus;
4584
4585 return 0;
4586 }
4587 }
4588
4589 return cost - shadow_bonus;
4590 }
4591 \f
4592 /* Create a SEQUENCE rtx to replace the instructions in SLOT, of which there
4593 are N_FILLED. REAL_FIRST identifies the slot of the insn that appears
4594 first in the original stream. */
4595
4596 static void
4597 gen_one_bundle (rtx_insn **slot, int n_filled, int real_first)
4598 {
4599 rtx seq;
4600 rtx_insn *bundle;
4601 rtx_insn *t;
4602 int i;
4603
4604 seq = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (n_filled, slot));
4605 bundle = make_insn_raw (seq);
4606 BLOCK_FOR_INSN (bundle) = BLOCK_FOR_INSN (slot[0]);
4607 INSN_LOCATION (bundle) = INSN_LOCATION (slot[0]);
4608 SET_PREV_INSN (bundle) = SET_PREV_INSN (slot[real_first]);
4609
4610 t = NULL;
4611
4612 for (i = 0; i < n_filled; i++)
4613 {
4614 rtx_insn *insn = slot[i];
4615 remove_insn (insn);
4616 SET_PREV_INSN (insn) = t ? t : PREV_INSN (bundle);
4617 if (t != NULL_RTX)
4618 SET_NEXT_INSN (t) = insn;
4619 t = insn;
4620 if (i > 0)
4621 INSN_LOCATION (slot[i]) = INSN_LOCATION (bundle);
4622 }
4623
4624 SET_NEXT_INSN (bundle) = NEXT_INSN (PREV_INSN (bundle));
4625 SET_NEXT_INSN (t) = NEXT_INSN (bundle);
4626 SET_NEXT_INSN (PREV_INSN (bundle)) = bundle;
4627 SET_PREV_INSN (NEXT_INSN (bundle)) = bundle;
4628 }
4629
4630 /* Move all parallel instructions into SEQUENCEs, so that no subsequent passes
4631 try to insert labels in the middle. */
4632
4633 static void
4634 c6x_gen_bundles (void)
4635 {
4636 basic_block bb;
4637 rtx_insn *insn, *next, *last_call;
4638
4639 FOR_EACH_BB_FN (bb, cfun)
4640 {
4641 rtx_insn *insn, *next;
4642 /* The machine is eight insns wide. We can have up to six shadow
4643 insns, plus an extra slot for merging the jump shadow. */
4644 rtx_insn *slot[15];
4645 int n_filled = 0;
4646 int first_slot = 0;
4647
4648 for (insn = BB_HEAD (bb);; insn = next)
4649 {
4650 int at_end;
4651 rtx delete_this = NULL_RTX;
4652
4653 if (NONDEBUG_INSN_P (insn))
4654 {
4655 /* Put calls at the start of the sequence. */
4656 if (CALL_P (insn))
4657 {
4658 first_slot++;
4659 if (n_filled)
4660 {
4661 memmove (&slot[1], &slot[0],
4662 n_filled * sizeof (slot[0]));
4663 }
4664 if (!shadow_p (insn))
4665 {
4666 PUT_MODE (insn, TImode);
4667 if (n_filled)
4668 PUT_MODE (slot[1], VOIDmode);
4669 }
4670 n_filled++;
4671 slot[0] = insn;
4672 }
4673 else
4674 {
4675 slot[n_filled++] = insn;
4676 }
4677 }
4678
4679 next = NEXT_INSN (insn);
4680 while (next && insn != BB_END (bb)
4681 && !(NONDEBUG_INSN_P (next)
4682 && GET_CODE (PATTERN (next)) != USE
4683 && GET_CODE (PATTERN (next)) != CLOBBER))
4684 {
4685 insn = next;
4686 next = NEXT_INSN (insn);
4687 }
4688
4689 at_end = insn == BB_END (bb);
4690 if (delete_this == NULL_RTX
4691 && (at_end || (GET_MODE (next) == TImode
4692 && !(shadow_p (next) && CALL_P (next)))))
4693 {
4694 if (n_filled >= 2)
4695 gen_one_bundle (slot, n_filled, first_slot);
4696
4697 n_filled = 0;
4698 first_slot = 0;
4699 }
4700 if (at_end)
4701 break;
4702 }
4703 }
4704 /* Bundling, and emitting nops, can separate
4705 NOTE_INSN_CALL_ARG_LOCATION from the corresponding calls. Fix
4706 that up here. */
4707 last_call = NULL;
4708 for (insn = get_insns (); insn; insn = next)
4709 {
4710 next = NEXT_INSN (insn);
4711 if (CALL_P (insn)
4712 || (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE
4713 && CALL_P (XVECEXP (PATTERN (insn), 0, 0))))
4714 last_call = insn;
4715 if (!NOTE_P (insn) || NOTE_KIND (insn) != NOTE_INSN_CALL_ARG_LOCATION)
4716 continue;
4717 if (NEXT_INSN (last_call) == insn)
4718 continue;
4719 SET_NEXT_INSN (PREV_INSN (insn)) = NEXT_INSN (insn);
4720 SET_PREV_INSN (NEXT_INSN (insn)) = PREV_INSN (insn);
4721 SET_PREV_INSN (insn) = last_call;
4722 SET_NEXT_INSN (insn) = NEXT_INSN (last_call);
4723 SET_PREV_INSN (NEXT_INSN (insn)) = insn;
4724 SET_NEXT_INSN (PREV_INSN (insn)) = insn;
4725 last_call = insn;
4726 }
4727 }
4728
4729 /* Emit a NOP instruction for CYCLES cycles after insn AFTER. Return it. */
4730
4731 static rtx_insn *
4732 emit_nop_after (int cycles, rtx_insn *after)
4733 {
4734 rtx_insn *insn;
4735
4736 /* mpydp has 9 delay slots, and we may schedule a stall for a cross-path
4737 operation. We don't need the extra NOP since in this case, the hardware
4738 will automatically insert the required stall. */
4739 if (cycles == 10)
4740 cycles--;
4741
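/* A single C6X multi-cycle NOP can encode at most nine idle cycles, hence
   the assertion below.  */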
4742 gcc_assert (cycles < 10);
4743
4744 insn = emit_insn_after (gen_nop_count (GEN_INT (cycles)), after);
4745 PUT_MODE (insn, TImode);
4746
4747 return insn;
4748 }
4749
4750 /* Determine whether INSN is a call that needs to have a return label
4751 placed. */
4752
4753 static bool
4754 returning_call_p (rtx_insn *insn)
4755 {
4756 if (CALL_P (insn))
4757 return (!SIBLING_CALL_P (insn)
4758 && get_attr_type (insn) != TYPE_CALLP
4759 && get_attr_type (insn) != TYPE_SHADOW);
4760 if (recog_memoized (insn) < 0)
4761 return false;
4762 if (get_attr_type (insn) == TYPE_CALL)
4763 return true;
4764 return false;
4765 }
4766
4767 /* Determine whether INSN's pattern can be converted to use callp. */
4768 static bool
4769 can_use_callp (rtx_insn *insn)
4770 {
4771 int icode = recog_memoized (insn);
4772 if (!TARGET_INSNS_64PLUS
4773 || icode < 0
4774 || GET_CODE (PATTERN (insn)) == COND_EXEC)
4775 return false;
4776
4777 return ((icode == CODE_FOR_real_call
4778 || icode == CODE_FOR_call_internal
4779 || icode == CODE_FOR_call_value_internal)
4780 && get_attr_dest_regfile (insn) == DEST_REGFILE_ANY);
4781 }
4782
4783 /* Convert the pattern of INSN, which must be a CALL_INSN, into a callp. */
4784 static void
4785 convert_to_callp (rtx_insn *insn)
4786 {
4787 rtx lab;
4788 extract_insn (insn);
4789 if (GET_CODE (PATTERN (insn)) == SET)
4790 {
4791 rtx dest = recog_data.operand[0];
4792 lab = recog_data.operand[1];
4793 PATTERN (insn) = gen_callp_value (dest, lab);
4794 INSN_CODE (insn) = CODE_FOR_callp_value;
4795 }
4796 else
4797 {
4798 lab = recog_data.operand[0];
4799 PATTERN (insn) = gen_callp (lab);
4800 INSN_CODE (insn) = CODE_FOR_callp;
4801 }
4802 }
4803
4804 /* Scan forwards from INSN until we find the next insn that has mode TImode
4805 (indicating it starts a new cycle), and occurs in cycle CLOCK.
4806 Return it if we find such an insn, NULL_RTX otherwise. */
4807 static rtx_insn *
4808 find_next_cycle_insn (rtx_insn *insn, int clock)
4809 {
4810 rtx_insn *t = insn;
4811 if (GET_MODE (t) == TImode)
4812 t = next_real_insn (t);
4813 while (t && GET_MODE (t) != TImode)
4814 t = next_real_insn (t);
4815
4816 if (t && insn_get_clock (t) == clock)
4817 return t;
4818 return NULL;
4819 }
4820
4821 /* If COND_INSN has a COND_EXEC condition, wrap the same condition
4822 around PAT. Return PAT either unchanged or modified in this
4823 way. */
4824 static rtx
4825 duplicate_cond (rtx pat, rtx cond_insn)
4826 {
4827 rtx cond_pat = PATTERN (cond_insn);
4828 if (GET_CODE (cond_pat) == COND_EXEC)
4829 pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (cond_pat)),
4830 pat);
4831 return pat;
4832 }
4833
4834 /* Walk forward from INSN to find the last insn that issues in the same clock
4835 cycle. */
4836 static rtx_insn *
4837 find_last_same_clock (rtx_insn *insn)
4838 {
4839 rtx_insn *retval = insn;
4840 rtx_insn *t = next_real_insn (insn);
4841
4842 while (t && GET_MODE (t) != TImode)
4843 {
4844 if (!DEBUG_INSN_P (t) && recog_memoized (t) >= 0)
4845 retval = t;
4846 t = next_real_insn (t);
4847 }
4848 return retval;
4849 }
4850
4851 /* For every call insn in the function, emit code to load the return
4852 address. For each call we create a return label and store it in
4853 CALL_LABELS. If we are not scheduling, we emit the labels here,
4854 otherwise the caller will do it later.
4855 This function is called after final insn scheduling, but before creating
4856 the SEQUENCEs that represent execute packets. */
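/* The generated code loads the return address into B3, either with a
   single ADDKPC insn when the ISA supports it, or with an MVKL/MVKH pair
   referencing the return label; the exact insns come from the addkpc,
   movsi_lo_sum and movsi_high patterns used below.  */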
4857
4858 static void
4859 reorg_split_calls (rtx_insn **call_labels)
4860 {
4861 unsigned int reservation_mask = 0;
4862 rtx_insn *insn = get_insns ();
4863 gcc_assert (NOTE_P (insn));
4864 insn = next_real_insn (insn);
4865 while (insn)
4866 {
4867 int uid;
4868 rtx_insn *next = next_real_insn (insn);
4869
4870 if (DEBUG_INSN_P (insn))
4871 goto done;
4872
4873 if (GET_MODE (insn) == TImode)
4874 reservation_mask = 0;
4875 uid = INSN_UID (insn);
4876 if (c6x_flag_schedule_insns2 && recog_memoized (insn) >= 0)
4877 reservation_mask |= 1 << INSN_INFO_ENTRY (uid).reservation;
4878
4879 if (returning_call_p (insn))
4880 {
4881 rtx_code_label *label = gen_label_rtx ();
4882 rtx labelref = gen_rtx_LABEL_REF (Pmode, label);
4883 rtx reg = gen_rtx_REG (SImode, RETURN_ADDR_REGNO);
4884
4885 LABEL_NUSES (label) = 2;
4886 if (!c6x_flag_schedule_insns2)
4887 {
4888 if (can_use_callp (insn))
4889 convert_to_callp (insn);
4890 else
4891 {
4892 rtx t;
4893 rtx_insn *slot[4];
4894 emit_label_after (label, insn);
4895
4896 /* Bundle the call and its delay slots into a single
4897 SEQUENCE. While these do not issue in parallel,
4898 we need to group them into a single EH region. */
4899 slot[0] = insn;
4900 PUT_MODE (insn, TImode);
4901 if (TARGET_INSNS_64)
4902 {
4903 t = gen_addkpc (reg, labelref, GEN_INT (4));
4904 slot[1] = emit_insn_after (duplicate_cond (t, insn),
4905 insn);
4906 PUT_MODE (slot[1], TImode);
4907 gen_one_bundle (slot, 2, 0);
4908 }
4909 else
4910 {
4911 slot[3] = emit_insn_after (gen_nop_count (GEN_INT (3)),
4912 insn);
4913 PUT_MODE (slot[3], TImode);
4914 t = gen_movsi_lo_sum (reg, reg, labelref);
4915 slot[2] = emit_insn_after (duplicate_cond (t, insn),
4916 insn);
4917 PUT_MODE (slot[2], TImode);
4918 t = gen_movsi_high (reg, labelref);
4919 slot[1] = emit_insn_after (duplicate_cond (t, insn),
4920 insn);
4921 PUT_MODE (slot[1], TImode);
4922 gen_one_bundle (slot, 4, 0);
4923 }
4924 }
4925 }
4926 else
4927 {
4928 /* If we scheduled, we reserved the .S2 unit for one or two
4929 cycles after the call. Emit the insns in these slots,
4930 unless it's possible to create a CALLP insn.
4931 Note that this works because the dependencies ensure that
4932 no insn setting/using B3 is scheduled in the delay slots of
4933 a call. */
4934 int this_clock = insn_get_clock (insn);
4935 rtx_insn *after1;
4936
4937 call_labels[INSN_UID (insn)] = label;
4938
4939 rtx_insn *last_same_clock = find_last_same_clock (insn);
4940
4941 if (can_use_callp (insn))
4942 {
4943 /* Find the first insn of the next execute packet. If it
4944 is the shadow insn corresponding to this call, we may
4945 use a CALLP insn. */
4946 rtx_insn *shadow =
4947 next_nonnote_nondebug_insn (last_same_clock);
4948
4949 if (CALL_P (shadow)
4950 && insn_get_clock (shadow) == this_clock + 5)
4951 {
4952 convert_to_callp (shadow);
4953 insn_set_clock (shadow, this_clock);
4954 INSN_INFO_ENTRY (INSN_UID (shadow)).reservation
4955 = RESERVATION_S2;
4956 INSN_INFO_ENTRY (INSN_UID (shadow)).unit_mask
4957 = INSN_INFO_ENTRY (INSN_UID (last_same_clock)).unit_mask;
4958 if (GET_MODE (insn) == TImode)
4959 {
4960 rtx_insn *new_cycle_first = NEXT_INSN (insn);
4961 while (!NONDEBUG_INSN_P (new_cycle_first)
4962 || GET_CODE (PATTERN (new_cycle_first)) == USE
4963 || GET_CODE (PATTERN (new_cycle_first)) == CLOBBER)
4964 new_cycle_first = NEXT_INSN (new_cycle_first);
4965 PUT_MODE (new_cycle_first, TImode);
4966 if (new_cycle_first != shadow)
4967 PUT_MODE (shadow, VOIDmode);
4968 INSN_INFO_ENTRY (INSN_UID (new_cycle_first)).ebb_start
4969 = INSN_INFO_ENTRY (INSN_UID (insn)).ebb_start;
4970 }
4971 else
4972 PUT_MODE (shadow, VOIDmode);
4973 delete_insn (insn);
4974 goto done;
4975 }
4976 }
4977 after1 = find_next_cycle_insn (last_same_clock, this_clock + 1);
4978 if (after1 == NULL_RTX)
4979 after1 = last_same_clock;
4980 else
4981 after1 = find_last_same_clock (after1);
4982 if (TARGET_INSNS_64)
4983 {
4984 rtx x1 = gen_addkpc (reg, labelref, const0_rtx);
4985 x1 = emit_insn_after (duplicate_cond (x1, insn), after1);
4986 insn_set_clock (x1, this_clock + 1);
4987 INSN_INFO_ENTRY (INSN_UID (x1)).reservation = RESERVATION_S2;
4988 if (after1 == last_same_clock)
4989 PUT_MODE (x1, TImode);
4990 else
4991 INSN_INFO_ENTRY (INSN_UID (x1)).unit_mask
4992 = INSN_INFO_ENTRY (INSN_UID (after1)).unit_mask;
4993 }
4994 else
4995 {
4996 rtx x1, x2;
4997 rtx_insn *after2 = find_next_cycle_insn (after1,
4998 this_clock + 2);
4999 if (after2 == NULL_RTX)
5000 after2 = after1;
5001 x2 = gen_movsi_lo_sum (reg, reg, labelref);
5002 x2 = emit_insn_after (duplicate_cond (x2, insn), after2);
5003 x1 = gen_movsi_high (reg, labelref);
5004 x1 = emit_insn_after (duplicate_cond (x1, insn), after1);
5005 insn_set_clock (x1, this_clock + 1);
5006 insn_set_clock (x2, this_clock + 2);
5007 INSN_INFO_ENTRY (INSN_UID (x1)).reservation = RESERVATION_S2;
5008 INSN_INFO_ENTRY (INSN_UID (x2)).reservation = RESERVATION_S2;
5009 if (after1 == last_same_clock)
5010 PUT_MODE (x1, TImode);
5011 else
5012 INSN_INFO_ENTRY (INSN_UID (x1)).unit_mask
5013 = INSN_INFO_ENTRY (INSN_UID (after1)).unit_mask;
5014 if (after1 == after2)
5015 PUT_MODE (x2, TImode);
5016 else
5017 INSN_INFO_ENTRY (INSN_UID (x2)).unit_mask
5018 = INSN_INFO_ENTRY (INSN_UID (after2)).unit_mask;
5019 }
5020 }
5021 }
5022 done:
5023 insn = next;
5024 }
5025 }
5026
5027 /* Called as part of c6x_reorg. This function emits multi-cycle NOP
5028 insns as required for correctness. CALL_LABELS is the array that
5029 holds the return labels for call insns; we emit these here if
5030 scheduling was run earlier. */
5031
5032 static void
5033 reorg_emit_nops (rtx_insn **call_labels)
5034 {
5035 bool first;
5036 rtx last_call;
5037 rtx_insn *prev;
5038 int prev_clock, earliest_bb_end;
5039 int prev_implicit_nops;
5040 rtx_insn *insn = get_insns ();
5041
5042 /* We look at one insn (or bundle inside a sequence) in each iteration, storing
5043 its issue time in PREV_CLOCK for the next iteration. If there is a gap in
5044 clocks, we must insert a NOP.
5045 EARLIEST_BB_END tracks in which cycle all insns that have been issued in the
5046 current basic block will finish. We must not allow the next basic block to
5047 begin before this cycle.
5048 PREV_IMPLICIT_NOPS tells us whether we've seen an insn that implicitly contains
5049 a multi-cycle nop. The code is scheduled such that subsequent insns will
5050 show the cycle gap, but we needn't insert a real NOP instruction. */
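/* For example, if the previous insn issued in cycle 3 and the next one
   issues in cycle 7 with no intervening implicit NOPs, we emit a "NOP 3"
   after the previous insn to cover cycles 4 through 6.  */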
5051 insn = next_real_insn (insn);
5052 last_call = prev = NULL;
5053 prev_clock = -1;
5054 earliest_bb_end = 0;
5055 prev_implicit_nops = 0;
5056 first = true;
5057 while (insn)
5058 {
5059 int this_clock = -1;
5060 rtx_insn *next;
5061 int max_cycles = 0;
5062
5063 next = next_real_insn (insn);
5064
5065 if (DEBUG_INSN_P (insn)
5066 || GET_CODE (PATTERN (insn)) == USE
5067 || GET_CODE (PATTERN (insn)) == CLOBBER
5068 || shadow_or_blockage_p (insn)
5069 || JUMP_TABLE_DATA_P (insn))
5070 goto next_insn;
5071
5072 if (!c6x_flag_schedule_insns2)
5073 /* No scheduling; ensure that no parallel issue happens. */
5074 PUT_MODE (insn, TImode);
5075 else
5076 {
5077 int cycles;
5078
5079 this_clock = insn_get_clock (insn);
5080 if (this_clock != prev_clock)
5081 {
5082 PUT_MODE (insn, TImode);
5083
5084 if (!first)
5085 {
5086 cycles = this_clock - prev_clock;
5087
5088 cycles -= prev_implicit_nops;
5089 if (cycles > 1)
5090 {
5091 rtx nop = emit_nop_after (cycles - 1, prev);
5092 insn_set_clock (nop, prev_clock + prev_implicit_nops + 1);
5093 }
5094 }
5095 prev_clock = this_clock;
5096
5097 if (last_call
5098 && insn_get_clock (last_call) + 6 <= this_clock)
5099 {
5100 emit_label_before (call_labels[INSN_UID (last_call)], insn);
5101 last_call = NULL_RTX;
5102 }
5103 prev_implicit_nops = 0;
5104 }
5105 }
5106
5107 /* Examine how many cycles the current insn takes, and adjust
5108 LAST_CALL, EARLIEST_BB_END and PREV_IMPLICIT_NOPS. */
5109 if (recog_memoized (insn) >= 0
5110 /* If not scheduling, we've emitted NOPs after calls already. */
5111 && (c6x_flag_schedule_insns2 || !returning_call_p (insn)))
5112 {
5113 max_cycles = get_attr_cycles (insn);
5114 if (get_attr_type (insn) == TYPE_CALLP)
5115 prev_implicit_nops = 5;
5116 }
5117 else
5118 max_cycles = 1;
5119 if (returning_call_p (insn))
5120 last_call = insn;
5121
5122 if (c6x_flag_schedule_insns2)
5123 {
5124 gcc_assert (this_clock >= 0);
5125 if (earliest_bb_end < this_clock + max_cycles)
5126 earliest_bb_end = this_clock + max_cycles;
5127 }
5128 else if (max_cycles > 1)
5129 emit_nop_after (max_cycles - 1, insn);
5130
5131 prev = insn;
5132 first = false;
5133
5134 next_insn:
5135 if (c6x_flag_schedule_insns2
5136 && (next == NULL_RTX
5137 || (GET_MODE (next) == TImode
5138 && INSN_INFO_ENTRY (INSN_UID (next)).ebb_start))
5139 && earliest_bb_end > 0)
5140 {
5141 int cycles = earliest_bb_end - prev_clock;
5142 if (cycles > 1)
5143 {
5144 prev = emit_nop_after (cycles - 1, prev);
5145 insn_set_clock (prev, prev_clock + prev_implicit_nops + 1);
5146 }
5147 earliest_bb_end = 0;
5148 prev_clock = -1;
5149 first = true;
5150
5151 if (last_call)
5152 emit_label_after (call_labels[INSN_UID (last_call)], prev);
5153 last_call = NULL_RTX;
5154 }
5155 insn = next;
5156 }
5157 }
5158
5159 /* If possible, split INSN, which we know is either a jump or a call, into a real
5160 insn and its shadow. */
5161 static void
5162 split_delayed_branch (rtx_insn *insn)
5163 {
5164 int code = recog_memoized (insn);
5165 rtx_insn *i1;
5166 rtx newpat;
5167 rtx pat = PATTERN (insn);
5168
5169 if (GET_CODE (pat) == COND_EXEC)
5170 pat = COND_EXEC_CODE (pat);
5171
5172 if (CALL_P (insn))
5173 {
5174 rtx src = pat, dest = NULL_RTX;
5175 rtx callee;
5176 if (GET_CODE (pat) == SET)
5177 {
5178 dest = SET_DEST (pat);
5179 src = SET_SRC (pat);
5180 }
5181 callee = XEXP (XEXP (src, 0), 0);
5182 if (SIBLING_CALL_P (insn))
5183 {
5184 if (REG_P (callee))
5185 newpat = gen_indirect_sibcall_shadow ();
5186 else
5187 newpat = gen_sibcall_shadow (callee);
5188 pat = gen_real_jump (callee);
5189 }
5190 else if (dest != NULL_RTX)
5191 {
5192 if (REG_P (callee))
5193 newpat = gen_indirect_call_value_shadow (dest);
5194 else
5195 newpat = gen_call_value_shadow (dest, callee);
5196 pat = gen_real_call (callee);
5197 }
5198 else
5199 {
5200 if (REG_P (callee))
5201 newpat = gen_indirect_call_shadow ();
5202 else
5203 newpat = gen_call_shadow (callee);
5204 pat = gen_real_call (callee);
5205 }
5206 pat = duplicate_cond (pat, insn);
5207 newpat = duplicate_cond (newpat, insn);
5208 }
5209 else
5210 {
5211 rtx src, op;
5212 if (GET_CODE (pat) == PARALLEL
5213 && GET_CODE (XVECEXP (pat, 0, 0)) == RETURN)
5214 {
5215 newpat = gen_return_shadow ();
5216 pat = gen_real_ret (XEXP (XVECEXP (pat, 0, 1), 0));
5217 newpat = duplicate_cond (newpat, insn);
5218 }
5219 else
5220 switch (code)
5221 {
5222 case CODE_FOR_br_true:
5223 case CODE_FOR_br_false:
5224 src = SET_SRC (pat);
5225 op = XEXP (src, code == CODE_FOR_br_true ? 1 : 2);
5226 newpat = gen_condjump_shadow (op);
5227 pat = gen_real_jump (op);
5228 if (code == CODE_FOR_br_true)
5229 pat = gen_rtx_COND_EXEC (VOIDmode, XEXP (src, 0), pat);
5230 else
5231 pat = gen_rtx_COND_EXEC (VOIDmode,
5232 reversed_comparison (XEXP (src, 0),
5233 VOIDmode),
5234 pat);
5235 break;
5236
5237 case CODE_FOR_jump:
5238 op = SET_SRC (pat);
5239 newpat = gen_jump_shadow (op);
5240 break;
5241
5242 case CODE_FOR_indirect_jump:
5243 newpat = gen_indirect_jump_shadow ();
5244 break;
5245
5246 case CODE_FOR_return_internal:
5247 newpat = gen_return_shadow ();
5248 pat = gen_real_ret (XEXP (XVECEXP (pat, 0, 1), 0));
5249 break;
5250
5251 default:
5252 return;
5253 }
5254 }
5255 i1 = emit_insn_before (pat, insn);
5256 PATTERN (insn) = newpat;
5257 INSN_CODE (insn) = -1;
5258 record_delay_slot_pair (i1, insn, 5, 0);
5259 }
5260
5261 /* If INSN is a multi-cycle insn that should be handled properly in
5262 modulo-scheduling, split it into a real insn and a shadow.
5263 Return true if we made a change.
5264
5265 It is valid for us to fail to split an insn; the caller has to deal
5266 with the possibility. Currently we handle loads and most mpy2 and
5267 mpy4 insns. */
5268 static bool
5269 split_delayed_nonbranch (rtx_insn *insn)
5270 {
5271 int code = recog_memoized (insn);
5272 enum attr_type type;
5273 rtx_insn *i1;
5274 rtx newpat, src, dest;
5275 rtx pat = PATTERN (insn);
5276 rtvec rtv;
5277 int delay;
5278
5279 if (GET_CODE (pat) == COND_EXEC)
5280 pat = COND_EXEC_CODE (pat);
5281
5282 if (code < 0 || GET_CODE (pat) != SET)
5283 return false;
5284 src = SET_SRC (pat);
5285 dest = SET_DEST (pat);
5286 if (!REG_P (dest))
5287 return false;
5288
5289 type = get_attr_type (insn);
5290 if (code >= 0
5291 && (type == TYPE_LOAD
5292 || type == TYPE_LOADN))
5293 {
5294 if (!MEM_P (src)
5295 && (GET_CODE (src) != ZERO_EXTEND
5296 || !MEM_P (XEXP (src, 0))))
5297 return false;
5298
5299 if (GET_MODE_SIZE (GET_MODE (dest)) > 4
5300 && (GET_MODE_SIZE (GET_MODE (dest)) != 8 || !TARGET_LDDW))
5301 return false;
5302
5303 rtv = gen_rtvec (2, GEN_INT (REGNO (SET_DEST (pat))),
5304 SET_SRC (pat));
5305 newpat = gen_load_shadow (SET_DEST (pat));
5306 pat = gen_rtx_UNSPEC (VOIDmode, rtv, UNSPEC_REAL_LOAD);
5307 delay = 4;
5308 }
5309 else if (code >= 0
5310 && (type == TYPE_MPY2
5311 || type == TYPE_MPY4))
5312 {
5313 /* We don't handle floating point multiplies yet. */
5314 if (GET_MODE (dest) == SFmode)
5315 return false;
5316
5317 rtv = gen_rtvec (2, GEN_INT (REGNO (SET_DEST (pat))),
5318 SET_SRC (pat));
5319 newpat = gen_mult_shadow (SET_DEST (pat));
5320 pat = gen_rtx_UNSPEC (VOIDmode, rtv, UNSPEC_REAL_MULT);
5321 delay = type == TYPE_MPY2 ? 1 : 3;
5322 }
5323 else
5324 return false;
5325
5326 pat = duplicate_cond (pat, insn);
5327 newpat = duplicate_cond (newpat, insn);
5328 i1 = emit_insn_before (pat, insn);
5329 PATTERN (insn) = newpat;
5330 INSN_CODE (insn) = -1;
5331 recog_memoized (insn);
5332 recog_memoized (i1);
5333 record_delay_slot_pair (i1, insn, delay, 0);
5334 return true;
5335 }
5336
5337 /* Examine if INSN is the result of splitting a load or multiply into a
5338 real insn and a shadow, and if so, undo the transformation. */
5339 static void
5340 undo_split_delayed_nonbranch (rtx_insn *insn)
5341 {
5342 int icode = recog_memoized (insn);
5343 enum attr_type type;
5344 rtx prev_pat, insn_pat;
5345 rtx_insn *prev;
5346
5347 if (icode < 0)
5348 return;
5349 type = get_attr_type (insn);
5350 if (type != TYPE_LOAD_SHADOW && type != TYPE_MULT_SHADOW)
5351 return;
5352 prev = PREV_INSN (insn);
5353 prev_pat = PATTERN (prev);
5354 insn_pat = PATTERN (insn);
5355 if (GET_CODE (prev_pat) == COND_EXEC)
5356 {
5357 prev_pat = COND_EXEC_CODE (prev_pat);
5358 insn_pat = COND_EXEC_CODE (insn_pat);
5359 }
5360
5361 gcc_assert (GET_CODE (prev_pat) == UNSPEC
5362 && ((XINT (prev_pat, 1) == UNSPEC_REAL_LOAD
5363 && type == TYPE_LOAD_SHADOW)
5364 || (XINT (prev_pat, 1) == UNSPEC_REAL_MULT
5365 && type == TYPE_MULT_SHADOW)));
5366 insn_pat = gen_rtx_SET (SET_DEST (insn_pat),
5367 XVECEXP (prev_pat, 0, 1));
5368 insn_pat = duplicate_cond (insn_pat, prev);
5369 PATTERN (insn) = insn_pat;
5370 INSN_CODE (insn) = -1;
5371 delete_insn (prev);
5372 }
5373
5374 /* Split every insn (i.e. jumps and calls) which can have delay slots into
5375 two parts: the first one is scheduled normally and emits the instruction,
5376 while the second one is a shadow insn which shows the side effect taking
5377 place. The second one is placed in the right cycle by the scheduler, but
5378 not emitted as an assembly instruction. */
5379
5380 static void
5381 split_delayed_insns (void)
5382 {
5383 rtx_insn *insn;
5384 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5385 {
5386 if (JUMP_P (insn) || CALL_P (insn))
5387 split_delayed_branch (insn);
5388 }
5389 }
5390
5391 /* For every insn that has an entry in the new_conditions vector, give it
5392 the appropriate predicate. */
5393 static void
5394 conditionalize_after_sched (void)
5395 {
5396 basic_block bb;
5397 rtx_insn *insn;
5398 FOR_EACH_BB_FN (bb, cfun)
5399 FOR_BB_INSNS (bb, insn)
5400 {
5401 unsigned uid = INSN_UID (insn);
5402 rtx cond;
5403 if (!NONDEBUG_INSN_P (insn) || uid >= INSN_INFO_LENGTH)
5404 continue;
5405 cond = INSN_INFO_ENTRY (uid).new_cond;
5406 if (cond == NULL_RTX)
5407 continue;
5408 if (dump_file)
5409 fprintf (dump_file, "Conditionalizing insn %d\n", uid);
5410 predicate_insn (insn, cond, true);
5411 }
5412 }
5413
5414 /* A callback for the hw-doloop pass. This function examines INSN; if
5415 it is a loop_end pattern we recognize, return the reg rtx for the
5416 loop counter. Otherwise, return NULL_RTX. */
5417
5418 static rtx
5419 hwloop_pattern_reg (rtx_insn *insn)
5420 {
5421 rtx pat, reg;
5422
5423 if (!JUMP_P (insn) || recog_memoized (insn) != CODE_FOR_loop_end)
5424 return NULL_RTX;
5425
5426 pat = PATTERN (insn);
5427 reg = SET_DEST (XVECEXP (pat, 0, 1));
5428 if (!REG_P (reg))
5429 return NULL_RTX;
5430 return reg;
5431 }
5432
5433 /* Return the number of cycles taken by BB, as computed by scheduling,
5434 including the latencies of all insns with delay slots. IGNORE is
5435 an insn we should ignore in the calculation, usually the final
5436 branch. */
5437 static int
5438 bb_earliest_end_cycle (basic_block bb, rtx ignore)
5439 {
5440 int earliest = 0;
5441 rtx_insn *insn;
5442
5443 FOR_BB_INSNS (bb, insn)
5444 {
5445 int cycles, this_clock;
5446
5447 if (LABEL_P (insn) || NOTE_P (insn) || DEBUG_INSN_P (insn)
5448 || GET_CODE (PATTERN (insn)) == USE
5449 || GET_CODE (PATTERN (insn)) == CLOBBER
5450 || insn == ignore)
5451 continue;
5452
5453 this_clock = insn_get_clock (insn);
5454 cycles = get_attr_cycles (insn);
5455
5456 if (earliest < this_clock + cycles)
5457 earliest = this_clock + cycles;
5458 }
5459 return earliest;
5460 }
5461
5462 /* Examine the insns in BB and remove all those whose uid is greater than or
5463 equal to MAX_UID. */
5464 static void
5465 filter_insns_above (basic_block bb, int max_uid)
5466 {
5467 rtx_insn *insn, *next;
5468 bool prev_ti = false;
5469 int prev_cycle = -1;
5470
5471 FOR_BB_INSNS_SAFE (bb, insn, next)
5472 {
5473 int this_cycle;
5474 if (!NONDEBUG_INSN_P (insn))
5475 continue;
5476 if (insn == BB_END (bb))
5477 return;
5478 this_cycle = insn_get_clock (insn);
5479 if (prev_ti && this_cycle == prev_cycle)
5480 {
5481 gcc_assert (GET_MODE (insn) != TImode);
5482 PUT_MODE (insn, TImode);
5483 }
5484 prev_ti = false;
5485 if (INSN_UID (insn) >= max_uid)
5486 {
5487 if (GET_MODE (insn) == TImode)
5488 {
5489 prev_ti = true;
5490 prev_cycle = this_cycle;
5491 }
5492 delete_insn (insn);
5493 }
5494 }
5495 }
5496
5497 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
5498
5499 static void
5500 c6x_asm_emit_except_personality (rtx personality)
5501 {
5502 fputs ("\t.personality\t", asm_out_file);
5503 output_addr_const (asm_out_file, personality);
5504 fputc ('\n', asm_out_file);
5505 }
5506
5507 /* Use a special assembly directive rather than a regular section for
5508 unwind table data. */
5509
5510 static void
5511 c6x_asm_init_sections (void)
5512 {
5513 exception_section = get_unnamed_section (0, output_section_asm_op,
5514 "\t.handlerdata");
5515 }
5516
5517 /* A callback for the hw-doloop pass. Called to optimize LOOP in a
5518 machine-specific fashion; returns true if successful and false if
5519 the hwloop_fail function should be called. */
5520
5521 static bool
5522 hwloop_optimize (hwloop_info loop)
5523 {
5524 basic_block entry_bb, bb;
5525 rtx_insn *seq, *insn, *prev, *entry_after, *end_packet;
5526 rtx_insn *head_insn, *tail_insn, *new_insns, *last_insn;
5527 int loop_earliest;
5528 int n_execute_packets;
5529 edge entry_edge;
5530 unsigned ix;
5531 int max_uid_before, delayed_splits;
5532 int i, sp_ii, min_ii, max_ii, max_parallel, n_insns, n_real_insns, stages;
5533 rtx_insn **orig_vec;
5534 rtx_insn **copies;
5535 rtx_insn ***insn_copies;
5536
5537 if (!c6x_flag_modulo_sched || !c6x_flag_schedule_insns2
5538 || !TARGET_INSNS_64PLUS)
5539 return false;
5540
5541 if (loop->iter_reg_used || loop->depth > 1)
5542 return false;
5543 if (loop->has_call || loop->has_asm)
5544 return false;
5545
5546 if (loop->head != loop->tail)
5547 return false;
5548
5549 gcc_assert (loop->incoming_dest == loop->head);
5550
5551 entry_edge = NULL;
5552 FOR_EACH_VEC_SAFE_ELT (loop->incoming, i, entry_edge)
5553 if (entry_edge->flags & EDGE_FALLTHRU)
5554 break;
5555 if (entry_edge == NULL)
5556 return false;
5557
5558 reshuffle_units (loop->head);
5559
5560 in_hwloop = true;
5561 schedule_ebbs_init ();
5562 schedule_ebb (BB_HEAD (loop->tail), loop->loop_end, true);
5563 schedule_ebbs_finish ();
5564 in_hwloop = false;
5565
5566 bb = loop->head;
5567 loop_earliest = bb_earliest_end_cycle (bb, loop->loop_end) + 1;
5568
5569 max_uid_before = get_max_uid ();
5570
5571 /* Split all multi-cycle operations, such as loads. For normal
5572 scheduling, we only do this for branches, as the generated code
5573 would otherwise not be interrupt-safe. When using sploop, it is
5574 safe and beneficial to split them. If any multi-cycle operations
5575 remain after splitting (because we don't handle them yet), we
5576 cannot pipeline the loop. */
5577 delayed_splits = 0;
5578 FOR_BB_INSNS (bb, insn)
5579 {
5580 if (NONDEBUG_INSN_P (insn))
5581 {
5582 recog_memoized (insn);
5583 if (split_delayed_nonbranch (insn))
5584 delayed_splits++;
5585 else if (INSN_CODE (insn) >= 0
5586 && get_attr_cycles (insn) > 1)
5587 goto undo_splits;
5588 }
5589 }
5590
5591 /* Count the number of insns as well as the number of real insns, and save
5592 the original sequence of insns in case we must restore it later. */
5593 n_insns = n_real_insns = 0;
5594 FOR_BB_INSNS (bb, insn)
5595 {
5596 n_insns++;
5597 if (NONDEBUG_INSN_P (insn) && insn != loop->loop_end)
5598 n_real_insns++;
5599 }
5600 orig_vec = XNEWVEC (rtx_insn *, n_insns);
5601 n_insns = 0;
5602 FOR_BB_INSNS (bb, insn)
5603 orig_vec[n_insns++] = insn;
5604
5605 /* Count the unit reservations, and compute a minimum II from that
5606 table. */
5607 count_unit_reqs (unit_reqs, loop->start_label,
5608 PREV_INSN (loop->loop_end));
5609 merge_unit_reqs (unit_reqs);
5610
5611 min_ii = res_mii (unit_reqs);
5612 max_ii = loop_earliest < 15 ? loop_earliest : 14;
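/* The minimum II is dictated by the functional unit reservations; the
   maximum is the smaller of the loop's scheduled length and 14, in line
   with the 14-execute-packet limit of the SPLOOP buffer (checked again
   further down).  */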
5613
5614 /* Make copies of the loop body, up to a maximum number of stages we want
5615 to handle. */
5616 max_parallel = loop_earliest / min_ii + 1;
5617
5618 copies = XCNEWVEC (rtx_insn *, (max_parallel + 1) * n_real_insns);
5619 insn_copies = XNEWVEC (rtx_insn **, max_parallel + 1);
5620 for (i = 0; i < max_parallel + 1; i++)
5621 insn_copies[i] = copies + i * n_real_insns;
5622
5623 head_insn = next_nonnote_nondebug_insn (loop->start_label);
5624 tail_insn = prev_real_insn (BB_END (bb));
5625
5626 i = 0;
5627 FOR_BB_INSNS (bb, insn)
5628 if (NONDEBUG_INSN_P (insn) && insn != loop->loop_end)
5629 insn_copies[0][i++] = insn;
5630
5631 sploop_max_uid_iter0 = get_max_uid ();
5632
5633 /* Generate the copies of the loop body, and save them in the
5634 INSN_COPIES array. */
5635 start_sequence ();
5636 for (i = 0; i < max_parallel; i++)
5637 {
5638 int j;
5639 rtx_insn *this_iter;
5640
5641 this_iter = duplicate_insn_chain (head_insn, tail_insn);
5642 j = 0;
5643 while (this_iter)
5644 {
5645 rtx_insn *prev_stage_insn = insn_copies[i][j];
5646 gcc_assert (INSN_CODE (this_iter) == INSN_CODE (prev_stage_insn));
5647
5648 if (INSN_CODE (this_iter) >= 0
5649 && (get_attr_type (this_iter) == TYPE_LOAD_SHADOW
5650 || get_attr_type (this_iter) == TYPE_MULT_SHADOW))
5651 {
5652 rtx_insn *prev = PREV_INSN (this_iter);
5653 record_delay_slot_pair (prev, this_iter,
5654 get_attr_cycles (prev) - 1, 0);
5655 }
5656 else
5657 record_delay_slot_pair (prev_stage_insn, this_iter, i, 1);
5658
5659 insn_copies[i + 1][j] = this_iter;
5660 j++;
5661 this_iter = next_nonnote_nondebug_insn (this_iter);
5662 }
5663 }
5664 new_insns = get_insns ();
5665 last_insn = insn_copies[max_parallel][n_real_insns - 1];
5666 end_sequence ();
5667 emit_insn_before (new_insns, BB_END (bb));
5668
5669 /* Try to schedule the loop using varying initiation intervals,
5670 starting with the smallest possible and incrementing it
5671 on failure. */
5672 for (sp_ii = min_ii; sp_ii <= max_ii; sp_ii++)
5673 {
5674 basic_block tmp_bb;
5675 if (dump_file)
5676 fprintf (dump_file, "Trying to schedule for II %d\n", sp_ii);
5677
5678 df_clear_flags (DF_LR_RUN_DCE);
5679
5680 schedule_ebbs_init ();
5681 set_modulo_params (sp_ii, max_parallel, n_real_insns,
5682 sploop_max_uid_iter0);
5683 tmp_bb = schedule_ebb (BB_HEAD (bb), last_insn, true);
5684 schedule_ebbs_finish ();
5685
5686 if (tmp_bb)
5687 {
5688 if (dump_file)
5689 fprintf (dump_file, "Found schedule with II %d\n", sp_ii);
5690 break;
5691 }
5692 }
5693
5694 discard_delay_pairs_above (max_uid_before);
5695
5696 if (sp_ii > max_ii)
5697 goto restore_loop;
5698
5699 stages = insn_get_clock (ss.last_scheduled_iter0) / sp_ii + 1;
5700
5701 if (stages == 1 && sp_ii > 5)
5702 goto restore_loop;
5703
5704 /* At this point, we know we've been successful, unless we find later that
5705 there are too many execute packets for the loop buffer to hold. */
5706
5707 /* Assign reservations to the instructions in the loop. We must find
5708 the stage that contains the full loop kernel, and transfer the
5709 reservations of the instructions contained in it to the corresponding
5710 instructions from iteration 0, which are the only ones we'll keep. */
5711 assign_reservations (BB_HEAD (bb), ss.last_scheduled_insn);
5712 SET_PREV_INSN (BB_END (bb)) = ss.last_scheduled_iter0;
5713 SET_NEXT_INSN (ss.last_scheduled_iter0) = BB_END (bb);
5714 filter_insns_above (bb, sploop_max_uid_iter0);
5715
5716 for (i = 0; i < n_real_insns; i++)
5717 {
5718 rtx insn = insn_copies[0][i];
5719 int uid = INSN_UID (insn);
5720 int stage = insn_uid_get_clock (uid) / sp_ii;
5721
5722 if (stage + 1 < stages)
5723 {
5724 int copy_uid;
5725 stage = stages - stage - 1;
5726 copy_uid = INSN_UID (insn_copies[stage][i]);
5727 INSN_INFO_ENTRY (uid).reservation
5728 = INSN_INFO_ENTRY (copy_uid).reservation;
5729 }
5730 }
5731 if (stages == 1)
5732 stages++;
5733
5734 /* Compute the number of execute packets the pipelined form of the loop will
5735 require. */
5736 prev = NULL;
5737 n_execute_packets = 0;
5738 for (insn = loop->start_label;
5739 insn != loop->loop_end;
5740 insn = NEXT_INSN (insn))
5741 {
5742 if (NONDEBUG_INSN_P (insn) && GET_MODE (insn) == TImode
5743 && !shadow_p (insn))
5744 {
5745 n_execute_packets++;
5746 if (prev && insn_get_clock (prev) + 1 != insn_get_clock (insn))
5747 /* We need an extra NOP instruction. */
5748 n_execute_packets++;
5749
5750 prev = insn;
5751 }
5752 }
5753
5754 end_packet = ss.last_scheduled_iter0;
5755 while (!NONDEBUG_INSN_P (end_packet) || GET_MODE (end_packet) != TImode)
5756 end_packet = PREV_INSN (end_packet);
5757
5758 /* The earliest cycle in which we can emit the SPKERNEL instruction. */
5759 loop_earliest = (stages - 1) * sp_ii;
5760 if (loop_earliest > insn_get_clock (end_packet))
5761 {
5762 n_execute_packets++;
5763 end_packet = loop->loop_end;
5764 }
5765 else
5766 loop_earliest = insn_get_clock (end_packet);
5767
5768 if (n_execute_packets > 14)
5769 goto restore_loop;
5770
5771 /* Generate the spkernel instruction, and place it at the appropriate
5772 spot. */
5773 PUT_MODE (end_packet, VOIDmode);
5774
5775 insn = emit_jump_insn_before (
5776 gen_spkernel (GEN_INT (stages - 1),
5777 const0_rtx, JUMP_LABEL (loop->loop_end)),
5778 end_packet);
5779 JUMP_LABEL (insn) = JUMP_LABEL (loop->loop_end);
5780 insn_set_clock (insn, loop_earliest);
5781 PUT_MODE (insn, TImode);
5782 INSN_INFO_ENTRY (INSN_UID (insn)).ebb_start = false;
5783 delete_insn (loop->loop_end);
5784
5785 /* Place the mvc and sploop instructions before the loop. */
5786 entry_bb = entry_edge->src;
5787
5788 start_sequence ();
5789
5790 insn = emit_insn (gen_mvilc (loop->iter_reg));
5791 insn = emit_insn (gen_sploop (GEN_INT (sp_ii)));
5792
5793 seq = get_insns ();
5794
5795 if (!single_succ_p (entry_bb) || vec_safe_length (loop->incoming) > 1)
5796 {
5797 basic_block new_bb;
5798 edge e;
5799 edge_iterator ei;
5800
5801 emit_insn_before (seq, BB_HEAD (loop->head));
5802 seq = emit_label_before (gen_label_rtx (), seq);
5803
5804 new_bb = create_basic_block (seq, insn, entry_bb);
5805 FOR_EACH_EDGE (e, ei, loop->incoming)
5806 {
5807 if (!(e->flags & EDGE_FALLTHRU))
5808 redirect_edge_and_branch_force (e, new_bb);
5809 else
5810 redirect_edge_succ (e, new_bb);
5811 }
5812 make_edge (new_bb, loop->head, 0);
5813 }
5814 else
5815 {
5816 entry_after = BB_END (entry_bb);
5817 while (DEBUG_INSN_P (entry_after)
5818 || (NOTE_P (entry_after)
5819 && NOTE_KIND (entry_after) != NOTE_INSN_BASIC_BLOCK))
5820 entry_after = PREV_INSN (entry_after);
5821 emit_insn_after (seq, entry_after);
5822 }
5823
5824 end_sequence ();
5825
5826 /* Make sure we don't try to schedule this loop again. */
5827 for (ix = 0; loop->blocks.iterate (ix, &bb); ix++)
5828 bb->flags |= BB_DISABLE_SCHEDULE;
5829
5830 return true;
5831
5832 restore_loop:
5833 if (dump_file)
5834 fprintf (dump_file, "Unable to pipeline loop.\n");
5835
5836 for (i = 1; i < n_insns; i++)
5837 {
5838 SET_NEXT_INSN (orig_vec[i - 1]) = orig_vec[i];
5839 SET_PREV_INSN (orig_vec[i]) = orig_vec[i - 1];
5840 }
5841 SET_PREV_INSN (orig_vec[0]) = PREV_INSN (BB_HEAD (bb));
5842 SET_NEXT_INSN (PREV_INSN (BB_HEAD (bb))) = orig_vec[0];
5843 SET_NEXT_INSN (orig_vec[n_insns - 1]) = NEXT_INSN (BB_END (bb));
5844 SET_PREV_INSN (NEXT_INSN (BB_END (bb))) = orig_vec[n_insns - 1];
5845 BB_HEAD (bb) = orig_vec[0];
5846 BB_END (bb) = orig_vec[n_insns - 1];
5847 undo_splits:
5848 free_delay_pairs ();
5849 FOR_BB_INSNS (bb, insn)
5850 if (NONDEBUG_INSN_P (insn))
5851 undo_split_delayed_nonbranch (insn);
5852 return false;
5853 }
5854
5855 /* A callback for the hw-doloop pass. Called when a loop we have discovered
5856 turns out not to be optimizable; we have to split the doloop_end pattern
5857 into a subtract and a test. */
5858 static void
5859 hwloop_fail (hwloop_info loop)
5860 {
5861 rtx insn, test, testreg;
5862
5863 if (dump_file)
5864 fprintf (dump_file, "splitting doloop insn %d\n",
5865 INSN_UID (loop->loop_end));
5866 insn = gen_addsi3 (loop->iter_reg, loop->iter_reg, constm1_rtx);
5867 /* See if we can emit the add at the head of the loop rather than at the
5868 end. */
5869 if (loop->head == NULL
5870 || loop->iter_reg_used_outside
5871 || loop->iter_reg_used
5872 || TEST_HARD_REG_BIT (loop->regs_set_in_loop, REGNO (loop->iter_reg))
5873 || loop->incoming_dest != loop->head
5874 || EDGE_COUNT (loop->head->preds) != 2)
5875 emit_insn_before (insn, loop->loop_end);
5876 else
5877 {
5878 rtx_insn *t = loop->start_label;
5879 while (!NOTE_P (t) || NOTE_KIND (t) != NOTE_INSN_BASIC_BLOCK)
5880 t = NEXT_INSN (t);
5881 emit_insn_after (insn, t);
5882 }
5883
5884 testreg = SET_DEST (XVECEXP (PATTERN (loop->loop_end), 0, 2));
5885 if (GET_CODE (testreg) == SCRATCH)
5886 testreg = loop->iter_reg;
5887 else
5888 emit_insn_before (gen_movsi (testreg, loop->iter_reg), loop->loop_end);
5889
5890 test = gen_rtx_NE (VOIDmode, testreg, const0_rtx);
5891 insn = emit_jump_insn_before (gen_cbranchsi4 (test, testreg, const0_rtx,
5892 loop->start_label),
5893 loop->loop_end);
5894
5895 JUMP_LABEL (insn) = loop->start_label;
5896 LABEL_NUSES (loop->start_label)++;
5897 delete_insn (loop->loop_end);
5898 }
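/* A sketch (for illustration) of what hwloop_fail produces: the original
   decrement-and-branch doloop_end insn is replaced with

       (set iter_reg (plus iter_reg -1))     ;; gen_addsi3, at the loop head
                                             ;; or just before the loop end
       (set testreg iter_reg)                ;; only when the pattern named a
                                             ;; real (non-SCRATCH) test register
       if (testreg != 0) goto start_label    ;; gen_cbranchsi4 with an NE test

   and the doloop_end insn itself is deleted.  */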
5899
5900 static struct hw_doloop_hooks c6x_doloop_hooks =
5901 {
5902 hwloop_pattern_reg,
5903 hwloop_optimize,
5904 hwloop_fail
5905 };
5906
5907 /* Run the hw-doloop pass to modulo-schedule hardware loops, or split the
5908 doloop_end patterns where such optimizations are impossible. */
5909 static void
5910 c6x_hwloops (void)
5911 {
5912 if (optimize)
5913 reorg_loops (true, &c6x_doloop_hooks);
5914 }
5915
5916 /* Implement the TARGET_MACHINE_DEPENDENT_REORG pass. We split call insns here
5917 into a sequence that loads the return register and performs the call,
5918 and emit the return label.
5919 If scheduling after reload is requested, it happens here. */
5920
5921 static void
5922 c6x_reorg (void)
5923 {
5924 basic_block bb;
5925 bool do_selsched = (c6x_flag_schedule_insns2 && flag_selective_scheduling2
5926 && !maybe_skip_selective_scheduling ());
5927
5928 /* We are freeing block_for_insn in the toplev to keep compatibility
5929 with old MDEP_REORGS that are not CFG based. Recompute it now. */
5930 compute_bb_for_insn ();
5931
5932 df_clear_flags (DF_LR_RUN_DCE);
5933 df_note_add_problem ();
5934
5935 /* If optimizing, we'll have split before scheduling. */
5936 if (optimize == 0)
5937 split_all_insns ();
5938
5939 df_analyze ();
5940
5941 if (c6x_flag_schedule_insns2)
5942 {
5943 int sz = get_max_uid () * 3 / 2 + 1;
5944
5945 insn_info.create (sz);
5946 }
5947
5948 /* Make sure the real-jump insns we create are not deleted. When modulo-
5949 scheduling, situations where a reg is only stored in a loop can also
5950 cause dead code when doing the initial unrolling. */
5951 sched_no_dce = true;
5952
5953 c6x_hwloops ();
5954
5955 if (c6x_flag_schedule_insns2)
5956 {
5957 split_delayed_insns ();
5958 timevar_push (TV_SCHED2);
5959 if (do_selsched)
5960 run_selective_scheduling ();
5961 else
5962 schedule_ebbs ();
5963 conditionalize_after_sched ();
5964 timevar_pop (TV_SCHED2);
5965
5966 free_delay_pairs ();
5967 }
5968 sched_no_dce = false;
5969
5970 rtx_insn **call_labels = XCNEWVEC (rtx_insn *, get_max_uid () + 1);
5971
5972 reorg_split_calls (call_labels);
5973
5974 if (c6x_flag_schedule_insns2)
5975 {
5976 FOR_EACH_BB_FN (bb, cfun)
5977 if ((bb->flags & BB_DISABLE_SCHEDULE) == 0)
5978 assign_reservations (BB_HEAD (bb), BB_END (bb));
5979 }
5980
5981 if (c6x_flag_var_tracking)
5982 {
5983 timevar_push (TV_VAR_TRACKING);
5984 variable_tracking_main ();
5985 timevar_pop (TV_VAR_TRACKING);
5986 }
5987
5988 reorg_emit_nops (call_labels);
5989
5990 /* Post-process the schedule to move parallel insns into SEQUENCEs. */
5991 if (c6x_flag_schedule_insns2)
5992 {
5993 free_delay_pairs ();
5994 c6x_gen_bundles ();
5995 }
5996
5997 df_finish_pass (false);
5998 }
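/* To summarize the ordering in c6x_reorg above: hardware loops are handled
   (and possibly modulo-scheduled) first, then the remaining code is scheduled
   either selectively or by extended basic blocks, conditionalized, and split
   around calls, before NOPs are emitted and parallel insns are bundled into
   SEQUENCEs.  */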
5999
6000 /* Called when a function has been assembled. It should perform all the
6001 tasks of ASM_DECLARE_FUNCTION_SIZE in elfos.h, plus target-specific
6002 tasks.
6003 We free the reservation (and other scheduling) information here now that
6004 all insns have been output. */
6005 void
6006 c6x_function_end (FILE *file, const char *fname)
6007 {
6008 c6x_output_fn_unwind (file);
6009
6010 insn_info.release ();
6011
6012 if (!flag_inhibit_size_directive)
6013 ASM_OUTPUT_MEASURED_SIZE (file, fname);
6014 }
6015 \f
6016 /* Determine whether X is a shift with code CODE and an integer amount
6017 AMOUNT. */
6018 static bool
6019 shift_p (rtx x, enum rtx_code code, int amount)
6020 {
6021 return (GET_CODE (x) == code && GET_CODE (XEXP (x, 1)) == CONST_INT
6022 && INTVAL (XEXP (x, 1)) == amount);
6023 }
6024
6025 /* Compute a (partial) cost for rtx X. Return true if the complete
6026 cost has been computed, and false if subexpressions should be
6027 scanned. In either case, *TOTAL contains the cost result. */
6028
6029 static bool
6030 c6x_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno, int *total,
6031 bool speed)
6032 {
6033 int cost2 = COSTS_N_INSNS (1);
6034 rtx op0, op1;
6035 int code = GET_CODE (x);
6036
6037 switch (code)
6038 {
6039 case CONST_INT:
6040 if (outer_code == SET || outer_code == PLUS)
6041 *total = satisfies_constraint_IsB (x) ? 0 : cost2;
6042 else if (outer_code == AND || outer_code == IOR || outer_code == XOR
6043 || outer_code == MINUS)
6044 *total = satisfies_constraint_Is5 (x) ? 0 : cost2;
6045 else if (GET_RTX_CLASS (outer_code) == RTX_COMPARE
6046 || GET_RTX_CLASS (outer_code) == RTX_COMM_COMPARE)
6047 *total = satisfies_constraint_Iu4 (x) ? 0 : cost2;
6048 else if (outer_code == ASHIFT || outer_code == ASHIFTRT
6049 || outer_code == LSHIFTRT)
6050 *total = satisfies_constraint_Iu5 (x) ? 0 : cost2;
6051 else
6052 *total = cost2;
6053 return true;
6054
6055 case CONST:
6056 case LABEL_REF:
6057 case SYMBOL_REF:
6058 case CONST_DOUBLE:
6059 *total = COSTS_N_INSNS (2);
6060 return true;
6061
6062 case TRUNCATE:
6063 /* Recognize a mult_highpart operation. */
6064 if ((mode == HImode || mode == SImode)
6065 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6066 && GET_MODE (XEXP (x, 0)) == GET_MODE_2XWIDER_MODE (mode)
6067 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6068 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6069 && INTVAL (XEXP (XEXP (x, 0), 1)) == GET_MODE_BITSIZE (mode))
6070 {
6071 rtx mul = XEXP (XEXP (x, 0), 0);
6072 rtx op0 = XEXP (mul, 0);
6073 rtx op1 = XEXP (mul, 1);
6074 enum rtx_code code0 = GET_CODE (op0);
6075 enum rtx_code code1 = GET_CODE (op1);
6076
6077 if ((code0 == code1
6078 && (code0 == SIGN_EXTEND || code0 == ZERO_EXTEND))
6079 || (mode == HImode
6080 && code0 == ZERO_EXTEND && code1 == SIGN_EXTEND))
6081 {
6082 if (mode == HImode)
6083 *total = COSTS_N_INSNS (2);
6084 else
6085 *total = COSTS_N_INSNS (12);
6086 mode = GET_MODE (XEXP (op0, 0));
6087 *total += rtx_cost (XEXP (op0, 0), mode, code0, 0, speed);
6088 *total += rtx_cost (XEXP (op1, 0), mode, code1, 0, speed);
6089 return true;
6090 }
6091 }
6092 return false;
6093
6094 case ASHIFT:
6095 case ASHIFTRT:
6096 case LSHIFTRT:
6097 if (mode == DImode)
6098 *total = COSTS_N_INSNS (CONSTANT_P (XEXP (x, 1)) ? 4 : 15);
6099 else
6100 *total = COSTS_N_INSNS (1);
6101 return false;
6102
6103 case PLUS:
6104 case MINUS:
6105 *total = COSTS_N_INSNS (1);
6106 op0 = code == PLUS ? XEXP (x, 0) : XEXP (x, 1);
6107 op1 = code == PLUS ? XEXP (x, 1) : XEXP (x, 0);
6108 if (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
6109 && INTEGRAL_MODE_P (mode)
6110 && GET_CODE (op0) == MULT
6111 && GET_CODE (XEXP (op0, 1)) == CONST_INT
6112 && (INTVAL (XEXP (op0, 1)) == 2
6113 || INTVAL (XEXP (op0, 1)) == 4
6114 || (code == PLUS && INTVAL (XEXP (op0, 1)) == 8)))
6115 {
6116 *total += rtx_cost (XEXP (op0, 0), mode, ASHIFT, 0, speed);
6117 *total += rtx_cost (op1, mode, (enum rtx_code) code, 1, speed);
6118 return true;
6119 }
6120 return false;
6121
6122 case MULT:
6123 op0 = XEXP (x, 0);
6124 op1 = XEXP (x, 1);
6125 if (mode == DFmode)
6126 {
6127 if (TARGET_FP)
6128 *total = COSTS_N_INSNS (speed ? 10 : 1);
6129 else
6130 *total = COSTS_N_INSNS (speed ? 200 : 4);
6131 }
6132 else if (mode == SFmode)
6133 {
6134 if (TARGET_FP)
6135 *total = COSTS_N_INSNS (speed ? 4 : 1);
6136 else
6137 *total = COSTS_N_INSNS (speed ? 100 : 4);
6138 }
6139 else if (mode == DImode)
6140 {
6141 if (TARGET_MPY32
6142 && GET_CODE (op0) == GET_CODE (op1)
6143 && (GET_CODE (op0) == ZERO_EXTEND
6144 || GET_CODE (op0) == SIGN_EXTEND))
6145 {
6146 *total = COSTS_N_INSNS (speed ? 2 : 1);
6147 op0 = XEXP (op0, 0);
6148 op1 = XEXP (op1, 0);
6149 }
6150 else
6151 /* Maybe improve this later. */
6152 *total = COSTS_N_INSNS (20);
6153 }
6154 else if (mode == SImode)
6155 {
6156 if (((GET_CODE (op0) == ZERO_EXTEND
6157 || GET_CODE (op0) == SIGN_EXTEND
6158 || shift_p (op0, LSHIFTRT, 16))
6159 && (GET_CODE (op1) == SIGN_EXTEND
6160 || GET_CODE (op1) == ZERO_EXTEND
6161 || scst5_operand (op1, SImode)
6162 || shift_p (op1, ASHIFTRT, 16)
6163 || shift_p (op1, LSHIFTRT, 16)))
6164 || (shift_p (op0, ASHIFTRT, 16)
6165 && (GET_CODE (op1) == SIGN_EXTEND
6166 || shift_p (op1, ASHIFTRT, 16))))
6167 {
6168 *total = COSTS_N_INSNS (speed ? 2 : 1);
6169 op0 = XEXP (op0, 0);
6170 if (scst5_operand (op1, SImode))
6171 op1 = NULL_RTX;
6172 else
6173 op1 = XEXP (op1, 0);
6174 }
6175 else if (!speed)
6176 *total = COSTS_N_INSNS (1);
6177 else if (TARGET_MPY32)
6178 *total = COSTS_N_INSNS (4);
6179 else
6180 *total = COSTS_N_INSNS (6);
6181 }
6182 else if (mode == HImode)
6183 *total = COSTS_N_INSNS (speed ? 2 : 1);
6184
6185 if (GET_CODE (op0) != REG
6186 && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
6187 *total += rtx_cost (op0, mode, MULT, 0, speed);
6188 if (op1 && GET_CODE (op1) != REG
6189 && (GET_CODE (op1) != SUBREG || GET_CODE (SUBREG_REG (op1)) != REG))
6190 *total += rtx_cost (op1, mode, MULT, 1, speed);
6191 return true;
6192
6193 case UDIV:
6194 case DIV:
6195 /* This is a bit random; assuming on average there'll be 16 leading
6196 zeros. FIXME: estimate better for constant dividends. */
6197 *total = COSTS_N_INSNS (6 + 3 * 16);
6198 return false;
6199
6200 case IF_THEN_ELSE:
6201 /* Recognize the cmp_and/ior patterns. */
6202 op0 = XEXP (x, 0);
6203 if ((GET_CODE (op0) == EQ || GET_CODE (op0) == NE)
6204 && REG_P (XEXP (op0, 0))
6205 && XEXP (op0, 1) == const0_rtx
6206 && rtx_equal_p (XEXP (x, 1), XEXP (op0, 0)))
6207 {
6208 *total = rtx_cost (XEXP (x, 1), VOIDmode, (enum rtx_code) outer_code,
6209 opno, speed);
6210 return false;
6211 }
6212 return false;
6213
6214 default:
6215 return false;
6216 }
6217 }
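/* Two examples of the cost model above: in SImode, (plus (mult X 4) Y) is
   costed as a single instruction plus the costs of X and Y themselves, while
   a DImode shift by a non-constant amount is costed at 15 instructions.  */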
6218
6219 /* Implements target hook vector_mode_supported_p. */
6220
6221 static bool
6222 c6x_vector_mode_supported_p (machine_mode mode)
6223 {
6224 switch (mode)
6225 {
6226 case V2HImode:
6227 case V4QImode:
6228 case V2SImode:
6229 case V4HImode:
6230 case V8QImode:
6231 return true;
6232 default:
6233 return false;
6234 }
6235 }
6236
6237 /* Implements TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */
6238 static machine_mode
6239 c6x_preferred_simd_mode (machine_mode mode)
6240 {
6241 switch (mode)
6242 {
6243 case HImode:
6244 return V2HImode;
6245 case QImode:
6246 return V4QImode;
6247
6248 default:
6249 return word_mode;
6250 }
6251 }
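/* For example, a loop over 16-bit (HImode) elements is vectorized with
   two-element V2HImode vectors and a loop over bytes with V4QImode; for any
   other element mode the hook returns word_mode, i.e. no preferred SIMD
   mode.  */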
6252
6253 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */
6254
6255 static bool
6256 c6x_scalar_mode_supported_p (machine_mode mode)
6257 {
6258 if (ALL_FIXED_POINT_MODE_P (mode)
6259 && GET_MODE_PRECISION (mode) <= 2 * BITS_PER_WORD)
6260 return true;
6261
6262 return default_scalar_mode_supported_p (mode);
6263 }
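/* In effect this admits the fixed-point types (_Fract/_Accum and their
   saturating variants) as long as they fit in a register pair, and defers to
   the default hook for everything else.  */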
6264
6265 /* Output a reference from a function exception table to the type_info
6266 object X. Output these via a special assembly directive. */
6267
6268 static bool
6269 c6x_output_ttype (rtx x)
6270 {
6271 /* Use special relocations for symbol references. */
6272 if (GET_CODE (x) != CONST_INT)
6273 fputs ("\t.ehtype\t", asm_out_file);
6274 else
6275 fputs ("\t.word\t", asm_out_file);
6276 output_addr_const (asm_out_file, x);
6277 fputc ('\n', asm_out_file);
6278
6279 return TRUE;
6280 }
6281
6282 /* Modify the return address of the current function. */
6283
6284 void
6285 c6x_set_return_address (rtx source, rtx scratch)
6286 {
6287 struct c6x_frame frame;
6288 rtx addr;
6289 HOST_WIDE_INT offset;
6290
6291 c6x_compute_frame_layout (&frame);
6292 if (! c6x_save_reg (RETURN_ADDR_REGNO))
6293 emit_move_insn (gen_rtx_REG (Pmode, RETURN_ADDR_REGNO), source);
6294 else
6295 {
6296
6297 if (frame_pointer_needed)
6298 {
6299 addr = hard_frame_pointer_rtx;
6300 offset = frame.b3_offset;
6301 }
6302 else
6303 {
6304 addr = stack_pointer_rtx;
6305 offset = frame.to_allocate - frame.b3_offset;
6306 }
6307
6308 /* TODO: Use base+offset loads where possible. */
6309 if (offset)
6310 {
6311 HOST_WIDE_INT low = trunc_int_for_mode (offset, HImode);
6312
6313 emit_insn (gen_movsi_high (scratch, GEN_INT (low)));
6314 if (low != offset)
6315 emit_insn (gen_movsi_lo_sum (scratch, scratch, GEN_INT(offset)));
6316 emit_insn (gen_addsi3 (scratch, addr, scratch));
6317 addr = scratch;
6318 }
6319
6320 emit_move_insn (gen_frame_mem (Pmode, addr), source);
6321 }
6322 }
6323
6324 /* We save pairs of registers using a DImode store. Describe the component
6325 registers for DWARF generation code. */
6326
6327 static rtx
6328 c6x_dwarf_register_span (rtx rtl)
6329 {
6330 unsigned regno;
6331 unsigned real_regno;
6332 int nregs;
6333 int i;
6334 rtx p;
6335
6336 regno = REGNO (rtl);
6337 nregs = HARD_REGNO_NREGS (regno, GET_MODE (rtl));
6338 if (nregs == 1)
6339 return NULL_RTX;
6340
6341 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc(nregs));
6342 for (i = 0; i < nregs; i++)
6343 {
6344 if (TARGET_BIG_ENDIAN)
6345 real_regno = regno + nregs - (i + 1);
6346 else
6347 real_regno = regno + i;
6348
6349 XVECEXP (p, 0, i) = gen_rtx_REG (SImode, real_regno);
6350 }
6351
6352 return p;
6353 }
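/* Illustration: for a DImode value held in the register pair starting at hard
   register N, the span returned above is (parallel [(reg:SI N) (reg:SI N+1)]),
   with the component order reversed on a big-endian target.  */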
6354 \f
6355 /* Codes for all the C6X builtins. */
6356 enum c6x_builtins
6357 {
6358 C6X_BUILTIN_SADD,
6359 C6X_BUILTIN_SSUB,
6360 C6X_BUILTIN_ADD2,
6361 C6X_BUILTIN_SUB2,
6362 C6X_BUILTIN_ADD4,
6363 C6X_BUILTIN_SUB4,
6364 C6X_BUILTIN_SADD2,
6365 C6X_BUILTIN_SSUB2,
6366 C6X_BUILTIN_SADDU4,
6367
6368 C6X_BUILTIN_SMPY,
6369 C6X_BUILTIN_SMPYH,
6370 C6X_BUILTIN_SMPYHL,
6371 C6X_BUILTIN_SMPYLH,
6372 C6X_BUILTIN_MPY2,
6373 C6X_BUILTIN_SMPY2,
6374
6375 C6X_BUILTIN_CLRR,
6376 C6X_BUILTIN_EXTR,
6377 C6X_BUILTIN_EXTRU,
6378
6379 C6X_BUILTIN_SSHL,
6380 C6X_BUILTIN_SUBC,
6381 C6X_BUILTIN_ABS,
6382 C6X_BUILTIN_ABS2,
6383 C6X_BUILTIN_AVG2,
6384 C6X_BUILTIN_AVGU4,
6385
6386 C6X_BUILTIN_MAX
6387 };
6388
6389
6390 static GTY(()) tree c6x_builtin_decls[C6X_BUILTIN_MAX];
6391
6392 /* Return the C6X builtin for CODE. */
6393 static tree
6394 c6x_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
6395 {
6396 if (code >= C6X_BUILTIN_MAX)
6397 return error_mark_node;
6398
6399 return c6x_builtin_decls[code];
6400 }
6401
6402 #define def_builtin(NAME, TYPE, CODE) \
6403 do { \
6404 tree bdecl; \
6405 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
6406 NULL, NULL_TREE); \
6407 c6x_builtin_decls[CODE] = bdecl; \
6408 } while (0)
6409
6410 /* Set up all builtin functions for this target. */
6411 static void
6412 c6x_init_builtins (void)
6413 {
6414 tree V4QI_type_node = build_vector_type (unsigned_intQI_type_node, 4);
6415 tree V2HI_type_node = build_vector_type (intHI_type_node, 2);
6416 tree V2SI_type_node = build_vector_type (intSI_type_node, 2);
6417 tree int_ftype_int
6418 = build_function_type_list (integer_type_node, integer_type_node,
6419 NULL_TREE);
6420 tree int_ftype_int_int
6421 = build_function_type_list (integer_type_node, integer_type_node,
6422 integer_type_node, NULL_TREE);
6423 tree v2hi_ftype_v2hi
6424 = build_function_type_list (V2HI_type_node, V2HI_type_node, NULL_TREE);
6425 tree v4qi_ftype_v4qi_v4qi
6426 = build_function_type_list (V4QI_type_node, V4QI_type_node,
6427 V4QI_type_node, NULL_TREE);
6428 tree v2hi_ftype_v2hi_v2hi
6429 = build_function_type_list (V2HI_type_node, V2HI_type_node,
6430 V2HI_type_node, NULL_TREE);
6431 tree v2si_ftype_v2hi_v2hi
6432 = build_function_type_list (V2SI_type_node, V2HI_type_node,
6433 V2HI_type_node, NULL_TREE);
6434
6435 def_builtin ("__builtin_c6x_sadd", int_ftype_int_int,
6436 C6X_BUILTIN_SADD);
6437 def_builtin ("__builtin_c6x_ssub", int_ftype_int_int,
6438 C6X_BUILTIN_SSUB);
6439 def_builtin ("__builtin_c6x_add2", v2hi_ftype_v2hi_v2hi,
6440 C6X_BUILTIN_ADD2);
6441 def_builtin ("__builtin_c6x_sub2", v2hi_ftype_v2hi_v2hi,
6442 C6X_BUILTIN_SUB2);
6443 def_builtin ("__builtin_c6x_add4", v4qi_ftype_v4qi_v4qi,
6444 C6X_BUILTIN_ADD4);
6445 def_builtin ("__builtin_c6x_sub4", v4qi_ftype_v4qi_v4qi,
6446 C6X_BUILTIN_SUB4);
6447 def_builtin ("__builtin_c6x_mpy2", v2si_ftype_v2hi_v2hi,
6448 C6X_BUILTIN_MPY2);
6449 def_builtin ("__builtin_c6x_sadd2", v2hi_ftype_v2hi_v2hi,
6450 C6X_BUILTIN_SADD2);
6451 def_builtin ("__builtin_c6x_ssub2", v2hi_ftype_v2hi_v2hi,
6452 C6X_BUILTIN_SSUB2);
6453 def_builtin ("__builtin_c6x_saddu4", v4qi_ftype_v4qi_v4qi,
6454 C6X_BUILTIN_SADDU4);
6455 def_builtin ("__builtin_c6x_smpy2", v2si_ftype_v2hi_v2hi,
6456 C6X_BUILTIN_SMPY2);
6457
6458 def_builtin ("__builtin_c6x_smpy", int_ftype_int_int,
6459 C6X_BUILTIN_SMPY);
6460 def_builtin ("__builtin_c6x_smpyh", int_ftype_int_int,
6461 C6X_BUILTIN_SMPYH);
6462 def_builtin ("__builtin_c6x_smpyhl", int_ftype_int_int,
6463 C6X_BUILTIN_SMPYHL);
6464 def_builtin ("__builtin_c6x_smpylh", int_ftype_int_int,
6465 C6X_BUILTIN_SMPYLH);
6466
6467 def_builtin ("__builtin_c6x_sshl", int_ftype_int_int,
6468 C6X_BUILTIN_SSHL);
6469 def_builtin ("__builtin_c6x_subc", int_ftype_int_int,
6470 C6X_BUILTIN_SUBC);
6471
6472 def_builtin ("__builtin_c6x_avg2", v2hi_ftype_v2hi_v2hi,
6473 C6X_BUILTIN_AVG2);
6474 def_builtin ("__builtin_c6x_avgu4", v4qi_ftype_v4qi_v4qi,
6475 C6X_BUILTIN_AVGU4);
6476
6477 def_builtin ("__builtin_c6x_clrr", int_ftype_int_int,
6478 C6X_BUILTIN_CLRR);
6479 def_builtin ("__builtin_c6x_extr", int_ftype_int_int,
6480 C6X_BUILTIN_EXTR);
6481 def_builtin ("__builtin_c6x_extru", int_ftype_int_int,
6482 C6X_BUILTIN_EXTRU);
6483
6484 def_builtin ("__builtin_c6x_abs", int_ftype_int, C6X_BUILTIN_ABS);
6485 def_builtin ("__builtin_c6x_abs2", v2hi_ftype_v2hi, C6X_BUILTIN_ABS2);
6486 }
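/* With these declarations in place, user code can, for example, write

       int sum = __builtin_c6x_sadd (a, b);

   which c6x_expand_builtin below expands through the saddsi3 pattern recorded
   for C6X_BUILTIN_SADD in bdesc_2arg.  */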
6487
6488
6489 struct builtin_description
6490 {
6491 const enum insn_code icode;
6492 const char *const name;
6493 const enum c6x_builtins code;
6494 };
6495
6496 static const struct builtin_description bdesc_2arg[] =
6497 {
6498 { CODE_FOR_saddsi3, "__builtin_c6x_sadd", C6X_BUILTIN_SADD },
6499 { CODE_FOR_ssubsi3, "__builtin_c6x_ssub", C6X_BUILTIN_SSUB },
6500 { CODE_FOR_addv2hi3, "__builtin_c6x_add2", C6X_BUILTIN_ADD2 },
6501 { CODE_FOR_subv2hi3, "__builtin_c6x_sub2", C6X_BUILTIN_SUB2 },
6502 { CODE_FOR_addv4qi3, "__builtin_c6x_add4", C6X_BUILTIN_ADD4 },
6503 { CODE_FOR_subv4qi3, "__builtin_c6x_sub4", C6X_BUILTIN_SUB4 },
6504 { CODE_FOR_ss_addv2hi3, "__builtin_c6x_sadd2", C6X_BUILTIN_SADD2 },
6505 { CODE_FOR_ss_subv2hi3, "__builtin_c6x_ssub2", C6X_BUILTIN_SSUB2 },
6506 { CODE_FOR_us_addv4qi3, "__builtin_c6x_saddu4", C6X_BUILTIN_SADDU4 },
6507
6508 { CODE_FOR_subcsi3, "__builtin_c6x_subc", C6X_BUILTIN_SUBC },
6509 { CODE_FOR_ss_ashlsi3, "__builtin_c6x_sshl", C6X_BUILTIN_SSHL },
6510
6511 { CODE_FOR_avgv2hi3, "__builtin_c6x_avg2", C6X_BUILTIN_AVG2 },
6512 { CODE_FOR_uavgv4qi3, "__builtin_c6x_avgu4", C6X_BUILTIN_AVGU4 },
6513
6514 { CODE_FOR_mulhqsq3, "__builtin_c6x_smpy", C6X_BUILTIN_SMPY },
6515 { CODE_FOR_mulhqsq3_hh, "__builtin_c6x_smpyh", C6X_BUILTIN_SMPYH },
6516 { CODE_FOR_mulhqsq3_lh, "__builtin_c6x_smpylh", C6X_BUILTIN_SMPYLH },
6517 { CODE_FOR_mulhqsq3_hl, "__builtin_c6x_smpyhl", C6X_BUILTIN_SMPYHL },
6518
6519 { CODE_FOR_mulv2hqv2sq3, "__builtin_c6x_smpy2", C6X_BUILTIN_SMPY2 },
6520
6521 { CODE_FOR_clrr, "__builtin_c6x_clrr", C6X_BUILTIN_CLRR },
6522 { CODE_FOR_extr, "__builtin_c6x_extr", C6X_BUILTIN_EXTR },
6523 { CODE_FOR_extru, "__builtin_c6x_extru", C6X_BUILTIN_EXTRU }
6524 };
6525
6526 static const struct builtin_description bdesc_1arg[] =
6527 {
6528 { CODE_FOR_ssabssi2, "__builtin_c6x_abs", C6X_BUILTIN_ABS },
6529 { CODE_FOR_ssabsv2hi2, "__builtin_c6x_abs2", C6X_BUILTIN_ABS2 }
6530 };
6531
6532 /* Errors in the source file can cause expand_expr to return const0_rtx
6533    where we expect a vector.  To avoid crashing, substitute a freshly
6534    zeroed register viewed in the expected vector mode. */
6535 static rtx
6536 safe_vector_operand (rtx x, machine_mode mode)
6537 {
6538 if (x != const0_rtx)
6539 return x;
6540 x = gen_reg_rtx (SImode);
6541
6542 emit_insn (gen_movsi (x, CONST0_RTX (SImode)));
6543 return gen_lowpart (mode, x);
6544 }
6545
6546 /* Subroutine of c6x_expand_builtin to take care of binop insns.  MATCH_OP is
6547    true if the insn's first input operand must match its output operand. */
6548
6549 static rtx
6550 c6x_expand_binop_builtin (enum insn_code icode, tree exp, rtx target,
6551 bool match_op)
6552 {
6553 int offs = match_op ? 1 : 0;
6554 rtx pat;
6555 tree arg0 = CALL_EXPR_ARG (exp, 0);
6556 tree arg1 = CALL_EXPR_ARG (exp, 1);
6557 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6558 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6559 machine_mode op0mode = GET_MODE (op0);
6560 machine_mode op1mode = GET_MODE (op1);
6561 machine_mode tmode = insn_data[icode].operand[0].mode;
6562 machine_mode mode0 = insn_data[icode].operand[1 + offs].mode;
6563 machine_mode mode1 = insn_data[icode].operand[2 + offs].mode;
6564 rtx ret = target;
6565
6566 if (VECTOR_MODE_P (mode0))
6567 op0 = safe_vector_operand (op0, mode0);
6568 if (VECTOR_MODE_P (mode1))
6569 op1 = safe_vector_operand (op1, mode1);
6570
6571 if (! target
6572 || GET_MODE (target) != tmode
6573 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
6574 {
6575 if (tmode == SQmode || tmode == V2SQmode)
6576 {
6577 ret = gen_reg_rtx (tmode == SQmode ? SImode : V2SImode);
6578 target = gen_lowpart (tmode, ret);
6579 }
6580 else
6581 target = gen_reg_rtx (tmode);
6582 }
6583
6584 if ((op0mode == V2HImode || op0mode == SImode || op0mode == VOIDmode)
6585 && (mode0 == V2HQmode || mode0 == HQmode || mode0 == SQmode))
6586 {
6587 op0mode = mode0;
6588 op0 = gen_lowpart (mode0, op0);
6589 }
6590 if ((op1mode == V2HImode || op1mode == SImode || op1mode == VOIDmode)
6591 && (mode1 == V2HQmode || mode1 == HQmode || mode1 == SQmode))
6592 {
6593 op1mode = mode1;
6594 op1 = gen_lowpart (mode1, op1);
6595 }
6596 /* In case the insn wants input operands in modes different from
6597 the result, abort. */
6598 gcc_assert ((op0mode == mode0 || op0mode == VOIDmode)
6599 && (op1mode == mode1 || op1mode == VOIDmode));
6600
6601 if (! (*insn_data[icode].operand[1 + offs].predicate) (op0, mode0))
6602 op0 = copy_to_mode_reg (mode0, op0);
6603 if (! (*insn_data[icode].operand[2 + offs].predicate) (op1, mode1))
6604 op1 = copy_to_mode_reg (mode1, op1);
6605
6606 if (match_op)
6607 pat = GEN_FCN (icode) (target, target, op0, op1);
6608 else
6609 pat = GEN_FCN (icode) (target, op0, op1);
6610
6611 if (! pat)
6612 return 0;
6613
6614 emit_insn (pat);
6615
6616 return ret;
6617 }
6618
6619 /* Subroutine of c6x_expand_builtin to take care of unop insns. */
6620
6621 static rtx
6622 c6x_expand_unop_builtin (enum insn_code icode, tree exp,
6623 rtx target)
6624 {
6625 rtx pat;
6626 tree arg0 = CALL_EXPR_ARG (exp, 0);
6627 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6628 machine_mode op0mode = GET_MODE (op0);
6629 machine_mode tmode = insn_data[icode].operand[0].mode;
6630 machine_mode mode0 = insn_data[icode].operand[1].mode;
6631
6632 if (! target
6633 || GET_MODE (target) != tmode
6634 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
6635 target = gen_reg_rtx (tmode);
6636
6637 if (VECTOR_MODE_P (mode0))
6638 op0 = safe_vector_operand (op0, mode0);
6639
6640 if (op0mode == SImode && mode0 == HImode)
6641 {
6642 op0mode = HImode;
6643 op0 = gen_lowpart (HImode, op0);
6644 }
6645 gcc_assert (op0mode == mode0 || op0mode == VOIDmode);
6646
6647 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
6648 op0 = copy_to_mode_reg (mode0, op0);
6649
6650 pat = GEN_FCN (icode) (target, op0);
6651 if (! pat)
6652 return 0;
6653 emit_insn (pat);
6654 return target;
6655 }
6656
6657 /* Expand an expression EXP that calls a built-in function,
6658 with result going to TARGET if that's convenient
6659 (and in mode MODE if that's convenient).
6660 SUBTARGET may be used as the target for computing one of EXP's operands.
6661 IGNORE is nonzero if the value is to be ignored. */
6662
6663 static rtx
6664 c6x_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
6665 rtx subtarget ATTRIBUTE_UNUSED,
6666 machine_mode mode ATTRIBUTE_UNUSED,
6667 int ignore ATTRIBUTE_UNUSED)
6668 {
6669 size_t i;
6670 const struct builtin_description *d;
6671 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6672 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6673
6674 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
6675 if (d->code == fcode)
6676 return c6x_expand_binop_builtin (d->icode, exp, target,
6677 fcode == C6X_BUILTIN_CLRR);
6678
6679 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
6680 if (d->code == fcode)
6681 return c6x_expand_unop_builtin (d->icode, exp, target);
6682
6683 gcc_unreachable ();
6684 }
6685
6686 /* Target unwind frame info is generated from dwarf CFI directives, so
6687 always output dwarf2 unwind info. */
6688
6689 static enum unwind_info_type
6690 c6x_debug_unwind_info (void)
6691 {
6692 if (flag_unwind_tables || flag_exceptions)
6693 return UI_DWARF2;
6694
6695 return default_debug_unwind_info ();
6696 }
6697 \f
6698 /* Target Structure. */
6699
6700 /* Initialize the GCC target structure. */
6701 #undef TARGET_FUNCTION_ARG
6702 #define TARGET_FUNCTION_ARG c6x_function_arg
6703 #undef TARGET_FUNCTION_ARG_ADVANCE
6704 #define TARGET_FUNCTION_ARG_ADVANCE c6x_function_arg_advance
6705 #undef TARGET_FUNCTION_ARG_BOUNDARY
6706 #define TARGET_FUNCTION_ARG_BOUNDARY c6x_function_arg_boundary
6707 #undef TARGET_FUNCTION_ARG_ROUND_BOUNDARY
6708 #define TARGET_FUNCTION_ARG_ROUND_BOUNDARY \
6709 c6x_function_arg_round_boundary
6710 #undef TARGET_FUNCTION_VALUE_REGNO_P
6711 #define TARGET_FUNCTION_VALUE_REGNO_P c6x_function_value_regno_p
6712 #undef TARGET_FUNCTION_VALUE
6713 #define TARGET_FUNCTION_VALUE c6x_function_value
6714 #undef TARGET_LIBCALL_VALUE
6715 #define TARGET_LIBCALL_VALUE c6x_libcall_value
6716 #undef TARGET_RETURN_IN_MEMORY
6717 #define TARGET_RETURN_IN_MEMORY c6x_return_in_memory
6718 #undef TARGET_RETURN_IN_MSB
6719 #define TARGET_RETURN_IN_MSB c6x_return_in_msb
6720 #undef TARGET_PASS_BY_REFERENCE
6721 #define TARGET_PASS_BY_REFERENCE c6x_pass_by_reference
6722 #undef TARGET_CALLEE_COPIES
6723 #define TARGET_CALLEE_COPIES c6x_callee_copies
6724 #undef TARGET_STRUCT_VALUE_RTX
6725 #define TARGET_STRUCT_VALUE_RTX c6x_struct_value_rtx
6726 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
6727 #define TARGET_FUNCTION_OK_FOR_SIBCALL c6x_function_ok_for_sibcall
6728
6729 #undef TARGET_ASM_OUTPUT_MI_THUNK
6730 #define TARGET_ASM_OUTPUT_MI_THUNK c6x_output_mi_thunk
6731 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
6732 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK c6x_can_output_mi_thunk
6733
6734 #undef TARGET_BUILD_BUILTIN_VA_LIST
6735 #define TARGET_BUILD_BUILTIN_VA_LIST c6x_build_builtin_va_list
6736
6737 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
6738 #define TARGET_ASM_TRAMPOLINE_TEMPLATE c6x_asm_trampoline_template
6739 #undef TARGET_TRAMPOLINE_INIT
6740 #define TARGET_TRAMPOLINE_INIT c6x_initialize_trampoline
6741
6742 #undef TARGET_LEGITIMATE_CONSTANT_P
6743 #define TARGET_LEGITIMATE_CONSTANT_P c6x_legitimate_constant_p
6744 #undef TARGET_LEGITIMATE_ADDRESS_P
6745 #define TARGET_LEGITIMATE_ADDRESS_P c6x_legitimate_address_p
6746
6747 #undef TARGET_LRA_P
6748 #define TARGET_LRA_P hook_bool_void_false
6749
6750 #undef TARGET_IN_SMALL_DATA_P
6751 #define TARGET_IN_SMALL_DATA_P c6x_in_small_data_p
6752 #undef TARGET_ASM_SELECT_RTX_SECTION
6753 #define TARGET_ASM_SELECT_RTX_SECTION c6x_select_rtx_section
6754 #undef TARGET_ASM_SELECT_SECTION
6755 #define TARGET_ASM_SELECT_SECTION c6x_elf_select_section
6756 #undef TARGET_ASM_UNIQUE_SECTION
6757 #define TARGET_ASM_UNIQUE_SECTION c6x_elf_unique_section
6758 #undef TARGET_SECTION_TYPE_FLAGS
6759 #define TARGET_SECTION_TYPE_FLAGS c6x_section_type_flags
6760 #undef TARGET_HAVE_SRODATA_SECTION
6761 #define TARGET_HAVE_SRODATA_SECTION true
6762 #undef TARGET_ASM_MERGEABLE_RODATA_PREFIX
6763 #define TARGET_ASM_MERGEABLE_RODATA_PREFIX ".const"
6764
6765 #undef TARGET_OPTION_OVERRIDE
6766 #define TARGET_OPTION_OVERRIDE c6x_option_override
6767 #undef TARGET_CONDITIONAL_REGISTER_USAGE
6768 #define TARGET_CONDITIONAL_REGISTER_USAGE c6x_conditional_register_usage
6769
6770 #undef TARGET_INIT_LIBFUNCS
6771 #define TARGET_INIT_LIBFUNCS c6x_init_libfuncs
6772 #undef TARGET_LIBFUNC_GNU_PREFIX
6773 #define TARGET_LIBFUNC_GNU_PREFIX true
6774
6775 #undef TARGET_SCALAR_MODE_SUPPORTED_P
6776 #define TARGET_SCALAR_MODE_SUPPORTED_P c6x_scalar_mode_supported_p
6777 #undef TARGET_VECTOR_MODE_SUPPORTED_P
6778 #define TARGET_VECTOR_MODE_SUPPORTED_P c6x_vector_mode_supported_p
6779 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
6780 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE c6x_preferred_simd_mode
6781
6782 #undef TARGET_RTX_COSTS
6783 #define TARGET_RTX_COSTS c6x_rtx_costs
6784
6785 #undef TARGET_SCHED_INIT
6786 #define TARGET_SCHED_INIT c6x_sched_init
6787 #undef TARGET_SCHED_SET_SCHED_FLAGS
6788 #define TARGET_SCHED_SET_SCHED_FLAGS c6x_set_sched_flags
6789 #undef TARGET_SCHED_ADJUST_COST
6790 #define TARGET_SCHED_ADJUST_COST c6x_adjust_cost
6791 #undef TARGET_SCHED_ISSUE_RATE
6792 #define TARGET_SCHED_ISSUE_RATE c6x_issue_rate
6793 #undef TARGET_SCHED_VARIABLE_ISSUE
6794 #define TARGET_SCHED_VARIABLE_ISSUE c6x_variable_issue
6795 #undef TARGET_SCHED_REORDER
6796 #define TARGET_SCHED_REORDER c6x_sched_reorder
6797 #undef TARGET_SCHED_REORDER2
6798 #define TARGET_SCHED_REORDER2 c6x_sched_reorder2
6799 #undef TARGET_SCHED_DFA_NEW_CYCLE
6800 #define TARGET_SCHED_DFA_NEW_CYCLE c6x_dfa_new_cycle
6801 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
6802 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN c6x_sched_dfa_pre_cycle_insn
6803 #undef TARGET_SCHED_EXPOSED_PIPELINE
6804 #define TARGET_SCHED_EXPOSED_PIPELINE true
6805
6806 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
6807 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT c6x_alloc_sched_context
6808 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
6809 #define TARGET_SCHED_INIT_SCHED_CONTEXT c6x_init_sched_context
6810 #undef TARGET_SCHED_SET_SCHED_CONTEXT
6811 #define TARGET_SCHED_SET_SCHED_CONTEXT c6x_set_sched_context
6812 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
6813 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT c6x_clear_sched_context
6814 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
6815 #define TARGET_SCHED_FREE_SCHED_CONTEXT c6x_free_sched_context
6816
6817 #undef TARGET_CAN_ELIMINATE
6818 #define TARGET_CAN_ELIMINATE c6x_can_eliminate
6819
6820 #undef TARGET_PREFERRED_RENAME_CLASS
6821 #define TARGET_PREFERRED_RENAME_CLASS c6x_preferred_rename_class
6822
6823 #undef TARGET_MACHINE_DEPENDENT_REORG
6824 #define TARGET_MACHINE_DEPENDENT_REORG c6x_reorg
6825
6826 #undef TARGET_ASM_FILE_START
6827 #define TARGET_ASM_FILE_START c6x_file_start
6828
6829 #undef TARGET_PRINT_OPERAND
6830 #define TARGET_PRINT_OPERAND c6x_print_operand
6831 #undef TARGET_PRINT_OPERAND_ADDRESS
6832 #define TARGET_PRINT_OPERAND_ADDRESS c6x_print_operand_address
6833 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
6834 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P c6x_print_operand_punct_valid_p
6835
6836 /* C6x unwinding tables use a different format for the typeinfo tables. */
6837 #undef TARGET_ASM_TTYPE
6838 #define TARGET_ASM_TTYPE c6x_output_ttype
6839
6840 /* The C6x ABI follows the ARM EABI exception handling rules. */
6841 #undef TARGET_ARM_EABI_UNWINDER
6842 #define TARGET_ARM_EABI_UNWINDER true
6843
6844 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
6845 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY c6x_asm_emit_except_personality
6846
6847 #undef TARGET_ASM_INIT_SECTIONS
6848 #define TARGET_ASM_INIT_SECTIONS c6x_asm_init_sections
6849
6850 #undef TARGET_DEBUG_UNWIND_INFO
6851 #define TARGET_DEBUG_UNWIND_INFO c6x_debug_unwind_info
6852
6853 #undef TARGET_DWARF_REGISTER_SPAN
6854 #define TARGET_DWARF_REGISTER_SPAN c6x_dwarf_register_span
6855
6856 #undef TARGET_INIT_BUILTINS
6857 #define TARGET_INIT_BUILTINS c6x_init_builtins
6858 #undef TARGET_EXPAND_BUILTIN
6859 #define TARGET_EXPAND_BUILTIN c6x_expand_builtin
6860 #undef TARGET_BUILTIN_DECL
6861 #define TARGET_BUILTIN_DECL c6x_builtin_decl
6862
6863 struct gcc_target targetm = TARGET_INITIALIZER;
6864
6865 #include "gt-c6x.h"