]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/pru/pru.c
Update copyright years.
[thirdparty/gcc.git] / gcc / config / pru / pru.c
1 /* Target machine subroutines for TI PRU.
2 Copyright (C) 2014-2020 Free Software Foundation, Inc.
3 Dimitar Dimitrov <dimitar@dinux.eu>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #define IN_TARGET_CODE 1
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "attribs.h"
32 #include "df.h"
33 #include "memmodel.h"
34 #include "tm_p.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "diagnostic-core.h"
40 #include "output.h"
41 #include "insn-attr.h"
42 #include "flags.h"
43 #include "explow.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "expr.h"
47 #include "toplev.h"
48 #include "langhooks.h"
49 #include "cfgrtl.h"
50 #include "stor-layout.h"
51 #include "dumpfile.h"
52 #include "builtins.h"
53 #include "pru-protos.h"
54
55 /* This file should be included last. */
56 #include "target-def.h"
57
58 #define INIT_ARRAY_ENTRY_BYTES 2
59
60 /* Global PRU CTABLE entries, filled in by pragmas, and used for fast
61 addressing via LBCO/SBCO instructions. */
62 struct pru_ctable_entry pru_ctable[32];
63
64 /* Forward function declarations. */
65 static bool prologue_saved_reg_p (int);
66 static void pru_reorg_loop (rtx_insn *);
67
68 struct GTY (()) machine_function
69 {
70 /* Current frame information, to be filled in by pru_compute_frame_layout
71 with register save masks, and offsets for the current function. */
72
73 /* Mask of registers to save. */
74 HARD_REG_SET save_mask;
75 /* Number of bytes that the entire frame takes up. */
76 int total_size;
77 /* Number of bytes that variables take up. */
78 int var_size;
79 /* Number of bytes that outgoing arguments take up. */
80 int out_args_size;
81 /* Number of bytes needed to store registers in frame. */
82 int save_reg_size;
83 /* Offset from new stack pointer to store registers. */
84 int save_regs_offset;
85 /* True if final frame layout is already calculated. */
86 bool initialized;
87 /* Number of doloop tags used so far. */
88 int doloop_tags;
89 /* True if the last tag was allocated to a doloop_end. */
90 bool doloop_tag_from_end;
91 };
92 \f
/* Stack layout and calling conventions.

   The PRU ABI defines r4 as Argument Pointer.  GCC implements the same
   semantics, but represents it with HARD_FRAME_POINTER_REGNUM and
   names it FP.  The stack layout is shown below:

       ---------------------- high address
        | incoming args
       ------call-boundary---
        | pretend_args      ^
    FP ----------------     | total
        | save_regs         | frame
        ---------------     | size
        | local vars        |
        ---------------     |
        | outgoing args     V
    SP ---------------------- low address

 */
112
/* Round LOC up with ROUND_UP to a multiple of the stack boundary, the
   boundary being converted from bits to storage units first.  */
#define PRU_STACK_ALIGN(LOC) \
  ROUND_UP ((LOC), STACK_BOUNDARY / BITS_PER_UNIT)
114
115 /* Implement TARGET_COMPUTE_FRAME_LAYOUT. */
116 static void
117 pru_compute_frame_layout (void)
118 {
119 int regno;
120 HARD_REG_SET *save_mask;
121 int total_size;
122 int var_size;
123 int out_args_size;
124 int save_reg_size;
125
126 gcc_assert (!cfun->machine->initialized);
127
128 save_mask = &cfun->machine->save_mask;
129 CLEAR_HARD_REG_SET (*save_mask);
130
131 var_size = PRU_STACK_ALIGN ((HOST_WIDE_INT) get_frame_size ());
132 out_args_size = PRU_STACK_ALIGN ((HOST_WIDE_INT) crtl->outgoing_args_size);
133 total_size = var_size + out_args_size;
134
135 /* Calculate space needed for gp registers. */
136 save_reg_size = 0;
137 for (regno = 0; regno <= LAST_GP_REGNUM; regno++)
138 if (prologue_saved_reg_p (regno))
139 {
140 SET_HARD_REG_BIT (*save_mask, regno);
141 save_reg_size += 1;
142 }
143
144 save_reg_size = PRU_STACK_ALIGN (save_reg_size);
145 total_size += save_reg_size;
146 total_size += PRU_STACK_ALIGN (crtl->args.pretend_args_size);
147
148 /* Save other computed information. */
149 cfun->machine->total_size = total_size;
150 cfun->machine->var_size = var_size;
151 cfun->machine->out_args_size = out_args_size;
152 cfun->machine->save_reg_size = save_reg_size;
153 cfun->machine->initialized = reload_completed;
154 cfun->machine->save_regs_offset = out_args_size + var_size;
155 }
156
157 /* Emit efficient RTL equivalent of ADD3 with the given const_int for
158 frame-related registers.
159 op0 - Destination register.
160 op1 - First addendum operand (a register).
161 addendum - Second addendum operand (a constant).
162 kind - Note kind. REG_NOTE_MAX if no note must be added.
163 */
164 static rtx
165 pru_add3_frame_adjust (rtx op0, rtx op1, int addendum,
166 const enum reg_note kind)
167 {
168 rtx insn;
169
170 rtx op0_adjust = gen_rtx_SET (op0, plus_constant (Pmode, op1, addendum));
171
172 if (UBYTE_INT (addendum) || UBYTE_INT (-addendum))
173 insn = emit_insn (op0_adjust);
174 else
175 {
176 /* Help the compiler to cope with an arbitrary integer constant.
177 Reload has finished so we can't expect the compiler to
178 auto-allocate a temporary register. But we know that call-saved
179 registers are not live yet, so we utilize them. */
180 rtx tmpreg = gen_rtx_REG (Pmode, PROLOGUE_TEMP_REGNUM);
181 if (addendum < 0)
182 {
183 emit_insn (gen_rtx_SET (tmpreg, gen_int_mode (-addendum, Pmode)));
184 insn = emit_insn (gen_sub3_insn (op0, op1, tmpreg));
185 }
186 else
187 {
188 emit_insn (gen_rtx_SET (tmpreg, gen_int_mode (addendum, Pmode)));
189 insn = emit_insn (gen_add3_insn (op0, op1, tmpreg));
190 }
191 }
192
193 /* Attach a note indicating what happened. */
194 if (kind != REG_NOTE_MAX)
195 add_reg_note (insn, kind, copy_rtx (op0_adjust));
196
197 RTX_FRAME_RELATED_P (insn) = 1;
198
199 return insn;
200 }
201
202 /* Add a const_int to the stack pointer register. */
203 static rtx
204 pru_add_to_sp (int addendum, const enum reg_note kind)
205 {
206 return pru_add3_frame_adjust (stack_pointer_rtx, stack_pointer_rtx,
207 addendum, kind);
208 }
209
210 /* Helper function used during prologue/epilogue. Emits a single LBBO/SBBO
211 instruction for load/store of the next group of consecutive registers. */
212 static int
213 xbbo_next_reg_cluster (int regno_start, int *sp_offset, bool do_store)
214 {
215 int regno, nregs, i;
216 rtx addr;
217 rtx_insn *insn;
218
219 nregs = 0;
220
221 /* Skip the empty slots. */
222 for (; regno_start <= LAST_GP_REGNUM;)
223 if (TEST_HARD_REG_BIT (cfun->machine->save_mask, regno_start))
224 break;
225 else
226 regno_start++;
227
228 /* Find the largest consecutive group of registers to save. */
229 for (regno = regno_start; regno <= LAST_GP_REGNUM;)
230 if (TEST_HARD_REG_BIT (cfun->machine->save_mask, regno))
231 {
232 regno++;
233 nregs++;
234 }
235 else
236 break;
237
238 if (!nregs)
239 return -1;
240
241 gcc_assert (UBYTE_INT (*sp_offset));
242
243 /* Ok, save this bunch. */
244 addr = plus_constant (Pmode, stack_pointer_rtx, *sp_offset);
245
246 if (do_store)
247 insn = targetm.gen_store_multiple (gen_frame_mem (BLKmode, addr),
248 gen_rtx_REG (QImode, regno_start),
249 GEN_INT (nregs));
250 else
251 insn = targetm.gen_load_multiple (gen_rtx_REG (QImode, regno_start),
252 gen_frame_mem (BLKmode, addr),
253 GEN_INT (nregs));
254
255 gcc_assert (reload_completed);
256 gcc_assert (insn);
257 emit_insn (insn);
258
259 /* Tag as frame-related. */
260 RTX_FRAME_RELATED_P (insn) = 1;
261
262 if (!do_store)
263 {
264 /* Tag epilogue unwind notes. */
265 for (i = regno_start; i < (regno_start + nregs); i++)
266 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (QImode, i));
267 }
268
269 /* Increment and save offset in anticipation of the next register group. */
270 *sp_offset += nregs * UNITS_PER_WORD;
271
272 return regno_start + nregs;
273 }
274
275 /* Emit function prologue. */
276 void
277 pru_expand_prologue (void)
278 {
279 int regno_start;
280 int total_frame_size;
281 int sp_offset; /* Offset from base_reg to final stack value. */
282 int save_regs_base; /* Offset from base_reg to register save area. */
283 int save_offset; /* Temporary offset to currently saved register group. */
284
285 total_frame_size = cfun->machine->total_size;
286
287 if (flag_stack_usage_info)
288 current_function_static_stack_size = total_frame_size;
289
290 /* Decrement the stack pointer. */
291 if (!UBYTE_INT (total_frame_size))
292 {
293 /* We need an intermediary point, this will point at the spill block. */
294 pru_add_to_sp (cfun->machine->save_regs_offset - total_frame_size,
295 REG_NOTE_MAX);
296 save_regs_base = 0;
297 sp_offset = -cfun->machine->save_regs_offset;
298 }
299 else if (total_frame_size)
300 {
301 pru_add_to_sp (- total_frame_size, REG_NOTE_MAX);
302 save_regs_base = cfun->machine->save_regs_offset;
303 sp_offset = 0;
304 }
305 else
306 save_regs_base = sp_offset = 0;
307
308 regno_start = 0;
309 save_offset = save_regs_base;
310 do
311 regno_start = xbbo_next_reg_cluster (regno_start, &save_offset, true);
312 while (regno_start >= 0);
313
314 /* Set FP before adjusting SP. This way fp_offset has
315 better chance to fit in UBYTE. */
316 if (frame_pointer_needed)
317 {
318 int fp_offset = total_frame_size
319 - crtl->args.pretend_args_size
320 + sp_offset;
321
322 pru_add3_frame_adjust (hard_frame_pointer_rtx, stack_pointer_rtx,
323 fp_offset, REG_NOTE_MAX);
324 }
325
326 if (sp_offset)
327 pru_add_to_sp (sp_offset, REG_FRAME_RELATED_EXPR);
328
329 /* If we are profiling, make sure no instructions are scheduled before
330 the call to mcount. */
331 if (crtl->profile)
332 emit_insn (gen_blockage ());
333 }
334
335 /* Emit function epilogue. */
336 void
337 pru_expand_epilogue (bool sibcall_p)
338 {
339 int total_frame_size;
340 int sp_adjust, save_offset;
341 int regno_start;
342
343 if (!sibcall_p && pru_can_use_return_insn ())
344 {
345 emit_jump_insn (gen_return ());
346 return;
347 }
348
349 emit_insn (gen_blockage ());
350
351 total_frame_size = cfun->machine->total_size;
352
353 if (frame_pointer_needed)
354 {
355 /* Recover the stack pointer. */
356 pru_add3_frame_adjust (stack_pointer_rtx, hard_frame_pointer_rtx,
357 - cfun->machine->save_reg_size,
358 REG_CFA_ADJUST_CFA);
359
360 save_offset = 0;
361 sp_adjust = total_frame_size - cfun->machine->save_regs_offset;
362 }
363 else if (!UBYTE_INT (total_frame_size))
364 {
365 pru_add_to_sp (cfun->machine->save_regs_offset, REG_CFA_ADJUST_CFA);
366 save_offset = 0;
367 sp_adjust = total_frame_size - cfun->machine->save_regs_offset;
368 }
369 else
370 {
371 save_offset = cfun->machine->save_regs_offset;
372 sp_adjust = total_frame_size;
373 }
374
375 regno_start = 0;
376 do
377 regno_start = xbbo_next_reg_cluster (regno_start, &save_offset, false);
378 while (regno_start >= 0);
379
380 /* Emit a blockage insn here to keep these insns from being moved to
381 an earlier spot in the epilogue.
382
383 This is necessary as we must not cut the stack back before all the
384 restores are finished. */
385 emit_insn (gen_blockage ());
386
387 if (sp_adjust)
388 pru_add_to_sp (sp_adjust, REG_CFA_ADJUST_CFA);
389
390 if (!sibcall_p)
391 emit_jump_insn (gen_simple_return ());
392 }
393
394 /* Implement RETURN_ADDR_RTX. Note, we do not support moving
395 back to a previous frame. */
396 rtx
397 pru_get_return_address (int count)
398 {
399 if (count != 0)
400 return NULL_RTX;
401
402 /* Return r3.w2. */
403 return get_hard_reg_initial_val (HImode, RA_REGNUM);
404 }
405
406 /* Implement FUNCTION_PROFILER macro. */
407 void
408 pru_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
409 {
410 fprintf (file, "\tmov\tr1, ra\n");
411 fprintf (file, "\tcall\t_mcount\n");
412 fprintf (file, "\tmov\tra, r1\n");
413 }
414
415 /* Dump stack layout. */
416 static void
417 pru_dump_frame_layout (FILE *file)
418 {
419 fprintf (file, "\t%s Current Frame Info\n", ASM_COMMENT_START);
420 fprintf (file, "\t%s total_size = %d\n", ASM_COMMENT_START,
421 cfun->machine->total_size);
422 fprintf (file, "\t%s var_size = %d\n", ASM_COMMENT_START,
423 cfun->machine->var_size);
424 fprintf (file, "\t%s out_args_size = %d\n", ASM_COMMENT_START,
425 cfun->machine->out_args_size);
426 fprintf (file, "\t%s save_reg_size = %d\n", ASM_COMMENT_START,
427 cfun->machine->save_reg_size);
428 fprintf (file, "\t%s initialized = %d\n", ASM_COMMENT_START,
429 cfun->machine->initialized);
430 fprintf (file, "\t%s save_regs_offset = %d\n", ASM_COMMENT_START,
431 cfun->machine->save_regs_offset);
432 fprintf (file, "\t%s is_leaf = %d\n", ASM_COMMENT_START,
433 crtl->is_leaf);
434 fprintf (file, "\t%s frame_pointer_needed = %d\n", ASM_COMMENT_START,
435 frame_pointer_needed);
436 fprintf (file, "\t%s pretend_args_size = %d\n", ASM_COMMENT_START,
437 crtl->args.pretend_args_size);
438 }
439
440 /* Return true if REGNO should be saved in the prologue. */
441 static bool
442 prologue_saved_reg_p (int regno)
443 {
444 gcc_assert (GP_REG_P (regno));
445
446 if (df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
447 return true;
448
449 /* 32-bit FP. */
450 if (frame_pointer_needed
451 && regno >= HARD_FRAME_POINTER_REGNUM
452 && regno < HARD_FRAME_POINTER_REGNUM + GET_MODE_SIZE (Pmode))
453 return true;
454
455 /* 16-bit RA. */
456 if (regno == RA_REGNUM && df_regs_ever_live_p (RA_REGNUM))
457 return true;
458 if (regno == RA_REGNUM + 1 && df_regs_ever_live_p (RA_REGNUM + 1))
459 return true;
460
461 return false;
462 }
463
464 /* Implement TARGET_CAN_ELIMINATE. */
465 static bool
466 pru_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
467 {
468 if (to == STACK_POINTER_REGNUM)
469 return !frame_pointer_needed;
470 return true;
471 }
472
473 /* Implement INITIAL_ELIMINATION_OFFSET macro. */
474 int
475 pru_initial_elimination_offset (int from, int to)
476 {
477 int offset;
478
479 /* Set OFFSET to the offset from the stack pointer. */
480 switch (from)
481 {
482 case FRAME_POINTER_REGNUM:
483 offset = cfun->machine->out_args_size;
484 break;
485
486 case ARG_POINTER_REGNUM:
487 offset = cfun->machine->total_size;
488 offset -= crtl->args.pretend_args_size;
489 break;
490
491 default:
492 gcc_unreachable ();
493 }
494
495 /* If we are asked for the frame pointer offset, then adjust OFFSET
496 by the offset from the frame pointer to the stack pointer. */
497 if (to == HARD_FRAME_POINTER_REGNUM)
498 offset -= cfun->machine->total_size - crtl->args.pretend_args_size;
499
500
501 return offset;
502 }
503
504 /* Return nonzero if this function is known to have a null epilogue.
505 This allows the optimizer to omit jumps to jumps if no stack
506 was created. */
507 int
508 pru_can_use_return_insn (void)
509 {
510 if (!reload_completed || crtl->profile)
511 return 0;
512
513 return cfun->machine->total_size == 0;
514 }
515 \f
516 /* Implement TARGET_HARD_REGNO_MODE_OK. */
517
518 static bool
519 pru_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
520 {
521 switch (GET_MODE_SIZE (mode))
522 {
523 case 1: return true;
524 case 2: return (regno % 4) <= 2;
525 case 4: return (regno % 4) == 0;
526 case 8: return (regno % 4) == 0;
527 case 16: return (regno % 4) == 0; /* Not sure why TImode is used. */
528 case 32: return (regno % 4) == 0; /* Not sure why CTImode is used. */
529 default:
530 /* TODO: Find out why VOIDmode and BLKmode are passed. */
531 gcc_assert (mode == BLKmode || mode == VOIDmode);
532 return (regno % 4) == 0;
533 }
534 }
535
536 /* Implement `TARGET_HARD_REGNO_SCRATCH_OK'.
537 Returns true if REGNO is safe to be allocated as a scratch
538 register (for a define_peephole2) in the current function. */
539
540 static bool
541 pru_hard_regno_scratch_ok (unsigned int regno)
542 {
543 /* Don't allow hard registers that might be part of the frame pointer.
544 Some places in the compiler just test for [HARD_]FRAME_POINTER_REGNUM
545 and don't handle a frame pointer that spans more than one register.
546 TODO: Fix those faulty places. */
547
548 if ((!reload_completed || frame_pointer_needed)
549 && (IN_RANGE (regno, HARD_FRAME_POINTER_REGNUM,
550 HARD_FRAME_POINTER_REGNUM + 3)
551 || IN_RANGE (regno, FRAME_POINTER_REGNUM,
552 FRAME_POINTER_REGNUM + 3)))
553 return false;
554
555 return true;
556 }
557
558
559 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
560
561 static bool
562 pru_hard_regno_call_part_clobbered (unsigned, unsigned regno,
563 machine_mode mode)
564 {
565 HARD_REG_SET caller_saved_set;
566 HARD_REG_SET callee_saved_set;
567
568 CLEAR_HARD_REG_SET (caller_saved_set);
569 CLEAR_HARD_REG_SET (callee_saved_set);
570
571 /* r0 and r1 are caller saved. */
572 add_range_to_hard_reg_set (&caller_saved_set, 0, 2 * 4);
573
574 add_range_to_hard_reg_set (&caller_saved_set, FIRST_ARG_REGNUM,
575 LAST_ARG_REGNUM + 1 - FIRST_ARG_REGNUM);
576
577 /* Treat SP as callee saved. */
578 add_range_to_hard_reg_set (&callee_saved_set, STACK_POINTER_REGNUM, 4);
579
580 /* r3 to r13 are callee saved. */
581 add_range_to_hard_reg_set (&callee_saved_set, FIRST_CALLEE_SAVED_REGNUM,
582 LAST_CALEE_SAVED_REGNUM + 1
583 - FIRST_CALLEE_SAVED_REGNUM);
584
585 return overlaps_hard_reg_set_p (caller_saved_set, mode, regno)
586 && overlaps_hard_reg_set_p (callee_saved_set, mode, regno);
587 }
588
589
590 /* Worker function for `HARD_REGNO_RENAME_OK'.
591 Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
592
593 int
594 pru_hard_regno_rename_ok (unsigned int old_reg,
595 unsigned int new_reg)
596 {
597 /* Don't allow hard registers that might be part of the frame pointer.
598 Some places in the compiler just test for [HARD_]FRAME_POINTER_REGNUM
599 and don't care for a frame pointer that spans more than one register.
600 TODO: Fix those faulty places. */
601 if ((!reload_completed || frame_pointer_needed)
602 && (IN_RANGE (old_reg, HARD_FRAME_POINTER_REGNUM,
603 HARD_FRAME_POINTER_REGNUM + 3)
604 || IN_RANGE (old_reg, FRAME_POINTER_REGNUM,
605 FRAME_POINTER_REGNUM + 3)
606 || IN_RANGE (new_reg, HARD_FRAME_POINTER_REGNUM,
607 HARD_FRAME_POINTER_REGNUM + 3)
608 || IN_RANGE (new_reg, FRAME_POINTER_REGNUM,
609 FRAME_POINTER_REGNUM + 3)))
610 return 0;
611
612 return 1;
613 }
614 \f
615 /* Allocate a chunk of memory for per-function machine-dependent data. */
616 static struct machine_function *
617 pru_init_machine_status (void)
618 {
619 return ggc_cleared_alloc<machine_function> ();
620 }
621
622 /* Implement TARGET_OPTION_OVERRIDE. */
623 static void
624 pru_option_override (void)
625 {
626 #ifdef SUBTARGET_OVERRIDE_OPTIONS
627 SUBTARGET_OVERRIDE_OPTIONS;
628 #endif
629
630 /* Check for unsupported options. */
631 if (flag_pic == 1)
632 warning (OPT_fpic, "%<-fpic%> is not supported");
633 if (flag_pic == 2)
634 warning (OPT_fPIC, "%<-fPIC%> is not supported");
635 if (flag_pie == 1)
636 warning (OPT_fpie, "%<-fpie%> is not supported");
637 if (flag_pie == 2)
638 warning (OPT_fPIE, "%<-fPIE%> is not supported");
639
640 /* QBxx conditional branching cannot cope with block reordering. */
641 if (flag_reorder_blocks_and_partition)
642 {
643 inform (input_location, "%<-freorder-blocks-and-partition%> "
644 "not supported on this architecture");
645 flag_reorder_blocks_and_partition = 0;
646 flag_reorder_blocks = 1;
647 }
648
649 /* Function to allocate machine-dependent function status. */
650 init_machine_status = &pru_init_machine_status;
651
652 /* Save the initial options in case the user does function specific
653 options. */
654 target_option_default_node = target_option_current_node
655 = build_target_option_node (&global_options);
656
657 /* Due to difficulties in implementing the TI ABI with GCC,
658 at least check and error-out if GCC cannot compile a
659 compliant output. */
660 pru_register_abicheck_pass ();
661 }
662 \f
663 /* Compute a (partial) cost for rtx X. Return true if the complete
664 cost has been computed, and false if subexpressions should be
665 scanned. In either case, *TOTAL contains the cost result. */
666 static bool
667 pru_rtx_costs (rtx x, machine_mode mode,
668 int outer_code, int opno ATTRIBUTE_UNUSED,
669 int *total, bool speed ATTRIBUTE_UNUSED)
670 {
671 const int code = GET_CODE (x);
672
673 switch (code)
674 {
675 case CONST_INT:
676 if ((mode == VOIDmode && UBYTE_INT (INTVAL (x)))
677 || (mode != VOIDmode && const_ubyte_operand (x, mode)))
678 {
679 *total = COSTS_N_INSNS (0);
680 return true;
681 }
682 else if ((mode == VOIDmode && UHWORD_INT (INTVAL (x)))
683 || (mode != VOIDmode && const_uhword_operand (x, mode)))
684 {
685 *total = COSTS_N_INSNS (1);
686 return true;
687 }
688 else if (outer_code == MEM && ctable_addr_operand (x, VOIDmode))
689 {
690 *total = COSTS_N_INSNS (0);
691 return true;
692 }
693 else
694 {
695 *total = COSTS_N_INSNS (2);
696 return true;
697 }
698
699 case LABEL_REF:
700 case SYMBOL_REF:
701 case CONST:
702 {
703 *total = COSTS_N_INSNS (1);
704 return true;
705 }
706 case CONST_DOUBLE:
707 {
708 *total = COSTS_N_INSNS (2);
709 return true;
710 }
711 case CONST_WIDE_INT:
712 {
713 /* PRU declares no vector or very large integer types. */
714 gcc_unreachable ();
715 return true;
716 }
717 case SET:
718 {
719 int factor;
720
721 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
722 the mode for the factor. */
723 mode = GET_MODE (SET_DEST (x));
724
725 /* SI move has the same cost as a QI move. Moves larger than
726 64 bits are costly. */
727 factor = CEIL (GET_MODE_SIZE (mode), GET_MODE_SIZE (SImode));
728 *total = factor * COSTS_N_INSNS (1);
729
730 return false;
731 }
732
733 case MULT:
734 {
735 /* Factor in that "mul" requires fixed registers, which
736 would likely require register moves. */
737 *total = COSTS_N_INSNS (7);
738 return false;
739 }
740 case PLUS:
741 {
742 rtx op0 = XEXP (x, 0);
743 rtx op1 = XEXP (x, 1);
744 machine_mode op1_mode = GET_MODE (op1);
745
746 /* Generic RTL address expressions do not enforce mode for
747 offsets, yet our UBYTE constraint requires it. Fix it here. */
748 if (op1_mode == VOIDmode && CONST_INT_P (op1) && outer_code == MEM)
749 op1_mode = Pmode;
750 if (outer_code == MEM
751 && ((REG_P (op0) && reg_or_ubyte_operand (op1, op1_mode))
752 || ctable_addr_operand (op0, VOIDmode)
753 || ctable_addr_operand (op1, VOIDmode)
754 || (ctable_base_operand (op0, VOIDmode) && REG_P (op1))
755 || (ctable_base_operand (op1, VOIDmode) && REG_P (op0))))
756 {
757 /* CTABLE or REG base addressing - PLUS comes for free. */
758 *total = COSTS_N_INSNS (0);
759 return true;
760 }
761 else
762 {
763 *total = COSTS_N_INSNS (1);
764 return false;
765 }
766 }
767 case SIGN_EXTEND:
768 {
769 *total = COSTS_N_INSNS (3);
770 return false;
771 }
772 case ASHIFTRT:
773 {
774 rtx op1 = XEXP (x, 1);
775 if (const_1_operand (op1, VOIDmode))
776 *total = COSTS_N_INSNS (3);
777 else
778 *total = COSTS_N_INSNS (7);
779 return false;
780 }
781 case ZERO_EXTRACT:
782 {
783 rtx op2 = XEXP (x, 2);
784 if ((outer_code == EQ || outer_code == NE)
785 && CONST_INT_P (op2)
786 && INTVAL (op2) == 1)
787 {
788 /* Branch if bit is set/clear is a single instruction. */
789 *total = COSTS_N_INSNS (0);
790 return true;
791 }
792 else
793 {
794 *total = COSTS_N_INSNS (2);
795 return false;
796 }
797 }
798 case ZERO_EXTEND:
799 {
800 *total = COSTS_N_INSNS (0);
801 return false;
802 }
803
804 default:
805 {
806 /* PRU ALU is 32 bit, despite GCC's UNITS_PER_WORD=1. */
807 int factor = CEIL (GET_MODE_SIZE (mode), GET_MODE_SIZE (SImode));
808 *total = factor * COSTS_N_INSNS (1);
809 return false;
810 }
811 }
812 }
813 \f
814 static GTY(()) rtx eqdf_libfunc;
815 static GTY(()) rtx nedf_libfunc;
816 static GTY(()) rtx ledf_libfunc;
817 static GTY(()) rtx ltdf_libfunc;
818 static GTY(()) rtx gedf_libfunc;
819 static GTY(()) rtx gtdf_libfunc;
820 static GTY(()) rtx eqsf_libfunc;
821 static GTY(()) rtx nesf_libfunc;
822 static GTY(()) rtx lesf_libfunc;
823 static GTY(()) rtx ltsf_libfunc;
824 static GTY(()) rtx gesf_libfunc;
825 static GTY(()) rtx gtsf_libfunc;
826
827 /* Implement the TARGET_INIT_LIBFUNCS macro. We use this to rename library
828 functions to match the PRU ABI. */
829
830 static void
831 pru_init_libfuncs (void)
832 {
833 /* Double-precision floating-point arithmetic. */
834 set_optab_libfunc (add_optab, DFmode, "__pruabi_addd");
835 set_optab_libfunc (sdiv_optab, DFmode, "__pruabi_divd");
836 set_optab_libfunc (smul_optab, DFmode, "__pruabi_mpyd");
837 set_optab_libfunc (neg_optab, DFmode, "__pruabi_negd");
838 set_optab_libfunc (sub_optab, DFmode, "__pruabi_subd");
839
840 /* Single-precision floating-point arithmetic. */
841 set_optab_libfunc (add_optab, SFmode, "__pruabi_addf");
842 set_optab_libfunc (sdiv_optab, SFmode, "__pruabi_divf");
843 set_optab_libfunc (smul_optab, SFmode, "__pruabi_mpyf");
844 set_optab_libfunc (neg_optab, SFmode, "__pruabi_negf");
845 set_optab_libfunc (sub_optab, SFmode, "__pruabi_subf");
846
847 /* Floating-point comparisons. */
848 eqsf_libfunc = init_one_libfunc ("__pruabi_eqf");
849 nesf_libfunc = init_one_libfunc ("__pruabi_neqf");
850 lesf_libfunc = init_one_libfunc ("__pruabi_lef");
851 ltsf_libfunc = init_one_libfunc ("__pruabi_ltf");
852 gesf_libfunc = init_one_libfunc ("__pruabi_gef");
853 gtsf_libfunc = init_one_libfunc ("__pruabi_gtf");
854 eqdf_libfunc = init_one_libfunc ("__pruabi_eqd");
855 nedf_libfunc = init_one_libfunc ("__pruabi_neqd");
856 ledf_libfunc = init_one_libfunc ("__pruabi_led");
857 ltdf_libfunc = init_one_libfunc ("__pruabi_ltd");
858 gedf_libfunc = init_one_libfunc ("__pruabi_ged");
859 gtdf_libfunc = init_one_libfunc ("__pruabi_gtd");
860
861 /* In PRU ABI, much like other TI processors, floating point
862 comparisons return non-standard values. This quirk is handled
863 by disabling the optab library functions, and handling the
864 comparison during RTL expansion. */
865 set_optab_libfunc (eq_optab, SFmode, NULL);
866 set_optab_libfunc (ne_optab, SFmode, NULL);
867 set_optab_libfunc (gt_optab, SFmode, NULL);
868 set_optab_libfunc (ge_optab, SFmode, NULL);
869 set_optab_libfunc (lt_optab, SFmode, NULL);
870 set_optab_libfunc (le_optab, SFmode, NULL);
871 set_optab_libfunc (eq_optab, DFmode, NULL);
872 set_optab_libfunc (ne_optab, DFmode, NULL);
873 set_optab_libfunc (gt_optab, DFmode, NULL);
874 set_optab_libfunc (ge_optab, DFmode, NULL);
875 set_optab_libfunc (lt_optab, DFmode, NULL);
876 set_optab_libfunc (le_optab, DFmode, NULL);
877
878 /* The isunordered function appears to be supported only by GCC. */
879 set_optab_libfunc (unord_optab, SFmode, "__pruabi_unordf");
880 set_optab_libfunc (unord_optab, DFmode, "__pruabi_unordd");
881
882 /* Floating-point to integer conversions. */
883 set_conv_libfunc (sfix_optab, SImode, DFmode, "__pruabi_fixdi");
884 set_conv_libfunc (ufix_optab, SImode, DFmode, "__pruabi_fixdu");
885 set_conv_libfunc (sfix_optab, DImode, DFmode, "__pruabi_fixdlli");
886 set_conv_libfunc (ufix_optab, DImode, DFmode, "__pruabi_fixdull");
887 set_conv_libfunc (sfix_optab, SImode, SFmode, "__pruabi_fixfi");
888 set_conv_libfunc (ufix_optab, SImode, SFmode, "__pruabi_fixfu");
889 set_conv_libfunc (sfix_optab, DImode, SFmode, "__pruabi_fixflli");
890 set_conv_libfunc (ufix_optab, DImode, SFmode, "__pruabi_fixfull");
891
892 /* Conversions between floating types. */
893 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__pruabi_cvtdf");
894 set_conv_libfunc (sext_optab, DFmode, SFmode, "__pruabi_cvtfd");
895
896 /* Integer to floating-point conversions. */
897 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__pruabi_fltid");
898 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__pruabi_fltud");
899 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__pruabi_fltllid");
900 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__pruabi_fltulld");
901 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__pruabi_fltif");
902 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__pruabi_fltuf");
903 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__pruabi_fltllif");
904 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__pruabi_fltullf");
905
906 /* Long long. */
907 set_optab_libfunc (ashr_optab, DImode, "__pruabi_asrll");
908 set_optab_libfunc (smul_optab, DImode, "__pruabi_mpyll");
909 set_optab_libfunc (ashl_optab, DImode, "__pruabi_lslll");
910 set_optab_libfunc (lshr_optab, DImode, "__pruabi_lsrll");
911
912 set_optab_libfunc (sdiv_optab, SImode, "__pruabi_divi");
913 set_optab_libfunc (udiv_optab, SImode, "__pruabi_divu");
914 set_optab_libfunc (smod_optab, SImode, "__pruabi_remi");
915 set_optab_libfunc (umod_optab, SImode, "__pruabi_remu");
916 set_optab_libfunc (sdivmod_optab, SImode, "__pruabi_divremi");
917 set_optab_libfunc (udivmod_optab, SImode, "__pruabi_divremu");
918 set_optab_libfunc (sdiv_optab, DImode, "__pruabi_divlli");
919 set_optab_libfunc (udiv_optab, DImode, "__pruabi_divull");
920 set_optab_libfunc (smod_optab, DImode, "__pruabi_remlli");
921 set_optab_libfunc (umod_optab, DImode, "__pruabi_remull");
922 set_optab_libfunc (udivmod_optab, DImode, "__pruabi_divremull");
923 }
924
925
926 /* Emit comparison instruction if necessary, returning the expression
927 that holds the compare result in the proper mode. Return the comparison
928 that should be used in the jump insn. */
929
930 rtx
931 pru_expand_fp_compare (rtx comparison, machine_mode mode)
932 {
933 enum rtx_code code = GET_CODE (comparison);
934 rtx op0 = XEXP (comparison, 0);
935 rtx op1 = XEXP (comparison, 1);
936 rtx cmp;
937 enum rtx_code jump_code = code;
938 machine_mode op_mode = GET_MODE (op0);
939 rtx_insn *insns;
940 rtx libfunc;
941
942 gcc_assert (op_mode == DFmode || op_mode == SFmode);
943
944 /* FP exceptions are not raised by PRU's softfp implementation. So the
945 following transformations are safe. */
946 if (code == UNGE)
947 {
948 code = LT;
949 jump_code = EQ;
950 }
951 else if (code == UNLE)
952 {
953 code = GT;
954 jump_code = EQ;
955 }
956 else
957 jump_code = NE;
958
959 switch (code)
960 {
961 case EQ:
962 libfunc = op_mode == DFmode ? eqdf_libfunc : eqsf_libfunc;
963 break;
964 case NE:
965 libfunc = op_mode == DFmode ? nedf_libfunc : nesf_libfunc;
966 break;
967 case GT:
968 libfunc = op_mode == DFmode ? gtdf_libfunc : gtsf_libfunc;
969 break;
970 case GE:
971 libfunc = op_mode == DFmode ? gedf_libfunc : gesf_libfunc;
972 break;
973 case LT:
974 libfunc = op_mode == DFmode ? ltdf_libfunc : ltsf_libfunc;
975 break;
976 case LE:
977 libfunc = op_mode == DFmode ? ledf_libfunc : lesf_libfunc;
978 break;
979 default:
980 gcc_unreachable ();
981 }
982 start_sequence ();
983
984 cmp = emit_library_call_value (libfunc, 0, LCT_CONST, SImode,
985 op0, op_mode, op1, op_mode);
986 insns = get_insns ();
987 end_sequence ();
988
989 emit_libcall_block (insns, cmp, cmp,
990 gen_rtx_fmt_ee (code, SImode, op0, op1));
991
992 return gen_rtx_fmt_ee (jump_code, mode, cmp, const0_rtx);
993 }
994 \f
995 /* Return the sign bit position for given OP's mode. */
996 static int
997 sign_bit_position (const rtx op)
998 {
999 const int sz = GET_MODE_SIZE (GET_MODE (op));
1000
1001 return sz * 8 - 1;
1002 }
1003
1004 /* Output asm code for sign_extend operation. */
1005 const char *
1006 pru_output_sign_extend (rtx *operands)
1007 {
1008 static char buf[512];
1009 int bufi;
1010 const int dst_sz = GET_MODE_SIZE (GET_MODE (operands[0]));
1011 const int src_sz = GET_MODE_SIZE (GET_MODE (operands[1]));
1012 char ext_start;
1013
1014 switch (src_sz)
1015 {
1016 case 1: ext_start = 'y'; break;
1017 case 2: ext_start = 'z'; break;
1018 default: gcc_unreachable ();
1019 }
1020
1021 gcc_assert (dst_sz > src_sz);
1022
1023 /* Note that src and dst can be different parts of the same
1024 register, e.g. "r7, r7.w1". */
1025 bufi = snprintf (buf, sizeof (buf),
1026 "mov\t%%0, %%1\n\t" /* Copy AND make positive. */
1027 "qbbc\t.+8, %%0, %d\n\t" /* Check sign bit. */
1028 "fill\t%%%c0, %d", /* Make negative. */
1029 sign_bit_position (operands[1]),
1030 ext_start,
1031 dst_sz - src_sz);
1032
1033 gcc_assert (bufi > 0);
1034 gcc_assert ((unsigned int) bufi < sizeof (buf));
1035
1036 return buf;
1037 }
1038 \f
1039 /* Branches and compares. */
1040
1041 /* PRU's ALU does not support signed comparison operations. That's why we
1042 emulate them. By first checking the sign bit and handling every possible
1043 operand sign combination, we can simulate signed comparisons in just
1044 5 instructions. See table below.
1045
1046 .-------------------.---------------------------------------------------.
1047 | Operand sign bit | Mapping the signed comparison to an unsigned one |
1048 |---------+---------+------------+------------+------------+------------|
1049 | OP1.b31 | OP2.b31 | OP1 < OP2 | OP1 <= OP2 | OP1 > OP2 | OP1 >= OP2 |
1050 |---------+---------+------------+------------+------------+------------|
1051 | 0 | 0 | OP1 < OP2 | OP1 <= OP2 | OP1 > OP2 | OP1 >= OP2 |
1052 |---------+---------+------------+------------+------------+------------|
1053 | 0 | 1 | false | false | true | true |
1054 |---------+---------+------------+------------+------------+------------|
1055 | 1 | 0 | true | true | false | false |
1056 |---------+---------+------------+------------+------------+------------|
1057 | 1 | 1 | OP1 < OP2 | OP1 <= OP2 | OP1 > OP2 | OP1 >= OP2 |
1058 `---------'---------'------------'------------'------------'------------'
1059
1060
1061 Given the table above, here is an example for a concrete op:
1062 LT:
1063 qbbc OP1_POS, OP1, 31
1064 OP1_NEG: qbbc BRANCH_TAKEN_LABEL, OP2, 31
1065 OP1_NEG_OP2_NEG: qblt BRANCH_TAKEN_LABEL, OP2, OP1
1066 ; jmp OUT -> can be eliminated because we'll take the
1067 ; following branch. OP2.b31 is guaranteed to be 1
1068 ; by the time we get here.
1069 OP1_POS: qbbs OUT, OP2, 31
1070 OP1_POS_OP2_POS: qblt BRANCH_TAKEN_LABEL, OP2, OP1
1071 #if FAR_JUMP
1072 jmp OUT
1073 BRANCH_TAKEN_LABEL: jmp REAL_BRANCH_TAKEN_LABEL
1074 #endif
1075 OUT:
1076
1077 */
1078
1079 /* Output asm code for a signed-compare LT/LE conditional branch. */
1080 static const char *
1081 pru_output_ltle_signed_cbranch (rtx *operands, bool is_near)
1082 {
1083 static char buf[1024];
1084 enum rtx_code code = GET_CODE (operands[0]);
1085 rtx op1;
1086 rtx op2;
1087 const char *cmp_opstr;
1088 int bufi = 0;
1089
1090 op1 = operands[1];
1091 op2 = operands[2];
1092
1093 gcc_assert (GET_CODE (op1) == REG && GET_CODE (op2) == REG);
1094
1095 /* Determine the comparison operators for positive and negative operands. */
1096 if (code == LT)
1097 cmp_opstr = "qblt";
1098 else if (code == LE)
1099 cmp_opstr = "qble";
1100 else
1101 gcc_unreachable ();
1102
1103 if (is_near)
1104 bufi = snprintf (buf, sizeof (buf),
1105 "qbbc\t.+12, %%1, %d\n\t"
1106 "qbbc\t%%l3, %%2, %d\n\t" /* OP1_NEG. */
1107 "%s\t%%l3, %%2, %%1\n\t" /* OP1_NEG_OP2_NEG. */
1108 "qbbs\t.+8, %%2, %d\n\t" /* OP1_POS. */
1109 "%s\t%%l3, %%2, %%1", /* OP1_POS_OP2_POS. */
1110 sign_bit_position (op1),
1111 sign_bit_position (op2),
1112 cmp_opstr,
1113 sign_bit_position (op2),
1114 cmp_opstr);
1115 else
1116 bufi = snprintf (buf, sizeof (buf),
1117 "qbbc\t.+12, %%1, %d\n\t"
1118 "qbbc\t.+20, %%2, %d\n\t" /* OP1_NEG. */
1119 "%s\t.+16, %%2, %%1\n\t" /* OP1_NEG_OP2_NEG. */
1120 "qbbs\t.+16, %%2, %d\n\t" /* OP1_POS. */
1121 "%s\t.+8, %%2, %%1\n\t" /* OP1_POS_OP2_POS. */
1122 "jmp\t.+8\n\t" /* jmp OUT. */
1123 "jmp\t%%%%label(%%l3)", /* BRANCH_TAKEN_LABEL. */
1124 sign_bit_position (op1),
1125 sign_bit_position (op2),
1126 cmp_opstr,
1127 sign_bit_position (op2),
1128 cmp_opstr);
1129
1130 gcc_assert (bufi > 0);
1131 gcc_assert ((unsigned int) bufi < sizeof (buf));
1132
1133 return buf;
1134 }
1135
1136 /* Output asm code for a signed-compare GT/GE conditional branch. */
1137 static const char *
1138 pru_output_gtge_signed_cbranch (rtx *operands, bool is_near)
1139 {
1140 static char buf[1024];
1141 enum rtx_code code = GET_CODE (operands[0]);
1142 rtx op1;
1143 rtx op2;
1144 const char *cmp_opstr;
1145 int bufi = 0;
1146
1147 op1 = operands[1];
1148 op2 = operands[2];
1149
1150 gcc_assert (GET_CODE (op1) == REG && GET_CODE (op2) == REG);
1151
1152 /* Determine the comparison operators for positive and negative operands. */
1153 if (code == GT)
1154 cmp_opstr = "qbgt";
1155 else if (code == GE)
1156 cmp_opstr = "qbge";
1157 else
1158 gcc_unreachable ();
1159
1160 if (is_near)
1161 bufi = snprintf (buf, sizeof (buf),
1162 "qbbs\t.+12, %%1, %d\n\t"
1163 "qbbs\t%%l3, %%2, %d\n\t" /* OP1_POS. */
1164 "%s\t%%l3, %%2, %%1\n\t" /* OP1_POS_OP2_POS. */
1165 "qbbc\t.+8, %%2, %d\n\t" /* OP1_NEG. */
1166 "%s\t%%l3, %%2, %%1", /* OP1_NEG_OP2_NEG. */
1167 sign_bit_position (op1),
1168 sign_bit_position (op2),
1169 cmp_opstr,
1170 sign_bit_position (op2),
1171 cmp_opstr);
1172 else
1173 bufi = snprintf (buf, sizeof (buf),
1174 "qbbs\t.+12, %%1, %d\n\t"
1175 "qbbs\t.+20, %%2, %d\n\t" /* OP1_POS. */
1176 "%s\t.+16, %%2, %%1\n\t" /* OP1_POS_OP2_POS. */
1177 "qbbc\t.+16, %%2, %d\n\t" /* OP1_NEG. */
1178 "%s\t.+8, %%2, %%1\n\t" /* OP1_NEG_OP2_NEG. */
1179 "jmp\t.+8\n\t" /* jmp OUT. */
1180 "jmp\t%%%%label(%%l3)", /* BRANCH_TAKEN_LABEL. */
1181 sign_bit_position (op1),
1182 sign_bit_position (op2),
1183 cmp_opstr,
1184 sign_bit_position (op2),
1185 cmp_opstr);
1186
1187 gcc_assert (bufi > 0);
1188 gcc_assert ((unsigned int) bufi < sizeof (buf));
1189
1190 return buf;
1191 }
1192
1193 /* Output asm code for a signed-compare conditional branch.
1194
1195 If IS_NEAR is true, then QBBx instructions may be used for reaching
1196 the destination label. Otherwise JMP is used, at the expense of
1197 increased code size. */
1198 const char *
1199 pru_output_signed_cbranch (rtx *operands, bool is_near)
1200 {
1201 enum rtx_code code = GET_CODE (operands[0]);
1202
1203 if (code == LT || code == LE)
1204 return pru_output_ltle_signed_cbranch (operands, is_near);
1205 else if (code == GT || code == GE)
1206 return pru_output_gtge_signed_cbranch (operands, is_near);
1207 else
1208 gcc_unreachable ();
1209 }
1210
1211 /* Optimized version of pru_output_signed_cbranch for constant second
1212 operand. */
1213
1214 const char *
1215 pru_output_signed_cbranch_ubyteop2 (rtx *operands, bool is_near)
1216 {
1217 static char buf[1024];
1218 enum rtx_code code = GET_CODE (operands[0]);
1219 int regop_sign_bit_pos = sign_bit_position (operands[1]);
1220 const char *cmp_opstr;
1221 const char *rcmp_opstr;
1222
1223 /* We must swap operands due to PRU's demand OP1 to be the immediate. */
1224 code = swap_condition (code);
1225
1226 /* Determine normal and reversed comparison operators for both positive
1227 operands. This enables us to go completely unsigned.
1228
1229 NOTE: We cannot use the R print modifier because we convert signed
1230 comparison operators to unsigned ones. */
1231 switch (code)
1232 {
1233 case LT: cmp_opstr = "qblt"; rcmp_opstr = "qbge"; break;
1234 case LE: cmp_opstr = "qble"; rcmp_opstr = "qbgt"; break;
1235 case GT: cmp_opstr = "qbgt"; rcmp_opstr = "qble"; break;
1236 case GE: cmp_opstr = "qbge"; rcmp_opstr = "qblt"; break;
1237 default: gcc_unreachable ();
1238 }
1239
1240 /* OP2 is a constant unsigned byte - utilize this info to generate
1241 optimized code. We can "remove half" of the op table above because
1242 we know that OP2.b31 = 0 (remember that 0 <= OP2 <= 255). */
1243 if (code == LT || code == LE)
1244 {
1245 if (is_near)
1246 snprintf (buf, sizeof (buf),
1247 "qbbs\t.+8, %%1, %d\n\t"
1248 "%s\t%%l3, %%1, %%u2",
1249 regop_sign_bit_pos,
1250 cmp_opstr);
1251 else
1252 snprintf (buf, sizeof (buf),
1253 "qbbs\t.+12, %%1, %d\n\t"
1254 "%s\t.+8, %%1, %%u2\n\t"
1255 "jmp\t%%%%label(%%l3)",
1256 regop_sign_bit_pos,
1257 rcmp_opstr);
1258 }
1259 else if (code == GT || code == GE)
1260 {
1261 if (is_near)
1262 snprintf (buf, sizeof (buf),
1263 "qbbs\t%%l3, %%1, %d\n\t"
1264 "%s\t%%l3, %%1, %%u2",
1265 regop_sign_bit_pos,
1266 cmp_opstr);
1267 else
1268 snprintf (buf, sizeof (buf),
1269 "qbbs\t.+8, %%1, %d\n\t"
1270 "%s\t.+8, %%1, %%u2\n\t"
1271 "jmp\t%%%%label(%%l3)",
1272 regop_sign_bit_pos,
1273 rcmp_opstr);
1274 }
1275 else
1276 gcc_unreachable ();
1277
1278 return buf;
1279 }
1280
1281 /* Optimized version of pru_output_signed_cbranch_ubyteop2 for constant
1282 zero second operand. */
1283
1284 const char *
1285 pru_output_signed_cbranch_zeroop2 (rtx *operands, bool is_near)
1286 {
1287 static char buf[1024];
1288 enum rtx_code code = GET_CODE (operands[0]);
1289 int regop_sign_bit_pos = sign_bit_position (operands[1]);
1290
1291 /* OP2 is a constant zero - utilize this info to simply check the
1292 OP1 sign bit when comparing for LT or GE. */
1293 if (code == LT)
1294 {
1295 if (is_near)
1296 snprintf (buf, sizeof (buf),
1297 "qbbs\t%%l3, %%1, %d\n\t",
1298 regop_sign_bit_pos);
1299 else
1300 snprintf (buf, sizeof (buf),
1301 "qbbc\t.+8, %%1, %d\n\t"
1302 "jmp\t%%%%label(%%l3)",
1303 regop_sign_bit_pos);
1304 }
1305 else if (code == GE)
1306 {
1307 if (is_near)
1308 snprintf (buf, sizeof (buf),
1309 "qbbc\t%%l3, %%1, %d\n\t",
1310 regop_sign_bit_pos);
1311 else
1312 snprintf (buf, sizeof (buf),
1313 "qbbs\t.+8, %%1, %d\n\t"
1314 "jmp\t%%%%label(%%l3)",
1315 regop_sign_bit_pos);
1316 }
1317 else
1318 gcc_unreachable ();
1319
1320 return buf;
1321 }
1322
1323 /* Addressing Modes. */
1324
1325 /* Return true if register REGNO is a valid base register.
1326 STRICT_P is true if REG_OK_STRICT is in effect. */
1327
1328 bool
1329 pru_regno_ok_for_base_p (int regno, bool strict_p)
1330 {
1331 if (!HARD_REGISTER_NUM_P (regno) && !strict_p)
1332 return true;
1333
1334 /* The fake registers will be eliminated to either the stack or
1335 hard frame pointer, both of which are usually valid base registers.
1336 Reload deals with the cases where the eliminated form isn't valid. */
1337 return (GP_REG_P (regno)
1338 || regno == FRAME_POINTER_REGNUM
1339 || regno == ARG_POINTER_REGNUM);
1340 }
1341
1342 /* Return true if given xbbo constant OFFSET is valid. */
1343 static bool
1344 pru_valid_const_ubyte_offset (machine_mode mode, HOST_WIDE_INT offset)
1345 {
1346 bool valid = UBYTE_INT (offset);
1347
1348 /* Reload can split multi word accesses, so make sure we can address
1349 the second word in a DI. */
1350 if (valid && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode))
1351 valid = UBYTE_INT (offset + GET_MODE_SIZE (mode) - 1);
1352
1353 return valid;
1354 }
1355
1356 /* Recognize a CTABLE base address. Return CTABLE entry index, or -1 if
1357 base was not found in the pragma-filled pru_ctable. */
1358 int
1359 pru_get_ctable_exact_base_index (unsigned HOST_WIDE_INT caddr)
1360 {
1361 unsigned int i;
1362
1363 for (i = 0; i < ARRAY_SIZE (pru_ctable); i++)
1364 {
1365 if (pru_ctable[i].valid && pru_ctable[i].base == caddr)
1366 return i;
1367 }
1368 return -1;
1369 }
1370
1371
1372 /* Check if the given address can be addressed via CTABLE_BASE + UBYTE_OFFS,
1373 and return the base CTABLE index if possible. */
1374 int
1375 pru_get_ctable_base_index (unsigned HOST_WIDE_INT caddr)
1376 {
1377 unsigned int i;
1378
1379 for (i = 0; i < ARRAY_SIZE (pru_ctable); i++)
1380 {
1381 if (pru_ctable[i].valid && IN_RANGE (caddr,
1382 pru_ctable[i].base,
1383 pru_ctable[i].base + 0xff))
1384 return i;
1385 }
1386 return -1;
1387 }
1388
1389
1390 /* Return the offset from some CTABLE base for this address. */
1391 int
1392 pru_get_ctable_base_offset (unsigned HOST_WIDE_INT caddr)
1393 {
1394 int i;
1395
1396 i = pru_get_ctable_base_index (caddr);
1397 gcc_assert (i >= 0);
1398
1399 return caddr - pru_ctable[i].base;
1400 }
1401
1402 /* Return true if the address expression formed by BASE + OFFSET is
1403 valid.
1404
1405 Note that the following address is not handled here:
1406 base CTABLE constant base + UBYTE constant offset
1407 The constants will be folded. The ctable_addr_operand predicate will take
1408 care of the validation. The CTABLE base+offset split will happen during
1409 operand printing. */
1410 static bool
1411 pru_valid_addr_expr_p (machine_mode mode, rtx base, rtx offset, bool strict_p)
1412 {
1413 if (!strict_p && GET_CODE (base) == SUBREG)
1414 base = SUBREG_REG (base);
1415 if (!strict_p && GET_CODE (offset) == SUBREG)
1416 offset = SUBREG_REG (offset);
1417
1418 if (REG_P (base)
1419 && pru_regno_ok_for_base_p (REGNO (base), strict_p)
1420 && ((CONST_INT_P (offset)
1421 && pru_valid_const_ubyte_offset (mode, INTVAL (offset)))
1422 || (REG_P (offset)
1423 && pru_regno_ok_for_index_p (REGNO (offset), strict_p))))
1424 /* base register + register offset
1425 * OR base register + UBYTE constant offset. */
1426 return true;
1427 else if (REG_P (base)
1428 && pru_regno_ok_for_index_p (REGNO (base), strict_p)
1429 && ctable_base_operand (offset, VOIDmode))
1430 /* base CTABLE constant base + register offset
1431 * Note: GCC always puts the register as a first operand of PLUS. */
1432 return true;
1433 else
1434 return false;
1435 }
1436
1437 /* Implement TARGET_LEGITIMATE_ADDRESS_P. */
1438 static bool
1439 pru_legitimate_address_p (machine_mode mode,
1440 rtx operand, bool strict_p)
1441 {
1442 switch (GET_CODE (operand))
1443 {
1444 /* Direct. */
1445 case SYMBOL_REF:
1446 case LABEL_REF:
1447 case CONST:
1448 case CONST_WIDE_INT:
1449 return false;
1450
1451 case CONST_INT:
1452 return ctable_addr_operand (operand, VOIDmode);
1453
1454 /* Register indirect. */
1455 case REG:
1456 return pru_regno_ok_for_base_p (REGNO (operand), strict_p);
1457
1458 /* Register indirect with displacement. */
1459 case PLUS:
1460 {
1461 rtx op0 = XEXP (operand, 0);
1462 rtx op1 = XEXP (operand, 1);
1463
1464 return pru_valid_addr_expr_p (mode, op0, op1, strict_p);
1465 }
1466
1467 default:
1468 break;
1469 }
1470 return false;
1471 }
1472 \f
1473 /* Output assembly language related definitions. */
1474
1475 /* Implement TARGET_ASM_CONSTRUCTOR. */
1476 static void
1477 pru_elf_asm_constructor (rtx symbol, int priority)
1478 {
1479 char buf[23];
1480 section *s;
1481
1482 if (priority == DEFAULT_INIT_PRIORITY)
1483 snprintf (buf, sizeof (buf), ".init_array");
1484 else
1485 {
1486 /* While priority is known to be in range [0, 65535], so 18 bytes
1487 would be enough, the compiler might not know that. To avoid
1488 -Wformat-truncation false positive, use a larger size. */
1489 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
1490 }
1491 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
1492 switch_to_section (s);
1493 assemble_aligned_integer (INIT_ARRAY_ENTRY_BYTES, symbol);
1494 }
1495
1496 /* Implement TARGET_ASM_DESTRUCTOR. */
1497 static void
1498 pru_elf_asm_destructor (rtx symbol, int priority)
1499 {
1500 char buf[23];
1501 section *s;
1502
1503 if (priority == DEFAULT_INIT_PRIORITY)
1504 snprintf (buf, sizeof (buf), ".fini_array");
1505 else
1506 {
1507 /* While priority is known to be in range [0, 65535], so 18 bytes
1508 would be enough, the compiler might not know that. To avoid
1509 -Wformat-truncation false positive, use a larger size. */
1510 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
1511 }
1512 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
1513 switch_to_section (s);
1514 assemble_aligned_integer (INIT_ARRAY_ENTRY_BYTES, symbol);
1515 }
1516
1517 /* Map rtx_code to unsigned PRU branch op suffix. Callers must
1518 handle sign comparison themselves for signed operations. */
1519 static const char *
1520 pru_comparison_str (enum rtx_code cond)
1521 {
1522 switch (cond)
1523 {
1524 case NE: return "ne";
1525 case EQ: return "eq";
1526 case GEU: return "ge";
1527 case GTU: return "gt";
1528 case LEU: return "le";
1529 case LTU: return "lt";
1530 default: gcc_unreachable ();
1531 }
1532 }
1533
1534 /* Access some RTX as INT_MODE. If X is a CONST_FIXED we can get
1535 the bit representation of X by "casting" it to CONST_INT. */
1536
1537 static rtx
1538 pru_to_int_mode (rtx x)
1539 {
1540 machine_mode mode = GET_MODE (x);
1541
1542 return VOIDmode == mode
1543 ? x
1544 : simplify_gen_subreg (int_mode_for_mode (mode).require (), x, mode, 0);
1545 }
1546
1547 /* Translate between the MachineDescription notion
1548 of 8-bit consecutive registers, to the PRU
1549 assembler syntax of REGWORD[.SUBREG]. */
1550 static const char *
1551 pru_asm_regname (rtx op)
1552 {
1553 static char canon_reg_names[3][LAST_GP_REGNUM][8];
1554 int speci, regi;
1555
1556 gcc_assert (REG_P (op));
1557
1558 if (!canon_reg_names[0][0][0])
1559 {
1560 for (regi = 0; regi < LAST_GP_REGNUM; regi++)
1561 for (speci = 0; speci < 3; speci++)
1562 {
1563 const int sz = (speci == 0) ? 1 : ((speci == 1) ? 2 : 4);
1564 if ((regi + sz) > (32 * 4))
1565 continue; /* Invalid entry. */
1566
1567 /* Construct the lookup table. */
1568 const char *suffix = "";
1569
1570 switch ((sz << 8) | (regi % 4))
1571 {
1572 case (1 << 8) | 0: suffix = ".b0"; break;
1573 case (1 << 8) | 1: suffix = ".b1"; break;
1574 case (1 << 8) | 2: suffix = ".b2"; break;
1575 case (1 << 8) | 3: suffix = ".b3"; break;
1576 case (2 << 8) | 0: suffix = ".w0"; break;
1577 case (2 << 8) | 1: suffix = ".w1"; break;
1578 case (2 << 8) | 2: suffix = ".w2"; break;
1579 case (4 << 8) | 0: suffix = ""; break;
1580 default:
1581 /* Invalid entry. */
1582 continue;
1583 }
1584 sprintf (&canon_reg_names[speci][regi][0],
1585 "r%d%s", regi / 4, suffix);
1586 }
1587 }
1588
1589 switch (GET_MODE_SIZE (GET_MODE (op)))
1590 {
1591 case 1: speci = 0; break;
1592 case 2: speci = 1; break;
1593 case 4: speci = 2; break;
1594 case 8: speci = 2; break; /* Existing GCC test cases are not using %F. */
1595 default: gcc_unreachable ();
1596 }
1597 regi = REGNO (op);
1598 gcc_assert (regi < LAST_GP_REGNUM);
1599 gcc_assert (canon_reg_names[speci][regi][0]);
1600
1601 return &canon_reg_names[speci][regi][0];
1602 }
1603
1604 /* Print the operand OP to file stream FILE modified by LETTER.
1605 LETTER can be one of:
1606
1607 b: prints the register byte start (used by LBBO/SBBO).
1608 B: prints 'c' or 'b' for CTABLE or REG base in a memory address.
1609 F: Full 32-bit register.
1610 H: Higher 16-bits of a const_int operand.
1611 L: Lower 16-bits of a const_int operand.
1612 N: prints next 32-bit register (upper 32bits of a 64bit REG couple).
1613 P: prints swapped condition.
1614 Q: prints swapped and reversed condition.
1615 R: prints reversed condition.
1616 S: print operand mode size (but do not print the operand itself).
1617 T: print exact_log2 () for const_int operands.
1618 u: print QI constant integer as unsigned. No transformation for regs.
1619 V: print exact_log2 () of negated const_int operands.
1620 w: Lower 32-bits of a const_int operand.
1621 W: Upper 32-bits of a const_int operand.
1622 y: print the next 8-bit register (regardless of op size).
1623 z: print the second next 8-bit register (regardless of op size).

   A LETTER of 0 means that no modifier was given.  'w' and 'W' are also
   accepted for CONST_DOUBLE operands, where they select the first or the
   second 32-bit word produced by REAL_VALUE_TO_TARGET_DOUBLE.  Any
   operand/modifier combination that is not handled below is reported
   through output_operand_lossage.
1624 */
1625 static void
1626 pru_print_operand (FILE *file, rtx op, int letter)
1627 {
  /* 'S' only needs the mode of OP, so it is handled before looking at
     what kind of rtx OP is.  */
1628 switch (letter)
1629 {
1630 case 'S':
1631 fprintf (file, "%d", GET_MODE_SIZE (GET_MODE (op)));
1632 return;
1633
1634 default:
1635 break;
1636 }
1637
  /* Condition printing.  Signed comparison operators are asserted never
     to reach this point.  A comparison combined with a LETTER other than
     0, 'P', 'Q' or 'R' leaves this block and continues with the generic
     dispatch below.  */
1638 if (comparison_operator (op, VOIDmode))
1639 {
1640 enum rtx_code cond = GET_CODE (op);
1641 gcc_assert (!pru_signed_cmp_operator (op, VOIDmode));
1642
1643 switch (letter)
1644 {
1645 case 0:
1646 fprintf (file, "%s", pru_comparison_str (cond));
1647 return;
1648 case 'P':
1649 fprintf (file, "%s", pru_comparison_str (swap_condition (cond)));
1650 return;
1651 case 'Q':
1652 cond = swap_condition (cond);
1653 /* Fall through to reverse. */
1654 case 'R':
1655 fprintf (file, "%s", pru_comparison_str (reverse_condition (cond)));
1656 return;
1657 }
1658 }
1659
  /* Generic dispatch on the kind of OP.  Every handled combination
     returns; whatever breaks out of the switch is reported as
     unsupported at the end of the function.  */
1660 switch (GET_CODE (op))
1661 {
1662 case REG:
1663 if (letter == 0 || letter == 'u')
1664 {
1665 fprintf (file, "%s", pru_asm_regname (op));
1666 return;
1667 }
1668 else if (letter == 'b')
1669 {
  /* Always spelled as rN.bM, computed from the 8-bit hard register
     number.  Registers above LAST_NONIO_GP_REGNUM are rejected.  */
1670 if (REGNO (op) > LAST_NONIO_GP_REGNUM)
1671 {
1672 output_operand_lossage ("I/O register operand for '%%%c'",
1673 letter);
1674 return;
1675 }
1676 fprintf (file, "r%d.b%d", REGNO (op) / 4, REGNO (op) % 4);
1677 return;
1678 }
1679 else if (letter == 'F' || letter == 'N')
1680 {
  /* Both need OP to start at a register number that is a multiple of 4.
     'F' prints that 32-bit register, 'N' the one following it.  */
1681 if (REGNO (op) > LAST_NONIO_GP_REGNUM - 1)
1682 {
1683 output_operand_lossage ("I/O register operand for '%%%c'",
1684 letter);
1685 return;
1686 }
1687 if (REGNO (op) % 4 != 0)
1688 {
1689 output_operand_lossage ("non 32 bit register operand for '%%%c'",
1690 letter);
1691 return;
1692 }
1693 fprintf (file, "r%d", REGNO (op) / 4 + (letter == 'N' ? 1 : 0));
1694 return;
1695 }
1696 else if (letter == 'y')
1697 {
  /* Name of the hard register one past the start of OP, taken from
     reg_names.  */
1698 if (REGNO (op) > LAST_NONIO_GP_REGNUM - 1)
1699 {
1700 output_operand_lossage ("invalid operand for '%%%c'", letter);
1701 return;
1702 }
1703 fprintf (file, "%s", reg_names[REGNO (op) + 1]);
1704 return;
1705 }
1706 else if (letter == 'z')
1707 {
  /* Name of the hard register two past the start of OP, taken from
     reg_names.  */
1708 if (REGNO (op) > LAST_NONIO_GP_REGNUM - 2)
1709 {
1710 output_operand_lossage ("invalid operand for '%%%c'", letter);
1711 return;
1712 }
1713 fprintf (file, "%s", reg_names[REGNO (op) + 2]);
1714 return;
1715 }
1716 break;
1717
1718 case CONST_INT:
1719 if (letter == 'H')
1720 {
1721 HOST_WIDE_INT val = INTVAL (op);
1722 val = (val >> 16) & 0xFFFF;
1723 output_addr_const (file, gen_int_mode (val, SImode));
1724 return;
1725 }
1726 else if (letter == 'L')
1727 {
1728 HOST_WIDE_INT val = INTVAL (op);
1729 val &= 0xFFFF;
1730 output_addr_const (file, gen_int_mode (val, SImode));
1731 return;
1732 }
1733 else if (letter == 'T')
1734 {
1735 /* The predicate should have already validated the 1-high-bit
1736 requirement. Use CTZ here to deal with constant's sign
1737 extension. */
1738 HOST_WIDE_INT val = wi::ctz (INTVAL (op));
1739 if (val < 0 || val > 31)
1740 {
1741 output_operand_lossage ("invalid operand for '%%%c'", letter);
1742 return;
1743 }
1744 output_addr_const (file, gen_int_mode (val, SImode));
1745 return;
1746 }
1747 else if (letter == 'V')
1748 {
  /* Same as 'T', but applied to the bitwise complement of the
     constant.  */
1749 HOST_WIDE_INT val = wi::ctz (~INTVAL (op));
1750 if (val < 0 || val > 31)
1751 {
1752 output_operand_lossage ("invalid operand for '%%%c'", letter);
1753 return;
1754 }
1755 output_addr_const (file, gen_int_mode (val, SImode));
1756 return;
1757 }
1758 else if (letter == 'w')
1759 {
1760 HOST_WIDE_INT val = INTVAL (op) & 0xffffffff;
1761 output_addr_const (file, gen_int_mode (val, SImode));
1762 return;
1763 }
1764 else if (letter == 'W')
1765 {
1766 HOST_WIDE_INT val = (INTVAL (op) >> 32) & 0xffffffff;
1767 output_addr_const (file, gen_int_mode (val, SImode));
1768 return;
1769 }
1770 else if (letter == 'u')
1771 {
1772 /* Workaround GCC's representation of QI constants in sign-extended
1773 form, and PRU's assembler insistence on unsigned constant
1774 integers. See the notes about O constraint. */
1775 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op) & 0xff);
1776 return;
1777 }
  /* A CONST_INT with any other LETTER, including 0, shares the handling
     of the symbolic constants below.  */
1778 /* Else, fall through. */
1779
1780 case CONST:
1781 case LABEL_REF:
1782 case SYMBOL_REF:
1783 if (letter == 0)
1784 {
1785 output_addr_const (file, op);
1786 return;
1787 }
1788 break;
1789
1790 case CONST_FIXED:
1791 {
  /* Printed as the integer holding the bit representation of the
     fixed-point value.
     NOTE(review): for a non-zero LETTER lossage is reported, yet the
     value is still printed afterwards; every other lossage path in this
     function returns right away.  */
1792 HOST_WIDE_INT ival = INTVAL (pru_to_int_mode (op));
1793 if (letter != 0)
1794 output_operand_lossage ("unsupported code '%c' for fixed-point:",
1795 letter);
1796 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
1797 return;
1798 }
1799 break;
1800
1801 case CONST_DOUBLE:
1802 if (letter == 0)
1803 {
  /* Without a modifier only SFmode constants are supported; they are
     printed as the hexadecimal target image.  */
1804 long val;
1805
1806 if (GET_MODE (op) != SFmode)
1807 {
1808 output_operand_lossage ("double constants not supported");
1809 return;
1810 }
1811 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), val);
1812 fprintf (file, "0x%lx", val);
1813 return;
1814 }
1815 else if (letter == 'w' || letter == 'W')
1816 {
  /* 'w' selects element 0 and 'W' element 1 of the two-word target
     image.  */
1817 long t[2];
1818 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), t);
1819 fprintf (file, "0x%lx", t[letter == 'w' ? 0 : 1]);
1820 return;
1821 }
1822 else
1823 {
1824 output_operand_lossage ("invalid operand for '%%%c'", letter);
1825 return;
1826 }
1827 break;
1828
1829 case SUBREG:
1830 /* Subregs should not appear at so late stage. */
1831 gcc_unreachable ();
1832 break;
1833
1834 case MEM:
1835 if (letter == 0)
1836 {
1837 output_address (VOIDmode, op);
1838 return;
1839 }
1840 else if (letter == 'B')
1841 {
  /* Classify the address of the MEM: 'c' when it is based on a CTABLE
     entry, 'b' when it is based on a register.  */
1842 rtx base = XEXP (op, 0);
1843 if (GET_CODE (base) == PLUS)
1844 {
1845 rtx op0 = XEXP (base, 0);
1846 rtx op1 = XEXP (base, 1);
1847
1848 /* PLUS cannot have two constant operands, so first one
1849 of them must be a REG, hence we must check for an
1850 exact base address. */
1851 if (ctable_base_operand (op1, VOIDmode))
1852 {
1853 fprintf (file, "c");
1854 return;
1855 }
1856 else if (REG_P (op0))
1857 {
1858 fprintf (file, "b");
1859 return;
1860 }
1861 else
1862 gcc_unreachable ();
1863 }
1864 else if (REG_P (base))
1865 {
1866 fprintf (file, "b");
1867 return;
1868 }
1869 else if (ctable_addr_operand (base, VOIDmode))
1870 {
1871 fprintf (file, "c");
1872 return;
1873 }
1874 else
1875 gcc_unreachable ();
1876 }
1877 break;
1878
1879 case CODE_LABEL:
1880 if (letter == 0)
1881 {
1882 output_addr_const (file, op);
1883 return;
1884 }
1885 break;
1886
1887 default:
1888 break;
1889 }
1890
  /* No case above produced output for this operand/modifier pair.  */
1891 output_operand_lossage ("unsupported operand %s for code '%c'",
1892 GET_RTX_NAME (GET_CODE (op)), letter);
1893 }
1894
1895 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
1896 static void
1897 pru_print_operand_address (FILE *file, machine_mode mode, rtx op)
1898 {
1899 if (CONSTANT_ADDRESS_P (op) && text_segment_operand (op, VOIDmode))
1900 {
1901 output_operand_lossage ("unexpected text address:");
1902 return;
1903 }
1904
1905 switch (GET_CODE (op))
1906 {
1907 case CONST:
1908 case LABEL_REF:
1909 case CONST_WIDE_INT:
1910 case SYMBOL_REF:
1911 break;
1912
1913 case CONST_INT:
1914 {
1915 unsigned HOST_WIDE_INT caddr = INTVAL (op);
1916 int base = pru_get_ctable_base_index (caddr);
1917 int offs = pru_get_ctable_base_offset (caddr);
1918 if (base < 0)
1919 {
1920 output_operand_lossage ("unsupported constant address:");
1921 return;
1922 }
1923 fprintf (file, "%d, %d", base, offs);
1924 return;
1925 }
1926 break;
1927
1928 case PLUS:
1929 {
1930 int base;
1931 rtx op0 = XEXP (op, 0);
1932 rtx op1 = XEXP (op, 1);
1933
1934 if (REG_P (op0) && CONST_INT_P (op1)
1935 && pru_get_ctable_exact_base_index (INTVAL (op1)) >= 0)
1936 {
1937 base = pru_get_ctable_exact_base_index (INTVAL (op1));
1938 fprintf (file, "%d, %s", base, pru_asm_regname (op0));
1939 return;
1940 }
1941 else if (REG_P (op1) && CONST_INT_P (op0)
1942 && pru_get_ctable_exact_base_index (INTVAL (op0)) >= 0)
1943 {
1944 /* Not a valid RTL. */
1945 gcc_unreachable ();
1946 }
1947 else if (REG_P (op0) && CONSTANT_P (op1))
1948 {
1949 fprintf (file, "%s, ", pru_asm_regname (op0));
1950 output_addr_const (file, op1);
1951 return;
1952 }
1953 else if (REG_P (op1) && CONSTANT_P (op0))
1954 {
1955 /* Not a valid RTL. */
1956 gcc_unreachable ();
1957 }
1958 else if (REG_P (op1) && REG_P (op0))
1959 {
1960 fprintf (file, "%s, %s", pru_asm_regname (op0),
1961 pru_asm_regname (op1));
1962 return;
1963 }
1964 }
1965 break;
1966
1967 case REG:
1968 fprintf (file, "%s, 0", pru_asm_regname (op));
1969 return;
1970
1971 case MEM:
1972 {
1973 rtx base = XEXP (op, 0);
1974 pru_print_operand_address (file, mode, base);
1975 return;
1976 }
1977 default:
1978 break;
1979 }
1980
1981 output_operand_lossage ("unsupported memory expression:");
1982 }
1983
1984 /* Implement TARGET_ASM_FUNCTION_PROLOGUE. */
1985 static void
1986 pru_asm_function_prologue (FILE *file)
1987 {
1988 if (flag_verbose_asm || flag_debug_asm)
1989 pru_dump_frame_layout (file);
1990 }
1991
1992 /* Implement `TARGET_ASM_INTEGER'.
1993 Target hook for assembling integer objects. PRU version needs
1994 special handling for references to pmem. Code copied from AVR. */
1995
1996 static bool
1997 pru_assemble_integer (rtx x, unsigned int size, int aligned_p)
1998 {
1999 if (size == POINTER_SIZE / BITS_PER_UNIT
2000 && aligned_p
2001 && text_segment_operand (x, VOIDmode))
2002 {
2003 fputs ("\t.4byte\t%pmem(", asm_out_file);
2004 output_addr_const (asm_out_file, x);
2005 fputs (")\n", asm_out_file);
2006
2007 return true;
2008 }
2009 else if (size == INIT_ARRAY_ENTRY_BYTES
2010 && aligned_p
2011 && text_segment_operand (x, VOIDmode))
2012 {
2013 fputs ("\t.2byte\t%pmem(", asm_out_file);
2014 output_addr_const (asm_out_file, x);
2015 fputs (")\n", asm_out_file);
2016
2017 return true;
2018 }
2019 else
2020 {
2021 return default_assemble_integer (x, size, aligned_p);
2022 }
2023 }
2024
2025 /* Implement TARGET_ASM_FILE_START. */
2026
2027 static void
2028 pru_file_start (void)
2029 {
2030 default_file_start ();
2031
2032 /* Compiler will take care of placing %label, so there is no
2033 need to confuse users with this warning. */
2034 fprintf (asm_out_file, "\t.set no_warn_regname_label\n");
2035 }
2036 \f
2037 /* Function argument related. */
2038
2039 /* Return the number of bytes needed for storing an argument with
2040 the given MODE and TYPE. */
2041 static int
2042 pru_function_arg_size (machine_mode mode, const_tree type)
2043 {
2044 HOST_WIDE_INT param_size;
2045
2046 if (mode == BLKmode)
2047 param_size = int_size_in_bytes (type);
2048 else
2049 param_size = GET_MODE_SIZE (mode);
2050
2051 /* Convert to words (round up). */
2052 param_size = (UNITS_PER_WORD - 1 + param_size) / UNITS_PER_WORD;
2053 gcc_assert (param_size >= 0);
2054
2055 return param_size;
2056 }
2057
/* Check if argument with the given size must be
   passed/returned in a register.

   Reference:
   https://e2e.ti.com/support/development_tools/compiler/f/343/p/650176/2393029

   Only sizes of 1, 2, 3, 4 and 8 bytes (8/16/24/32/64 bits) qualify;
   arguments of any other size are passed on stack.  */
static bool
pru_arg_in_reg_bysize (size_t sz)
{
  switch (sz)
    {
    case 1:
    case 2:
    case 3:
    case 4:
    case 8:
      return true;

    default:
      return false;
    }
}
2070
2071 /* Helper function to get the starting storage HW register for an argument,
2072 or -1 if it must be passed on stack. The cum_v state is not changed. */
2073 static int
2074 pru_function_arg_regi (cumulative_args_t cum_v,
2075 machine_mode mode, const_tree type,
2076 bool named)
2077 {
2078 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2079 size_t argsize = pru_function_arg_size (mode, type);
2080 size_t i, bi;
2081 int regi = -1;
2082
2083 if (!pru_arg_in_reg_bysize (argsize))
2084 return -1;
2085
2086 if (!named)
2087 return -1;
2088
2089 /* Find the first available slot that fits. Yes, that's the PRU ABI. */
2090 for (i = 0; regi < 0 && i < ARRAY_SIZE (cum->regs_used); i++)
2091 {
2092 /* VLAs and vector types are not defined in the PRU ABI. Let's
2093 handle them the same as their same-sized counterparts. This way
2094 we do not need to treat BLKmode differently, and need only to check
2095 the size. */
2096 gcc_assert (argsize == 1 || argsize == 2 || argsize == 3
2097 || argsize == 4 || argsize == 8);
2098
2099 /* Ensure SI and DI arguments are stored in full registers only. */
2100 if ((argsize >= 4) && (i % 4) != 0)
2101 continue;
2102
2103 /* Structures with size 24 bits are passed starting at a full
2104 register boundary. */
2105 if (argsize == 3 && (i % 4) != 0)
2106 continue;
2107
2108 /* rX.w0/w1/w2 are OK. But avoid spreading the second byte
2109 into a different full register. */
2110 if (argsize == 2 && (i % 4) == 3)
2111 continue;
2112
2113 for (bi = 0;
2114 bi < argsize && (bi + i) < ARRAY_SIZE (cum->regs_used);
2115 bi++)
2116 {
2117 if (cum->regs_used[bi + i])
2118 break;
2119 }
2120 if (bi == argsize)
2121 regi = FIRST_ARG_REGNUM + i;
2122 }
2123
2124 return regi;
2125 }
2126
2127 /* Mark CUM_V that a function argument will occupy HW register slot starting
2128 at REGI. The number of consecutive 8-bit HW registers marked as occupied
2129 depends on the MODE and TYPE of the argument. */
2130 static void
2131 pru_function_arg_regi_mark_slot (int regi,
2132 cumulative_args_t cum_v,
2133 machine_mode mode, const_tree type,
2134 bool named)
2135 {
2136 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2137 HOST_WIDE_INT param_size = pru_function_arg_size (mode, type);
2138
2139 gcc_assert (named);
2140
2141 /* Mark all byte sub-registers occupied by argument as used. */
2142 while (param_size--)
2143 {
2144 gcc_assert (regi >= FIRST_ARG_REGNUM && regi <= LAST_ARG_REGNUM);
2145 gcc_assert (!cum->regs_used[regi - FIRST_ARG_REGNUM]);
2146 cum->regs_used[regi - FIRST_ARG_REGNUM] = true;
2147 regi++;
2148 }
2149 }
2150
2151 /* Define where to put the arguments to a function. Value is zero to
2152 push the argument on the stack, or a hard register in which to
2153 store the argument.
2154
2155 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2156 the preceding args and about the function being called.
2157 ARG is a description of the argument. */
2158
2159 static rtx
2160 pru_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
2161 {
2162 rtx return_rtx = NULL_RTX;
2163 int regi = pru_function_arg_regi (cum_v, arg.mode, arg.type, arg.named);
2164
2165 if (regi >= 0)
2166 return_rtx = gen_rtx_REG (arg.mode, regi);
2167
2168 return return_rtx;
2169 }
2170
2171 /* Implement TARGET_ARG_PARTIAL_BYTES. PRU never splits any arguments
2172 between registers and memory, so we can return 0. */
2173
2174 static int
2175 pru_arg_partial_bytes (cumulative_args_t, const function_arg_info &)
2176 {
2177 return 0;
2178 }
2179
2180 /* Update the data in CUM to advance over argument ARG. */
2181
2182 static void
2183 pru_function_arg_advance (cumulative_args_t cum_v,
2184 const function_arg_info &arg)
2185 {
2186 int regi = pru_function_arg_regi (cum_v, arg.mode, arg.type, arg.named);
2187
2188 if (regi >= 0)
2189 pru_function_arg_regi_mark_slot (regi, cum_v, arg.mode,
2190 arg.type, arg.named);
2191 }
2192
2193 /* Implement TARGET_FUNCTION_VALUE. */
2194 static rtx
2195 pru_function_value (const_tree ret_type, const_tree fn ATTRIBUTE_UNUSED,
2196 bool outgoing ATTRIBUTE_UNUSED)
2197 {
2198 return gen_rtx_REG (TYPE_MODE (ret_type), FIRST_RETVAL_REGNUM);
2199 }
2200
2201 /* Implement TARGET_LIBCALL_VALUE. */
2202 static rtx
2203 pru_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
2204 {
2205 return gen_rtx_REG (mode, FIRST_RETVAL_REGNUM);
2206 }
2207
2208 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
2209 static bool
2210 pru_function_value_regno_p (const unsigned int regno)
2211 {
2212 return regno == FIRST_RETVAL_REGNUM;
2213 }
2214
2215 /* Implement TARGET_RETURN_IN_MEMORY. */
2216 bool
2217 pru_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
2218 {
2219 bool in_memory = (!pru_arg_in_reg_bysize (int_size_in_bytes (type))
2220 || int_size_in_bytes (type) == -1);
2221
2222 return in_memory;
2223 }
2224 \f
2225 /* Implement TARGET_CAN_USE_DOLOOP_P. */
2226
2227 static bool
2228 pru_can_use_doloop_p (const widest_int &, const widest_int &iterations_max,
2229 unsigned int loop_depth, bool)
2230 {
2231 /* Considering limitations in the hardware, only use doloop
2232 for innermost loops which must be entered from the top. */
2233 if (loop_depth > 1)
2234 return false;
2235 /* PRU internal loop counter is 16bits wide. Remember that iterations_max
2236 holds the maximum number of loop latch executions, while PRU loop
2237 instruction needs the count of loop body executions. */
2238 if (iterations_max == 0 || wi::geu_p (iterations_max, 0xffff))
2239 return false;
2240
2241 return true;
2242 }
2243
2244 /* NULL if INSN insn is valid within a low-overhead loop.
2245 Otherwise return why doloop cannot be applied. */
2246
2247 static const char *
2248 pru_invalid_within_doloop (const rtx_insn *insn)
2249 {
2250 if (CALL_P (insn))
2251 return "Function call in the loop.";
2252
2253 if (JUMP_P (insn) && INSN_CODE (insn) == CODE_FOR_return)
2254 return "Return from a call instruction in the loop.";
2255
2256 if (NONDEBUG_INSN_P (insn)
2257 && INSN_CODE (insn) < 0
2258 && (GET_CODE (PATTERN (insn)) == ASM_INPUT
2259 || asm_noperands (PATTERN (insn)) >= 0))
2260 return "Loop contains asm statement.";
2261
2262 return NULL;
2263 }
2264
2265
2266 /* Figure out where to put LABEL, which is the label for a repeat loop.
2267 The loop ends just before LAST_INSN. If SHARED, insns other than the
2268 "repeat" might use LABEL to jump to the loop's continuation point.
2269
2270 Return the last instruction in the adjusted loop. */
2271
2272 static rtx_insn *
2273 pru_insert_loop_label_last (rtx_insn *last_insn, rtx_code_label *label,
2274 bool shared)
2275 {
2276 rtx_insn *next, *prev;
2277 int count = 0, code, icode;
2278
2279 if (dump_file)
2280 fprintf (dump_file, "considering end of repeat loop at insn %d\n",
2281 INSN_UID (last_insn));
2282
2283 /* Set PREV to the last insn in the loop. */
2284 prev = PREV_INSN (last_insn);
2285
2286 /* Set NEXT to the next insn after the loop label. */
2287 next = last_insn;
2288 if (!shared)
2289 while (prev != 0)
2290 {
2291 code = GET_CODE (prev);
2292 if (code == CALL_INSN || code == CODE_LABEL || code == BARRIER)
2293 break;
2294
2295 if (INSN_P (prev))
2296 {
2297 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
2298 prev = as_a <rtx_insn *> (XVECEXP (PATTERN (prev), 0, 1));
2299
2300 /* Other insns that should not be in the last two opcodes. */
2301 icode = recog_memoized (prev);
2302 if (icode < 0
2303 || icode == CODE_FOR_pruloophi
2304 || icode == CODE_FOR_pruloopsi)
2305 break;
2306
2307 count++;
2308 next = prev;
2309 if (dump_file)
2310 print_rtl_single (dump_file, next);
2311 if (count == 2)
2312 break;
2313 }
2314 prev = PREV_INSN (prev);
2315 }
2316
2317 /* Insert the nops. */
2318 if (dump_file && count < 2)
2319 fprintf (dump_file, "Adding %d nop%s inside loop\n\n",
2320 2 - count, count == 1 ? "" : "s");
2321
2322 for (; count < 2; count++)
2323 emit_insn_before (gen_nop (), last_insn);
2324
2325 /* Insert the label. */
2326 emit_label_before (label, last_insn);
2327
2328 return last_insn;
2329 }
2330
2331 /* If IS_END is false, expand a canonical doloop_begin RTL into the
2332 PRU-specific doloop_begin_internal. Otherwise expand doloop_end to
2333 doloop_end_internal. */
2334 void
2335 pru_emit_doloop (rtx *operands, int is_end)
2336 {
2337 rtx tag;
2338
2339 if (cfun->machine->doloop_tags == 0
2340 || cfun->machine->doloop_tag_from_end == is_end)
2341 {
2342 cfun->machine->doloop_tags++;
2343 cfun->machine->doloop_tag_from_end = is_end;
2344 }
2345
2346 tag = GEN_INT (cfun->machine->doloop_tags - 1);
2347 machine_mode opmode = GET_MODE (operands[0]);
2348 if (is_end)
2349 {
2350 if (opmode == HImode)
2351 emit_jump_insn (gen_doloop_end_internalhi (operands[0],
2352 operands[1], tag));
2353 else if (opmode == SImode)
2354 emit_jump_insn (gen_doloop_end_internalsi (operands[0],
2355 operands[1], tag));
2356 else
2357 gcc_unreachable ();
2358 }
2359 else
2360 {
2361 if (opmode == HImode)
2362 emit_insn (gen_doloop_begin_internalhi (operands[0], operands[0], tag));
2363 else if (opmode == SImode)
2364 emit_insn (gen_doloop_begin_internalsi (operands[0], operands[0], tag));
2365 else
2366 gcc_unreachable ();
2367 }
2368 }
2369
2370
2371 /* Code for converting doloop_begins and doloop_ends into valid
2372 PRU instructions. Idea and code snippets borrowed from mep port.
2373
2374 A doloop_begin is just a placeholder:
2375
2376 $count = unspec ($count)
2377
2378 where $count is initially the number of iterations.
2379 doloop_end has the form:
2380
	if (--$count != 0) goto label
2382
2383 The counter variable is private to the doloop insns, nothing else
2384 relies on its value.
2385
2386 There are three cases, in decreasing order of preference:
2387
2388 1. A loop has exactly one doloop_begin and one doloop_end.
2389 The doloop_end branches to the first instruction after
2390 the doloop_begin.
2391
2392 In this case we can replace the doloop_begin with a LOOP
2393 instruction and remove the doloop_end. I.e.:
2394
2395 $count1 = unspec ($count1)
2396 label:
2397 ...
2398 if (--$count2 != 0) goto label
2399
2400 becomes:
2401
2402 LOOP end_label,$count1
2403 label:
2404 ...
2405 end_label:
2406 # end loop
2407
2408 2. As for (1), except there are several doloop_ends. One of them
2409 (call it X) falls through to a label L. All the others fall
2410 through to branches to L.
2411
2412 In this case, we remove X and replace the other doloop_ends
2413 with branches to the LOOP label. For example:
2414
2415 $count1 = unspec ($count1)
2416 label:
2417 ...
2418 if (--$count1 != 0) goto label
2419 end_label:
2420 ...
2421 if (--$count2 != 0) goto label
2422 goto end_label
2423
2424 becomes:
2425
2426 LOOP end_label,$count1
2427 label:
2428 ...
2429 end_label:
2430 # end repeat
2431 ...
2432 goto end_label
2433
2434 3. The fallback case. Replace doloop_begins with:
2435
2436 $count = $count
2437
2438 Replace doloop_ends with the equivalent of:
2439
2440 $count = $count - 1
2441 if ($count != 0) goto loop_label
2442
2443 */
2444
2445 /* A structure describing one doloop_begin. */
2446 struct pru_doloop_begin {
2447 /* The next doloop_begin with the same tag. */
2448 struct pru_doloop_begin *next;
2449
2450 /* The instruction itself. */
2451 rtx_insn *insn;
2452
2453 /* The initial counter value. */
2454 rtx loop_count;
2455
2456 /* The counter register. */
2457 rtx counter;
2458 };
2459
2460 /* A structure describing a doloop_end. */
2461 struct pru_doloop_end {
2462 /* The next doloop_end with the same loop tag. */
2463 struct pru_doloop_end *next;
2464
2465 /* The instruction itself. */
2466 rtx_insn *insn;
2467
2468 /* The first instruction after INSN when the branch isn't taken. */
2469 rtx_insn *fallthrough;
2470
2471 /* The location of the counter value. Since doloop_end_internal is a
2472 jump instruction, it has to allow the counter to be stored anywhere
2473 (any non-fixed register). */
2474 rtx counter;
2475
2476 /* The target label (the place where the insn branches when the counter
2477 isn't zero). */
2478 rtx label;
2479
2480 /* A scratch register. Only available when COUNTER isn't stored
2481 in a general register. */
2482 rtx scratch;
2483 };
2484
2485
2486 /* One do-while loop. */
2487 struct pru_doloop {
2488 /* All the doloop_begins for this loop (in no particular order). */
2489 struct pru_doloop_begin *begin;
2490
2491 /* All the doloop_ends. When there is more than one, arrange things
2492 so that the first one is the most likely to be X in case (2) above. */
2493 struct pru_doloop_end *end;
2494 };
2495
2496
2497 /* Return true if LOOP can be converted into LOOP form
2498 (that is, if it matches cases (1) or (2) above). */
2499
2500 static bool
2501 pru_repeat_loop_p (struct pru_doloop *loop)
2502 {
2503 struct pru_doloop_end *end;
2504 rtx_insn *fallthrough;
2505
2506 /* There must be exactly one doloop_begin and at least one doloop_end. */
2507 if (loop->begin == 0 || loop->end == 0 || loop->begin->next != 0)
2508 return false;
2509
2510 /* The first doloop_end (X) must branch back to the insn after
2511 the doloop_begin. */
2512 if (prev_real_insn (as_a<rtx_insn *> (loop->end->label)) != loop->begin->insn)
2513 return false;
2514
2515 /* Check that the first doloop_end (X) can actually reach
2516 doloop_begin () with U8_PCREL relocation for LOOP instruction. */
2517 if (get_attr_length (loop->end->insn) != 4)
2518 return false;
2519
2520 /* All the other doloop_ends must branch to the same place as X.
2521 When the branch isn't taken, they must jump to the instruction
2522 after X. */
2523 fallthrough = loop->end->fallthrough;
2524 for (end = loop->end->next; end != 0; end = end->next)
2525 if (end->label != loop->end->label
2526 || !simplejump_p (end->fallthrough)
2527 || fallthrough
2528 != next_real_insn (JUMP_LABEL_AS_INSN (end->fallthrough)))
2529 return false;
2530
2531 return true;
2532 }
2533
2534
2535 /* The main repeat reorg function. See comment above for details. */
2536
2537 static void
2538 pru_reorg_loop (rtx_insn *insns)
2539 {
2540 rtx_insn *insn;
2541 struct pru_doloop *loops, *loop;
2542 struct pru_doloop_begin *begin;
2543 struct pru_doloop_end *end;
2544 size_t tmpsz;
2545
2546 /* Quick exit if we haven't created any loops. */
2547 if (cfun->machine->doloop_tags == 0)
2548 return;
2549
2550 /* Create an array of pru_doloop structures. */
2551 tmpsz = sizeof (loops[0]) * cfun->machine->doloop_tags;
2552 loops = (struct pru_doloop *) alloca (tmpsz);
2553 memset (loops, 0, sizeof (loops[0]) * cfun->machine->doloop_tags);
2554
2555 /* Search the function for do-while insns and group them by loop tag. */
2556 for (insn = insns; insn; insn = NEXT_INSN (insn))
2557 if (INSN_P (insn))
2558 switch (recog_memoized (insn))
2559 {
2560 case CODE_FOR_doloop_begin_internalhi:
2561 case CODE_FOR_doloop_begin_internalsi:
2562 insn_extract (insn);
2563 loop = &loops[INTVAL (recog_data.operand[2])];
2564
2565 tmpsz = sizeof (struct pru_doloop_begin);
2566 begin = (struct pru_doloop_begin *) alloca (tmpsz);
2567 begin->next = loop->begin;
2568 begin->insn = insn;
2569 begin->loop_count = recog_data.operand[1];
2570 begin->counter = recog_data.operand[0];
2571
2572 loop->begin = begin;
2573 break;
2574
2575 case CODE_FOR_doloop_end_internalhi:
2576 case CODE_FOR_doloop_end_internalsi:
2577 insn_extract (insn);
2578 loop = &loops[INTVAL (recog_data.operand[2])];
2579
2580 tmpsz = sizeof (struct pru_doloop_end);
2581 end = (struct pru_doloop_end *) alloca (tmpsz);
2582 end->insn = insn;
2583 end->fallthrough = next_real_insn (insn);
2584 end->counter = recog_data.operand[0];
2585 end->label = recog_data.operand[1];
2586 end->scratch = recog_data.operand[3];
2587
2588 /* If this insn falls through to an unconditional jump,
2589 give it a lower priority than the others. */
2590 if (loop->end != 0 && simplejump_p (end->fallthrough))
2591 {
2592 end->next = loop->end->next;
2593 loop->end->next = end;
2594 }
2595 else
2596 {
2597 end->next = loop->end;
2598 loop->end = end;
2599 }
2600 break;
2601 }
2602
2603 /* Convert the insns for each loop in turn. */
2604 for (loop = loops; loop < loops + cfun->machine->doloop_tags; loop++)
2605 if (pru_repeat_loop_p (loop))
2606 {
2607 /* Case (1) or (2). */
2608 rtx_code_label *repeat_label;
2609 rtx label_ref;
2610
2611 /* Create a new label for the repeat insn. */
2612 repeat_label = gen_label_rtx ();
2613
2614 /* Replace the doloop_begin with a repeat. We get rid
2615 of the iteration register because LOOP instruction
2616 will utilize an internal for the PRU core LOOP register. */
2617 label_ref = gen_rtx_LABEL_REF (VOIDmode, repeat_label);
2618 machine_mode loop_mode = GET_MODE (loop->begin->loop_count);
2619 if (loop_mode == HImode)
2620 emit_insn_before (gen_pruloophi (loop->begin->loop_count, label_ref),
2621 loop->begin->insn);
2622 else if (loop_mode == SImode)
2623 {
2624 rtx loop_rtx = gen_pruloopsi (loop->begin->loop_count, label_ref);
2625 emit_insn_before (loop_rtx, loop->begin->insn);
2626 }
2627 else if (loop_mode == VOIDmode)
2628 {
2629 gcc_assert (CONST_INT_P (loop->begin->loop_count));
2630 gcc_assert (UBYTE_INT ( INTVAL (loop->begin->loop_count)));
2631 rtx loop_rtx = gen_pruloopsi (loop->begin->loop_count, label_ref);
2632 emit_insn_before (loop_rtx, loop->begin->insn);
2633 }
2634 else
2635 gcc_unreachable ();
2636 delete_insn (loop->begin->insn);
2637
2638 /* Insert the repeat label before the first doloop_end.
2639 Fill the gap with nops if LOOP insn is less than 2
2640 instructions away than loop->end. */
2641 pru_insert_loop_label_last (loop->end->insn, repeat_label,
2642 loop->end->next != 0);
2643
2644 /* Emit a pruloop_end (to improve the readability of the output). */
2645 emit_insn_before (gen_pruloop_end (), loop->end->insn);
2646
2647 /* HACK: TODO: This is usually not needed, but is required for
2648 a few rare cases where a JUMP that breaks the loop
2649 references the LOOP_END address. In other words, since
2650 we're missing a real "loop_end" instruction, a loop "break"
2651 may accidentally reference the loop end itself, and thus
2652 continuing the cycle. */
2653 for (insn = NEXT_INSN (loop->end->insn);
2654 insn != next_real_insn (loop->end->insn);
2655 insn = NEXT_INSN (insn))
2656 {
2657 if (LABEL_P (insn) && LABEL_NUSES (insn) > 0)
2658 emit_insn_before (gen_nop_loop_guard (), loop->end->insn);
2659 }
2660
2661 /* Delete the first doloop_end. */
2662 delete_insn (loop->end->insn);
2663
2664 /* Replace the others with branches to REPEAT_LABEL. */
2665 for (end = loop->end->next; end != 0; end = end->next)
2666 {
2667 rtx_insn *newjmp;
2668 newjmp = emit_jump_insn_before (gen_jump (repeat_label), end->insn);
2669 JUMP_LABEL (newjmp) = repeat_label;
2670 delete_insn (end->insn);
2671 delete_insn (end->fallthrough);
2672 }
2673 }
2674 else
2675 {
2676 /* Case (3). First replace all the doloop_begins with setting
2677 the HW register used for loop counter. */
2678 for (begin = loop->begin; begin != 0; begin = begin->next)
2679 {
2680 insn = gen_move_insn (copy_rtx (begin->counter),
2681 copy_rtx (begin->loop_count));
2682 emit_insn_before (insn, begin->insn);
2683 delete_insn (begin->insn);
2684 }
2685
2686 /* Replace all the doloop_ends with decrement-and-branch sequences. */
2687 for (end = loop->end; end != 0; end = end->next)
2688 {
2689 rtx reg;
2690
2691 start_sequence ();
2692
2693 /* Load the counter value into a general register. */
2694 reg = end->counter;
2695 if (!REG_P (reg) || REGNO (reg) > LAST_NONIO_GP_REGNUM)
2696 {
2697 reg = end->scratch;
2698 emit_move_insn (copy_rtx (reg), copy_rtx (end->counter));
2699 }
2700
2701 /* Decrement the counter. */
2702 emit_insn (gen_add3_insn (copy_rtx (reg), copy_rtx (reg),
2703 constm1_rtx));
2704
2705 /* Copy it back to its original location. */
2706 if (reg != end->counter)
2707 emit_move_insn (copy_rtx (end->counter), copy_rtx (reg));
2708
2709 /* Jump back to the start label. */
2710 insn = emit_jump_insn (gen_cbranchsi4 (gen_rtx_NE (VOIDmode, reg,
2711 const0_rtx),
2712 reg,
2713 const0_rtx,
2714 end->label));
2715
2716 JUMP_LABEL (insn) = end->label;
2717 LABEL_NUSES (end->label)++;
2718
2719 /* Emit the whole sequence before the doloop_end. */
2720 insn = get_insns ();
2721 end_sequence ();
2722 emit_insn_before (insn, end->insn);
2723
2724 /* Delete the doloop_end. */
2725 delete_insn (end->insn);
2726 }
2727 }
2728 }
2729
2730 /* Implement TARGET_MACHINE_DEPENDENT_REORG. */
2731 static void
2732 pru_reorg (void)
2733 {
2734 rtx_insn *insns = get_insns ();
2735
2736 compute_bb_for_insn ();
2737 df_analyze ();
2738
2739 /* Need correct insn lengths for allowing LOOP instruction
2740 emitting due to U8_PCREL limitations. */
2741 shorten_branches (get_insns ());
2742
2743 /* The generic reorg_loops () is not suitable for PRU because
2744 it doesn't handle doloop_begin/end tying. And we need our
2745 doloop_begin emitted before reload. It is difficult to coalesce
2746 UBYTE constant initial loop values into the LOOP insn during
2747 machine reorg phase. */
2748 pru_reorg_loop (insns);
2749
2750 df_finish_pass (false);
2751 }
2752 \f
2753 /* Enumerate all PRU-specific builtins. */
2754 enum pru_builtin
2755 {
2756 PRU_BUILTIN_DELAY_CYCLES,
2757 PRU_BUILTIN_max
2758 };
2759
2760 static GTY(()) tree pru_builtins [(int) PRU_BUILTIN_max];
2761
2762 /* Implement TARGET_INIT_BUILTINS. */
2763
2764 static void
2765 pru_init_builtins (void)
2766 {
2767 tree void_ftype_longlong
2768 = build_function_type_list (void_type_node,
2769 long_long_integer_type_node,
2770 NULL);
2771
2772 pru_builtins[PRU_BUILTIN_DELAY_CYCLES]
2773 = add_builtin_function ("__delay_cycles", void_ftype_longlong,
2774 PRU_BUILTIN_DELAY_CYCLES, BUILT_IN_MD, NULL,
2775 NULL_TREE);
2776 }
2777
2778 /* Implement TARGET_BUILTIN_DECL. */
2779
2780 static tree
2781 pru_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
2782 {
2783 switch (code)
2784 {
2785 case PRU_BUILTIN_DELAY_CYCLES:
2786 return pru_builtins[code];
2787 default:
2788 return error_mark_node;
2789 }
2790 }
2791 \f
2792 /* Emit a sequence of one or more delay_cycles_X insns, in order to generate
2793 code that delays exactly ARG cycles. */
2794
2795 static rtx
2796 pru_expand_delay_cycles (rtx arg)
2797 {
2798 HOST_WIDE_INT c, n;
2799
2800 if (GET_CODE (arg) != CONST_INT)
2801 {
2802 error ("%<__delay_cycles%> only takes constant arguments");
2803 return NULL_RTX;
2804 }
2805
2806 c = INTVAL (arg);
2807
2808 gcc_assert (HOST_BITS_PER_WIDE_INT > 32);
2809 if (c < 0)
2810 {
2811 error ("%<__delay_cycles%> only takes non-negative cycle counts");
2812 return NULL_RTX;
2813 }
2814
2815 emit_insn (gen_delay_cycles_start (arg));
2816
2817 /* For 32-bit loops, there's 2 + 2x cycles. */
2818 if (c > 2 * 0xffff + 1)
2819 {
2820 n = (c - 2) / 2;
2821 c -= (n * 2) + 2;
2822 if ((unsigned long long) n > 0xffffffffULL)
2823 {
2824 error ("%<__delay_cycles%> is limited to 32-bit loop counts");
2825 return NULL_RTX;
2826 }
2827 emit_insn (gen_delay_cycles_2x_plus2_si (GEN_INT (n)));
2828 }
2829
2830 /* For 16-bit loops, there's 1 + 2x cycles. */
2831 if (c > 2)
2832 {
2833 n = (c - 1) / 2;
2834 c -= (n * 2) + 1;
2835
2836 emit_insn (gen_delay_cycles_2x_plus1_hi (GEN_INT (n)));
2837 }
2838
2839 while (c > 0)
2840 {
2841 emit_insn (gen_delay_cycles_1 ());
2842 c -= 1;
2843 }
2844
2845 emit_insn (gen_delay_cycles_end (arg));
2846
2847 return NULL_RTX;
2848 }
2849
2850
2851 /* Implement TARGET_EXPAND_BUILTIN. Expand an expression EXP that calls
2852 a built-in function, with result going to TARGET if that's convenient
2853 (and in mode MODE if that's convenient).
2854 SUBTARGET may be used as the target for computing one of EXP's operands.
2855 IGNORE is nonzero if the value is to be ignored. */
2856
2857 static rtx
2858 pru_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
2859 rtx subtarget ATTRIBUTE_UNUSED,
2860 machine_mode mode ATTRIBUTE_UNUSED,
2861 int ignore ATTRIBUTE_UNUSED)
2862 {
2863 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
2864 unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
2865 rtx arg1 = expand_normal (CALL_EXPR_ARG (exp, 0));
2866
2867 if (fcode == PRU_BUILTIN_DELAY_CYCLES)
2868 return pru_expand_delay_cycles (arg1);
2869
2870 internal_error ("bad builtin code");
2871
2872 return NULL_RTX;
2873 }
2874 \f
2875 /* Remember the last target of pru_set_current_function. */
2876 static GTY(()) tree pru_previous_fndecl;
2877
2878 /* Establish appropriate back-end context for processing the function
2879 FNDECL. The argument might be NULL to indicate processing at top
2880 level, outside of any function scope. */
2881 static void
2882 pru_set_current_function (tree fndecl)
2883 {
2884 tree old_tree = (pru_previous_fndecl
2885 ? DECL_FUNCTION_SPECIFIC_TARGET (pru_previous_fndecl)
2886 : NULL_TREE);
2887
2888 tree new_tree = (fndecl
2889 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
2890 : NULL_TREE);
2891
2892 if (fndecl && fndecl != pru_previous_fndecl)
2893 {
2894 pru_previous_fndecl = fndecl;
2895 if (old_tree == new_tree)
2896 ;
2897
2898 else if (new_tree)
2899 {
2900 cl_target_option_restore (&global_options,
2901 TREE_TARGET_OPTION (new_tree));
2902 target_reinit ();
2903 }
2904
2905 else if (old_tree)
2906 {
2907 struct cl_target_option *def
2908 = TREE_TARGET_OPTION (target_option_current_node);
2909
2910 cl_target_option_restore (&global_options, def);
2911 target_reinit ();
2912 }
2913 }
2914 }
2915 \f
2916 /* Implement TARGET_UNWIND_WORD_MODE.
2917
2918 Since PRU is really a 32-bit CPU, the default word_mode is not suitable. */
2919 static scalar_int_mode
2920 pru_unwind_word_mode (void)
2921 {
2922 return SImode;
2923 }
2924 \f
2925
2926 /* Initialize the GCC target structure. */
2927 #undef TARGET_ASM_FUNCTION_PROLOGUE
2928 #define TARGET_ASM_FUNCTION_PROLOGUE pru_asm_function_prologue
2929 #undef TARGET_ASM_INTEGER
2930 #define TARGET_ASM_INTEGER pru_assemble_integer
2931
2932 #undef TARGET_ASM_FILE_START
2933 #define TARGET_ASM_FILE_START pru_file_start
2934
2935 #undef TARGET_INIT_BUILTINS
2936 #define TARGET_INIT_BUILTINS pru_init_builtins
2937 #undef TARGET_EXPAND_BUILTIN
2938 #define TARGET_EXPAND_BUILTIN pru_expand_builtin
2939 #undef TARGET_BUILTIN_DECL
2940 #define TARGET_BUILTIN_DECL pru_builtin_decl
2941
2942 #undef TARGET_COMPUTE_FRAME_LAYOUT
2943 #define TARGET_COMPUTE_FRAME_LAYOUT pru_compute_frame_layout
2944
2945 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
2946 #define TARGET_FUNCTION_OK_FOR_SIBCALL hook_bool_tree_tree_true
2947
2948 #undef TARGET_CAN_ELIMINATE
2949 #define TARGET_CAN_ELIMINATE pru_can_eliminate
2950
2951 #undef TARGET_HARD_REGNO_MODE_OK
2952 #define TARGET_HARD_REGNO_MODE_OK pru_hard_regno_mode_ok
2953
2954 #undef TARGET_HARD_REGNO_SCRATCH_OK
2955 #define TARGET_HARD_REGNO_SCRATCH_OK pru_hard_regno_scratch_ok
2956 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
2957 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
2958 pru_hard_regno_call_part_clobbered
2959
2960 #undef TARGET_FUNCTION_ARG
2961 #define TARGET_FUNCTION_ARG pru_function_arg
2962
2963 #undef TARGET_FUNCTION_ARG_ADVANCE
2964 #define TARGET_FUNCTION_ARG_ADVANCE pru_function_arg_advance
2965
2966 #undef TARGET_ARG_PARTIAL_BYTES
2967 #define TARGET_ARG_PARTIAL_BYTES pru_arg_partial_bytes
2968
2969 #undef TARGET_FUNCTION_VALUE
2970 #define TARGET_FUNCTION_VALUE pru_function_value
2971
2972 #undef TARGET_LIBCALL_VALUE
2973 #define TARGET_LIBCALL_VALUE pru_libcall_value
2974
2975 #undef TARGET_FUNCTION_VALUE_REGNO_P
2976 #define TARGET_FUNCTION_VALUE_REGNO_P pru_function_value_regno_p
2977
2978 #undef TARGET_RETURN_IN_MEMORY
2979 #define TARGET_RETURN_IN_MEMORY pru_return_in_memory
2980
2981 #undef TARGET_MUST_PASS_IN_STACK
2982 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
2983
2984 #undef TARGET_LEGITIMATE_ADDRESS_P
2985 #define TARGET_LEGITIMATE_ADDRESS_P pru_legitimate_address_p
2986
2987 #undef TARGET_INIT_LIBFUNCS
2988 #define TARGET_INIT_LIBFUNCS pru_init_libfuncs
2989 #undef TARGET_LIBFUNC_GNU_PREFIX
2990 #define TARGET_LIBFUNC_GNU_PREFIX true
2991
2992 #undef TARGET_RTX_COSTS
2993 #define TARGET_RTX_COSTS pru_rtx_costs
2994
2995 #undef TARGET_PRINT_OPERAND
2996 #define TARGET_PRINT_OPERAND pru_print_operand
2997
2998 #undef TARGET_PRINT_OPERAND_ADDRESS
2999 #define TARGET_PRINT_OPERAND_ADDRESS pru_print_operand_address
3000
3001 #undef TARGET_OPTION_OVERRIDE
3002 #define TARGET_OPTION_OVERRIDE pru_option_override
3003
3004 #undef TARGET_SET_CURRENT_FUNCTION
3005 #define TARGET_SET_CURRENT_FUNCTION pru_set_current_function
3006
3007 #undef TARGET_MACHINE_DEPENDENT_REORG
3008 #define TARGET_MACHINE_DEPENDENT_REORG pru_reorg
3009
3010 #undef TARGET_CAN_USE_DOLOOP_P
3011 #define TARGET_CAN_USE_DOLOOP_P pru_can_use_doloop_p
3012
3013 #undef TARGET_INVALID_WITHIN_DOLOOP
3014 #define TARGET_INVALID_WITHIN_DOLOOP pru_invalid_within_doloop
3015
3016 #undef TARGET_UNWIND_WORD_MODE
3017 #define TARGET_UNWIND_WORD_MODE pru_unwind_word_mode
3018
3019 #undef TARGET_HAVE_SPECULATION_SAFE_VALUE
3020 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
3021
3022 struct gcc_target targetm = TARGET_INITIALIZER;
3023
3024 #include "gt-pru.h"