]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/pru/pru.cc
LibF7: Implement mul_mant for devices without MUL instruction.
[thirdparty/gcc.git] / gcc / config / pru / pru.cc
CommitLineData
8d2af3a2 1/* Target machine subroutines for TI PRU.
83ffe9cd 2 Copyright (C) 2014-2023 Free Software Foundation, Inc.
8d2af3a2
DD
3 Dimitar Dimitrov <dimitar@dinux.eu>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21#define IN_TARGET_CODE 1
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "backend.h"
27#include "target.h"
28#include "rtl.h"
29#include "tree.h"
30#include "stringpool.h"
31#include "attribs.h"
32#include "df.h"
33#include "memmodel.h"
34#include "tm_p.h"
35#include "optabs.h"
36#include "regs.h"
37#include "emit-rtl.h"
38#include "recog.h"
39#include "diagnostic-core.h"
40#include "output.h"
41#include "insn-attr.h"
42#include "flags.h"
43#include "explow.h"
44#include "calls.h"
45#include "varasm.h"
46#include "expr.h"
47#include "toplev.h"
48#include "langhooks.h"
49#include "cfgrtl.h"
50#include "stor-layout.h"
51#include "dumpfile.h"
52#include "builtins.h"
53#include "pru-protos.h"
54
55/* This file should be included last. */
56#include "target-def.h"
57
58#define INIT_ARRAY_ENTRY_BYTES 2
59
60/* Global PRU CTABLE entries, filled in by pragmas, and used for fast
61 addressing via LBCO/SBCO instructions. */
62struct pru_ctable_entry pru_ctable[32];
63
64/* Forward function declarations. */
65static bool prologue_saved_reg_p (int);
66static void pru_reorg_loop (rtx_insn *);
67
/* Per-function, target-specific state.  It caches the frame layout
   computed for the current function and keeps doloop tag bookkeeping.  */
struct GTY (()) machine_function
{
  /* Current frame information, to be filled in by pru_compute_frame_layout
     with register save masks, and offsets for the current function.  */

  /* Mask of registers to save.  */
  HARD_REG_SET save_mask;
  /* Number of bytes that the entire frame takes up.  */
  int total_size;
  /* Number of bytes that variables take up.  */
  int var_size;
  /* Number of bytes that outgoing arguments take up.  */
  int out_args_size;
  /* Number of bytes needed to store registers in frame.  */
  int save_reg_size;
  /* Offset from new stack pointer to store registers.  */
  int save_regs_offset;
  /* True if final frame layout is already calculated.  */
  bool initialized;
  /* Number of doloop tags used so far.  */
  int doloop_tags;
  /* True if the last tag was allocated to a doloop_end.  */
  bool doloop_tag_from_end;
};
92\f
93/* Stack layout and calling conventions.
94
95 The PRU ABI defines r4 as Argument Pointer. GCC implements the same
96 semantics, but represents it with HARD_FRAME_POINTER_REGNUM and
97 names it FP. The stack layout is shown below:
98
99 ---------------------- high address
100 | incoming args
101 ------call-boundary---
102 | pretend_args ^
103 FP ---------------- | total
104 | save_regs | frame
105 --------------- | size
106 | local vars |
107 --------------- |
108 | outgoing args V
109 SP ---------------------- low address
110
111 */
112
113#define PRU_STACK_ALIGN(LOC) ROUND_UP ((LOC), STACK_BOUNDARY / BITS_PER_UNIT)
114
115/* Implement TARGET_COMPUTE_FRAME_LAYOUT. */
116static void
117pru_compute_frame_layout (void)
118{
119 int regno;
120 HARD_REG_SET *save_mask;
121 int total_size;
122 int var_size;
123 int out_args_size;
124 int save_reg_size;
125
126 gcc_assert (!cfun->machine->initialized);
127
128 save_mask = &cfun->machine->save_mask;
129 CLEAR_HARD_REG_SET (*save_mask);
130
131 var_size = PRU_STACK_ALIGN ((HOST_WIDE_INT) get_frame_size ());
132 out_args_size = PRU_STACK_ALIGN ((HOST_WIDE_INT) crtl->outgoing_args_size);
133 total_size = var_size + out_args_size;
134
135 /* Calculate space needed for gp registers. */
136 save_reg_size = 0;
137 for (regno = 0; regno <= LAST_GP_REGNUM; regno++)
138 if (prologue_saved_reg_p (regno))
139 {
140 SET_HARD_REG_BIT (*save_mask, regno);
141 save_reg_size += 1;
142 }
143
144 save_reg_size = PRU_STACK_ALIGN (save_reg_size);
145 total_size += save_reg_size;
146 total_size += PRU_STACK_ALIGN (crtl->args.pretend_args_size);
147
148 /* Save other computed information. */
149 cfun->machine->total_size = total_size;
150 cfun->machine->var_size = var_size;
151 cfun->machine->out_args_size = out_args_size;
152 cfun->machine->save_reg_size = save_reg_size;
153 cfun->machine->initialized = reload_completed;
154 cfun->machine->save_regs_offset = out_args_size + var_size;
155}
156
157/* Emit efficient RTL equivalent of ADD3 with the given const_int for
158 frame-related registers.
159 op0 - Destination register.
160 op1 - First addendum operand (a register).
161 addendum - Second addendum operand (a constant).
162 kind - Note kind. REG_NOTE_MAX if no note must be added.
163 */
164static rtx
165pru_add3_frame_adjust (rtx op0, rtx op1, int addendum,
166 const enum reg_note kind)
167{
168 rtx insn;
169
170 rtx op0_adjust = gen_rtx_SET (op0, plus_constant (Pmode, op1, addendum));
171
172 if (UBYTE_INT (addendum) || UBYTE_INT (-addendum))
173 insn = emit_insn (op0_adjust);
174 else
175 {
176 /* Help the compiler to cope with an arbitrary integer constant.
177 Reload has finished so we can't expect the compiler to
178 auto-allocate a temporary register. But we know that call-saved
179 registers are not live yet, so we utilize them. */
180 rtx tmpreg = gen_rtx_REG (Pmode, PROLOGUE_TEMP_REGNUM);
181 if (addendum < 0)
182 {
183 emit_insn (gen_rtx_SET (tmpreg, gen_int_mode (-addendum, Pmode)));
184 insn = emit_insn (gen_sub3_insn (op0, op1, tmpreg));
185 }
186 else
187 {
188 emit_insn (gen_rtx_SET (tmpreg, gen_int_mode (addendum, Pmode)));
189 insn = emit_insn (gen_add3_insn (op0, op1, tmpreg));
190 }
191 }
192
193 /* Attach a note indicating what happened. */
194 if (kind != REG_NOTE_MAX)
195 add_reg_note (insn, kind, copy_rtx (op0_adjust));
196
197 RTX_FRAME_RELATED_P (insn) = 1;
198
199 return insn;
200}
201
202/* Add a const_int to the stack pointer register. */
203static rtx
204pru_add_to_sp (int addendum, const enum reg_note kind)
205{
206 return pru_add3_frame_adjust (stack_pointer_rtx, stack_pointer_rtx,
207 addendum, kind);
208}
209
210/* Helper function used during prologue/epilogue. Emits a single LBBO/SBBO
211 instruction for load/store of the next group of consecutive registers. */
212static int
213xbbo_next_reg_cluster (int regno_start, int *sp_offset, bool do_store)
214{
215 int regno, nregs, i;
216 rtx addr;
217 rtx_insn *insn;
218
219 nregs = 0;
220
221 /* Skip the empty slots. */
222 for (; regno_start <= LAST_GP_REGNUM;)
223 if (TEST_HARD_REG_BIT (cfun->machine->save_mask, regno_start))
224 break;
225 else
226 regno_start++;
227
228 /* Find the largest consecutive group of registers to save. */
229 for (regno = regno_start; regno <= LAST_GP_REGNUM;)
230 if (TEST_HARD_REG_BIT (cfun->machine->save_mask, regno))
231 {
232 regno++;
233 nregs++;
234 }
235 else
236 break;
237
238 if (!nregs)
239 return -1;
240
241 gcc_assert (UBYTE_INT (*sp_offset));
242
243 /* Ok, save this bunch. */
244 addr = plus_constant (Pmode, stack_pointer_rtx, *sp_offset);
245
246 if (do_store)
247 insn = targetm.gen_store_multiple (gen_frame_mem (BLKmode, addr),
248 gen_rtx_REG (QImode, regno_start),
249 GEN_INT (nregs));
250 else
251 insn = targetm.gen_load_multiple (gen_rtx_REG (QImode, regno_start),
252 gen_frame_mem (BLKmode, addr),
253 GEN_INT (nregs));
254
255 gcc_assert (reload_completed);
256 gcc_assert (insn);
257 emit_insn (insn);
258
259 /* Tag as frame-related. */
260 RTX_FRAME_RELATED_P (insn) = 1;
261
262 if (!do_store)
263 {
264 /* Tag epilogue unwind notes. */
265 for (i = regno_start; i < (regno_start + nregs); i++)
266 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (QImode, i));
267 }
268
269 /* Increment and save offset in anticipation of the next register group. */
270 *sp_offset += nregs * UNITS_PER_WORD;
271
272 return regno_start + nregs;
273}
274
275/* Emit function prologue. */
276void
277pru_expand_prologue (void)
278{
279 int regno_start;
280 int total_frame_size;
281 int sp_offset; /* Offset from base_reg to final stack value. */
282 int save_regs_base; /* Offset from base_reg to register save area. */
283 int save_offset; /* Temporary offset to currently saved register group. */
284
285 total_frame_size = cfun->machine->total_size;
286
287 if (flag_stack_usage_info)
288 current_function_static_stack_size = total_frame_size;
289
290 /* Decrement the stack pointer. */
291 if (!UBYTE_INT (total_frame_size))
292 {
293 /* We need an intermediary point, this will point at the spill block. */
294 pru_add_to_sp (cfun->machine->save_regs_offset - total_frame_size,
295 REG_NOTE_MAX);
296 save_regs_base = 0;
297 sp_offset = -cfun->machine->save_regs_offset;
298 }
299 else if (total_frame_size)
300 {
301 pru_add_to_sp (- total_frame_size, REG_NOTE_MAX);
302 save_regs_base = cfun->machine->save_regs_offset;
303 sp_offset = 0;
304 }
305 else
306 save_regs_base = sp_offset = 0;
307
308 regno_start = 0;
309 save_offset = save_regs_base;
310 do
311 regno_start = xbbo_next_reg_cluster (regno_start, &save_offset, true);
312 while (regno_start >= 0);
313
314 /* Set FP before adjusting SP. This way fp_offset has
315 better chance to fit in UBYTE. */
316 if (frame_pointer_needed)
317 {
318 int fp_offset = total_frame_size
319 - crtl->args.pretend_args_size
320 + sp_offset;
321
322 pru_add3_frame_adjust (hard_frame_pointer_rtx, stack_pointer_rtx,
323 fp_offset, REG_NOTE_MAX);
324 }
325
326 if (sp_offset)
327 pru_add_to_sp (sp_offset, REG_FRAME_RELATED_EXPR);
328
329 /* If we are profiling, make sure no instructions are scheduled before
330 the call to mcount. */
331 if (crtl->profile)
332 emit_insn (gen_blockage ());
333}
334
335/* Emit function epilogue. */
336void
337pru_expand_epilogue (bool sibcall_p)
338{
339 int total_frame_size;
340 int sp_adjust, save_offset;
341 int regno_start;
342
343 if (!sibcall_p && pru_can_use_return_insn ())
344 {
345 emit_jump_insn (gen_return ());
346 return;
347 }
348
349 emit_insn (gen_blockage ());
350
351 total_frame_size = cfun->machine->total_size;
352
353 if (frame_pointer_needed)
354 {
355 /* Recover the stack pointer. */
356 pru_add3_frame_adjust (stack_pointer_rtx, hard_frame_pointer_rtx,
357 - cfun->machine->save_reg_size,
358 REG_CFA_ADJUST_CFA);
359
360 save_offset = 0;
361 sp_adjust = total_frame_size - cfun->machine->save_regs_offset;
362 }
363 else if (!UBYTE_INT (total_frame_size))
364 {
365 pru_add_to_sp (cfun->machine->save_regs_offset, REG_CFA_ADJUST_CFA);
366 save_offset = 0;
367 sp_adjust = total_frame_size - cfun->machine->save_regs_offset;
368 }
369 else
370 {
371 save_offset = cfun->machine->save_regs_offset;
372 sp_adjust = total_frame_size;
373 }
374
375 regno_start = 0;
376 do
377 regno_start = xbbo_next_reg_cluster (regno_start, &save_offset, false);
378 while (regno_start >= 0);
379
380 /* Emit a blockage insn here to keep these insns from being moved to
381 an earlier spot in the epilogue.
382
383 This is necessary as we must not cut the stack back before all the
384 restores are finished. */
385 emit_insn (gen_blockage ());
386
387 if (sp_adjust)
388 pru_add_to_sp (sp_adjust, REG_CFA_ADJUST_CFA);
389
390 if (!sibcall_p)
391 emit_jump_insn (gen_simple_return ());
392}
393
394/* Implement RETURN_ADDR_RTX. Note, we do not support moving
395 back to a previous frame. */
396rtx
397pru_get_return_address (int count)
398{
399 if (count != 0)
400 return NULL_RTX;
401
402 /* Return r3.w2. */
403 return get_hard_reg_initial_val (HImode, RA_REGNUM);
404}
405
406/* Implement FUNCTION_PROFILER macro. */
407void
408pru_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
409{
410 fprintf (file, "\tmov\tr1, ra\n");
411 fprintf (file, "\tcall\t_mcount\n");
412 fprintf (file, "\tmov\tra, r1\n");
413}
414
415/* Dump stack layout. */
416static void
417pru_dump_frame_layout (FILE *file)
418{
419 fprintf (file, "\t%s Current Frame Info\n", ASM_COMMENT_START);
420 fprintf (file, "\t%s total_size = %d\n", ASM_COMMENT_START,
421 cfun->machine->total_size);
422 fprintf (file, "\t%s var_size = %d\n", ASM_COMMENT_START,
423 cfun->machine->var_size);
424 fprintf (file, "\t%s out_args_size = %d\n", ASM_COMMENT_START,
425 cfun->machine->out_args_size);
426 fprintf (file, "\t%s save_reg_size = %d\n", ASM_COMMENT_START,
427 cfun->machine->save_reg_size);
428 fprintf (file, "\t%s initialized = %d\n", ASM_COMMENT_START,
429 cfun->machine->initialized);
430 fprintf (file, "\t%s save_regs_offset = %d\n", ASM_COMMENT_START,
431 cfun->machine->save_regs_offset);
432 fprintf (file, "\t%s is_leaf = %d\n", ASM_COMMENT_START,
433 crtl->is_leaf);
434 fprintf (file, "\t%s frame_pointer_needed = %d\n", ASM_COMMENT_START,
435 frame_pointer_needed);
436 fprintf (file, "\t%s pretend_args_size = %d\n", ASM_COMMENT_START,
437 crtl->args.pretend_args_size);
438}
439
440/* Return true if REGNO should be saved in the prologue. */
441static bool
442prologue_saved_reg_p (int regno)
443{
444 gcc_assert (GP_REG_P (regno));
445
a365fa06 446 if (df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
8d2af3a2
DD
447 return true;
448
449 /* 32-bit FP. */
450 if (frame_pointer_needed
451 && regno >= HARD_FRAME_POINTER_REGNUM
452 && regno < HARD_FRAME_POINTER_REGNUM + GET_MODE_SIZE (Pmode))
453 return true;
454
455 /* 16-bit RA. */
456 if (regno == RA_REGNUM && df_regs_ever_live_p (RA_REGNUM))
457 return true;
458 if (regno == RA_REGNUM + 1 && df_regs_ever_live_p (RA_REGNUM + 1))
459 return true;
460
461 return false;
462}
463
464/* Implement TARGET_CAN_ELIMINATE. */
465static bool
466pru_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
467{
468 if (to == STACK_POINTER_REGNUM)
469 return !frame_pointer_needed;
470 return true;
471}
472
473/* Implement INITIAL_ELIMINATION_OFFSET macro. */
474int
475pru_initial_elimination_offset (int from, int to)
476{
477 int offset;
478
479 /* Set OFFSET to the offset from the stack pointer. */
480 switch (from)
481 {
482 case FRAME_POINTER_REGNUM:
483 offset = cfun->machine->out_args_size;
484 break;
485
486 case ARG_POINTER_REGNUM:
487 offset = cfun->machine->total_size;
488 offset -= crtl->args.pretend_args_size;
489 break;
490
491 default:
492 gcc_unreachable ();
493 }
494
495 /* If we are asked for the frame pointer offset, then adjust OFFSET
496 by the offset from the frame pointer to the stack pointer. */
497 if (to == HARD_FRAME_POINTER_REGNUM)
498 offset -= cfun->machine->total_size - crtl->args.pretend_args_size;
499
500
501 return offset;
502}
503
504/* Return nonzero if this function is known to have a null epilogue.
505 This allows the optimizer to omit jumps to jumps if no stack
506 was created. */
507int
508pru_can_use_return_insn (void)
509{
510 if (!reload_completed || crtl->profile)
511 return 0;
512
513 return cfun->machine->total_size == 0;
514}
515\f
516/* Implement TARGET_HARD_REGNO_MODE_OK. */
517
518static bool
519pru_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
520{
521 switch (GET_MODE_SIZE (mode))
522 {
523 case 1: return true;
524 case 2: return (regno % 4) <= 2;
525 case 4: return (regno % 4) == 0;
526 case 8: return (regno % 4) == 0;
527 case 16: return (regno % 4) == 0; /* Not sure why TImode is used. */
528 case 32: return (regno % 4) == 0; /* Not sure why CTImode is used. */
529 default:
530 /* TODO: Find out why VOIDmode and BLKmode are passed. */
531 gcc_assert (mode == BLKmode || mode == VOIDmode);
532 return (regno % 4) == 0;
533 }
534}
535
536/* Implement `TARGET_HARD_REGNO_SCRATCH_OK'.
537 Returns true if REGNO is safe to be allocated as a scratch
538 register (for a define_peephole2) in the current function. */
539
540static bool
541pru_hard_regno_scratch_ok (unsigned int regno)
542{
543 /* Don't allow hard registers that might be part of the frame pointer.
544 Some places in the compiler just test for [HARD_]FRAME_POINTER_REGNUM
545 and don't handle a frame pointer that spans more than one register.
546 TODO: Fix those faulty places. */
547
548 if ((!reload_completed || frame_pointer_needed)
549 && (IN_RANGE (regno, HARD_FRAME_POINTER_REGNUM,
550 HARD_FRAME_POINTER_REGNUM + 3)
551 || IN_RANGE (regno, FRAME_POINTER_REGNUM,
552 FRAME_POINTER_REGNUM + 3)))
553 return false;
554
555 return true;
556}
557
558
8d2af3a2
DD
559/* Worker function for `HARD_REGNO_RENAME_OK'.
560 Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
561
562int
563pru_hard_regno_rename_ok (unsigned int old_reg,
564 unsigned int new_reg)
565{
566 /* Don't allow hard registers that might be part of the frame pointer.
567 Some places in the compiler just test for [HARD_]FRAME_POINTER_REGNUM
568 and don't care for a frame pointer that spans more than one register.
569 TODO: Fix those faulty places. */
570 if ((!reload_completed || frame_pointer_needed)
571 && (IN_RANGE (old_reg, HARD_FRAME_POINTER_REGNUM,
572 HARD_FRAME_POINTER_REGNUM + 3)
573 || IN_RANGE (old_reg, FRAME_POINTER_REGNUM,
574 FRAME_POINTER_REGNUM + 3)
575 || IN_RANGE (new_reg, HARD_FRAME_POINTER_REGNUM,
576 HARD_FRAME_POINTER_REGNUM + 3)
577 || IN_RANGE (new_reg, FRAME_POINTER_REGNUM,
578 FRAME_POINTER_REGNUM + 3)))
579 return 0;
580
581 return 1;
582}
583\f
584/* Allocate a chunk of memory for per-function machine-dependent data. */
585static struct machine_function *
586pru_init_machine_status (void)
587{
588 return ggc_cleared_alloc<machine_function> ();
589}
590
591/* Implement TARGET_OPTION_OVERRIDE. */
592static void
593pru_option_override (void)
594{
595#ifdef SUBTARGET_OVERRIDE_OPTIONS
596 SUBTARGET_OVERRIDE_OPTIONS;
597#endif
598
599 /* Check for unsupported options. */
600 if (flag_pic == 1)
601 warning (OPT_fpic, "%<-fpic%> is not supported");
602 if (flag_pic == 2)
603 warning (OPT_fPIC, "%<-fPIC%> is not supported");
604 if (flag_pie == 1)
605 warning (OPT_fpie, "%<-fpie%> is not supported");
606 if (flag_pie == 2)
607 warning (OPT_fPIE, "%<-fPIE%> is not supported");
608
609 /* QBxx conditional branching cannot cope with block reordering. */
610 if (flag_reorder_blocks_and_partition)
611 {
612 inform (input_location, "%<-freorder-blocks-and-partition%> "
613 "not supported on this architecture");
614 flag_reorder_blocks_and_partition = 0;
615 flag_reorder_blocks = 1;
616 }
617
618 /* Function to allocate machine-dependent function status. */
619 init_machine_status = &pru_init_machine_status;
620
621 /* Save the initial options in case the user does function specific
622 options. */
623 target_option_default_node = target_option_current_node
ba948b37 624 = build_target_option_node (&global_options, &global_options_set);
8d2af3a2
DD
625
626 /* Due to difficulties in implementing the TI ABI with GCC,
627 at least check and error-out if GCC cannot compile a
628 compliant output. */
629 pru_register_abicheck_pass ();
630}
631\f
632/* Compute a (partial) cost for rtx X. Return true if the complete
633 cost has been computed, and false if subexpressions should be
634 scanned. In either case, *TOTAL contains the cost result. */
635static bool
636pru_rtx_costs (rtx x, machine_mode mode,
637 int outer_code, int opno ATTRIBUTE_UNUSED,
638 int *total, bool speed ATTRIBUTE_UNUSED)
639{
640 const int code = GET_CODE (x);
641
642 switch (code)
643 {
644 case CONST_INT:
645 if ((mode == VOIDmode && UBYTE_INT (INTVAL (x)))
646 || (mode != VOIDmode && const_ubyte_operand (x, mode)))
647 {
648 *total = COSTS_N_INSNS (0);
649 return true;
650 }
651 else if ((mode == VOIDmode && UHWORD_INT (INTVAL (x)))
652 || (mode != VOIDmode && const_uhword_operand (x, mode)))
653 {
654 *total = COSTS_N_INSNS (1);
655 return true;
656 }
657 else if (outer_code == MEM && ctable_addr_operand (x, VOIDmode))
658 {
659 *total = COSTS_N_INSNS (0);
660 return true;
661 }
662 else
663 {
664 *total = COSTS_N_INSNS (2);
665 return true;
666 }
667
668 case LABEL_REF:
669 case SYMBOL_REF:
670 case CONST:
671 {
672 *total = COSTS_N_INSNS (1);
673 return true;
674 }
675 case CONST_DOUBLE:
676 {
677 *total = COSTS_N_INSNS (2);
678 return true;
679 }
680 case CONST_WIDE_INT:
681 {
682 /* PRU declares no vector or very large integer types. */
683 gcc_unreachable ();
684 return true;
685 }
686 case SET:
687 {
688 int factor;
689
690 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
691 the mode for the factor. */
692 mode = GET_MODE (SET_DEST (x));
693
694 /* SI move has the same cost as a QI move. Moves larger than
695 64 bits are costly. */
696 factor = CEIL (GET_MODE_SIZE (mode), GET_MODE_SIZE (SImode));
697 *total = factor * COSTS_N_INSNS (1);
698
699 return false;
700 }
701
702 case MULT:
703 {
704 /* Factor in that "mul" requires fixed registers, which
705 would likely require register moves. */
706 *total = COSTS_N_INSNS (7);
707 return false;
708 }
709 case PLUS:
710 {
711 rtx op0 = XEXP (x, 0);
712 rtx op1 = XEXP (x, 1);
713 machine_mode op1_mode = GET_MODE (op1);
714
715 /* Generic RTL address expressions do not enforce mode for
716 offsets, yet our UBYTE constraint requires it. Fix it here. */
717 if (op1_mode == VOIDmode && CONST_INT_P (op1) && outer_code == MEM)
718 op1_mode = Pmode;
719 if (outer_code == MEM
720 && ((REG_P (op0) && reg_or_ubyte_operand (op1, op1_mode))
721 || ctable_addr_operand (op0, VOIDmode)
722 || ctable_addr_operand (op1, VOIDmode)
723 || (ctable_base_operand (op0, VOIDmode) && REG_P (op1))
724 || (ctable_base_operand (op1, VOIDmode) && REG_P (op0))))
725 {
726 /* CTABLE or REG base addressing - PLUS comes for free. */
727 *total = COSTS_N_INSNS (0);
728 return true;
729 }
730 else
731 {
732 *total = COSTS_N_INSNS (1);
733 return false;
734 }
735 }
736 case SIGN_EXTEND:
737 {
738 *total = COSTS_N_INSNS (3);
739 return false;
740 }
741 case ASHIFTRT:
742 {
743 rtx op1 = XEXP (x, 1);
744 if (const_1_operand (op1, VOIDmode))
745 *total = COSTS_N_INSNS (3);
746 else
747 *total = COSTS_N_INSNS (7);
748 return false;
749 }
750 case ZERO_EXTRACT:
751 {
752 rtx op2 = XEXP (x, 2);
753 if ((outer_code == EQ || outer_code == NE)
754 && CONST_INT_P (op2)
755 && INTVAL (op2) == 1)
756 {
757 /* Branch if bit is set/clear is a single instruction. */
758 *total = COSTS_N_INSNS (0);
759 return true;
760 }
761 else
762 {
763 *total = COSTS_N_INSNS (2);
764 return false;
765 }
766 }
767 case ZERO_EXTEND:
768 {
10dd6dea
DD
769 /* 64-bit zero extensions actually have a cost because they
770 require setting a register to zero.
771 32-bit and smaller are free. */
772 int factor = (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (SImode)) ? 0 : 1;
773 *total = factor * COSTS_N_INSNS (1);
8d2af3a2
DD
774 return false;
775 }
776
777 default:
778 {
779 /* PRU ALU is 32 bit, despite GCC's UNITS_PER_WORD=1. */
780 int factor = CEIL (GET_MODE_SIZE (mode), GET_MODE_SIZE (SImode));
781 *total = factor * COSTS_N_INSNS (1);
782 return false;
783 }
784 }
785}
786\f
787static GTY(()) rtx eqdf_libfunc;
788static GTY(()) rtx nedf_libfunc;
789static GTY(()) rtx ledf_libfunc;
790static GTY(()) rtx ltdf_libfunc;
791static GTY(()) rtx gedf_libfunc;
792static GTY(()) rtx gtdf_libfunc;
793static GTY(()) rtx eqsf_libfunc;
794static GTY(()) rtx nesf_libfunc;
795static GTY(()) rtx lesf_libfunc;
796static GTY(()) rtx ltsf_libfunc;
797static GTY(()) rtx gesf_libfunc;
798static GTY(()) rtx gtsf_libfunc;
799
/* Implement the TARGET_INIT_LIBFUNCS macro.  We use this to rename library
   functions to match the PRU ABI.

   Arithmetic and conversion routines are registered directly in the
   optabs.  The floating-point comparison routines are instead kept in
   the file-local *_libfunc variables, and the matching optab entries
   are cleared.  */

static void
pru_init_libfuncs (void)
{
  /* Double-precision floating-point arithmetic.  */
  set_optab_libfunc (add_optab, DFmode, "__pruabi_addd");
  set_optab_libfunc (sdiv_optab, DFmode, "__pruabi_divd");
  set_optab_libfunc (smul_optab, DFmode, "__pruabi_mpyd");
  set_optab_libfunc (neg_optab, DFmode, "__pruabi_negd");
  set_optab_libfunc (sub_optab, DFmode, "__pruabi_subd");

  /* Single-precision floating-point arithmetic.  */
  set_optab_libfunc (add_optab, SFmode, "__pruabi_addf");
  set_optab_libfunc (sdiv_optab, SFmode, "__pruabi_divf");
  set_optab_libfunc (smul_optab, SFmode, "__pruabi_mpyf");
  set_optab_libfunc (neg_optab, SFmode, "__pruabi_negf");
  set_optab_libfunc (sub_optab, SFmode, "__pruabi_subf");

  /* Floating-point comparisons.  These are looked up by
     pru_expand_fp_compare, not through the optabs.  */
  eqsf_libfunc = init_one_libfunc ("__pruabi_eqf");
  nesf_libfunc = init_one_libfunc ("__pruabi_neqf");
  lesf_libfunc = init_one_libfunc ("__pruabi_lef");
  ltsf_libfunc = init_one_libfunc ("__pruabi_ltf");
  gesf_libfunc = init_one_libfunc ("__pruabi_gef");
  gtsf_libfunc = init_one_libfunc ("__pruabi_gtf");
  eqdf_libfunc = init_one_libfunc ("__pruabi_eqd");
  nedf_libfunc = init_one_libfunc ("__pruabi_neqd");
  ledf_libfunc = init_one_libfunc ("__pruabi_led");
  ltdf_libfunc = init_one_libfunc ("__pruabi_ltd");
  gedf_libfunc = init_one_libfunc ("__pruabi_ged");
  gtdf_libfunc = init_one_libfunc ("__pruabi_gtd");

  /* In PRU ABI, much like other TI processors, floating point
     comparisons return non-standard values.  This quirk is handled
     by disabling the optab library functions, and handling the
     comparison during RTL expansion.  */
  set_optab_libfunc (eq_optab, SFmode, NULL);
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (gt_optab, SFmode, NULL);
  set_optab_libfunc (ge_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, NULL);
  set_optab_libfunc (le_optab, SFmode, NULL);
  set_optab_libfunc (eq_optab, DFmode, NULL);
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (gt_optab, DFmode, NULL);
  set_optab_libfunc (ge_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, NULL);
  set_optab_libfunc (le_optab, DFmode, NULL);

  /* The isunordered function appears to be supported only by GCC.  */
  set_optab_libfunc (unord_optab, SFmode, "__pruabi_unordf");
  set_optab_libfunc (unord_optab, DFmode, "__pruabi_unordd");

  /* Floating-point to integer conversions.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__pruabi_fixdi");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__pruabi_fixdu");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__pruabi_fixdlli");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__pruabi_fixdull");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__pruabi_fixfi");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__pruabi_fixfu");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__pruabi_fixflli");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__pruabi_fixfull");

  /* Conversions between floating types.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__pruabi_cvtdf");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__pruabi_cvtfd");

  /* Integer to floating-point conversions.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__pruabi_fltid");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__pruabi_fltud");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__pruabi_fltllid");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__pruabi_fltulld");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__pruabi_fltif");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__pruabi_fltuf");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__pruabi_fltllif");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__pruabi_fltullf");

  /* Long long.  */
  set_optab_libfunc (ashr_optab, DImode, "__pruabi_asrll");
  set_optab_libfunc (smul_optab, DImode, "__pruabi_mpyll");
  set_optab_libfunc (ashl_optab, DImode, "__pruabi_lslll");
  set_optab_libfunc (lshr_optab, DImode, "__pruabi_lsrll");

  /* Integer division and remainder, 32-bit and 64-bit.  */
  set_optab_libfunc (sdiv_optab, SImode, "__pruabi_divi");
  set_optab_libfunc (udiv_optab, SImode, "__pruabi_divu");
  set_optab_libfunc (smod_optab, SImode, "__pruabi_remi");
  set_optab_libfunc (umod_optab, SImode, "__pruabi_remu");
  set_optab_libfunc (sdivmod_optab, SImode, "__pruabi_divremi");
  set_optab_libfunc (udivmod_optab, SImode, "__pruabi_divremu");
  set_optab_libfunc (sdiv_optab, DImode, "__pruabi_divlli");
  set_optab_libfunc (udiv_optab, DImode, "__pruabi_divull");
  set_optab_libfunc (smod_optab, DImode, "__pruabi_remlli");
  set_optab_libfunc (umod_optab, DImode, "__pruabi_remull");
  set_optab_libfunc (udivmod_optab, DImode, "__pruabi_divremull");
}
897
e95e91ec
DD
898/* Given a comparison CODE, return a similar comparison but without
899 the "equals" condition. In other words, it strips GE/GEU/LE/LEU
900 and instead returns GT/GTU/LT/LTU. */
901
902enum rtx_code
903pru_noteq_condition (enum rtx_code code)
904{
905 switch (code)
906 {
907 case GT: return GT;
908 case GTU: return GTU;
909 case GE: return GT;
910 case GEU: return GTU;
911 case LT: return LT;
912 case LTU: return LTU;
913 case LE: return LT;
914 case LEU: return LTU;
915 default:
916 gcc_unreachable ();
917 }
918}
8d2af3a2
DD
919
920/* Emit comparison instruction if necessary, returning the expression
921 that holds the compare result in the proper mode. Return the comparison
922 that should be used in the jump insn. */
923
924rtx
925pru_expand_fp_compare (rtx comparison, machine_mode mode)
926{
927 enum rtx_code code = GET_CODE (comparison);
928 rtx op0 = XEXP (comparison, 0);
929 rtx op1 = XEXP (comparison, 1);
930 rtx cmp;
931 enum rtx_code jump_code = code;
932 machine_mode op_mode = GET_MODE (op0);
933 rtx_insn *insns;
934 rtx libfunc;
935
936 gcc_assert (op_mode == DFmode || op_mode == SFmode);
937
938 /* FP exceptions are not raised by PRU's softfp implementation. So the
939 following transformations are safe. */
940 if (code == UNGE)
941 {
942 code = LT;
943 jump_code = EQ;
944 }
945 else if (code == UNLE)
946 {
947 code = GT;
948 jump_code = EQ;
949 }
950 else
951 jump_code = NE;
952
953 switch (code)
954 {
955 case EQ:
956 libfunc = op_mode == DFmode ? eqdf_libfunc : eqsf_libfunc;
957 break;
958 case NE:
959 libfunc = op_mode == DFmode ? nedf_libfunc : nesf_libfunc;
960 break;
961 case GT:
962 libfunc = op_mode == DFmode ? gtdf_libfunc : gtsf_libfunc;
963 break;
964 case GE:
965 libfunc = op_mode == DFmode ? gedf_libfunc : gesf_libfunc;
966 break;
967 case LT:
968 libfunc = op_mode == DFmode ? ltdf_libfunc : ltsf_libfunc;
969 break;
970 case LE:
971 libfunc = op_mode == DFmode ? ledf_libfunc : lesf_libfunc;
972 break;
973 default:
974 gcc_unreachable ();
975 }
976 start_sequence ();
977
978 cmp = emit_library_call_value (libfunc, 0, LCT_CONST, SImode,
979 op0, op_mode, op1, op_mode);
980 insns = get_insns ();
981 end_sequence ();
982
983 emit_libcall_block (insns, cmp, cmp,
984 gen_rtx_fmt_ee (code, SImode, op0, op1));
985
986 return gen_rtx_fmt_ee (jump_code, mode, cmp, const0_rtx);
987}
988\f
989/* Return the sign bit position for given OP's mode. */
990static int
991sign_bit_position (const rtx op)
992{
993 const int sz = GET_MODE_SIZE (GET_MODE (op));
994
995 return sz * 8 - 1;
996}
997
10dd6dea
DD
998/* Parse the given CVAL integer value, and extract the "filling" byte
999 range of consecutive 0xff byte values. Rest of bytes must be 0x00.
1000 There must be only one range in the given value. This range would
1001 typically be used to calculate the parameters of
1002 PRU instructions ZERO and FILL.
1003
1004 The parameter MODE determines the maximum byte range to consider
1005 in the given input constant.
1006
1007 Example input:
1008 cval = 0xffffffffffffff00 = -256
1009 mode = SImode
1010 Return value:
1011 start = 1
1012 nbytes = 3
1013
1014 On error, return a range with -1 for START and NBYTES. */
1015pru_byterange
1016pru_calc_byterange (HOST_WIDE_INT cval, machine_mode mode)
8d2af3a2 1017{
10dd6dea
DD
1018 const pru_byterange invalid_range = { -1, -1 };
1019 pru_byterange r = invalid_range;
1020 enum { ST_FFS, ST_INRANGE, ST_TRAILING_ZEROS } st = ST_FFS;
1021 int i;
8d2af3a2 1022
10dd6dea 1023 for (i = 0; i < GET_MODE_SIZE (mode); i++)
8d2af3a2 1024 {
10dd6dea
DD
1025 const int b = cval & ((1U << BITS_PER_UNIT) - 1);
1026 cval >>= BITS_PER_UNIT;
1027
1028 if (b == 0x00 && (st == ST_FFS || st == ST_TRAILING_ZEROS))
1029 /* No action. */;
1030 else if (b == 0x00 && st == ST_INRANGE)
1031 st = ST_TRAILING_ZEROS;
1032 else if (b == 0xff && st == ST_FFS)
1033 {
1034 st = ST_INRANGE;
1035 r.start = i;
1036 r.nbytes = 1;
1037 }
1038 else if (b == 0xff && st == ST_INRANGE)
1039 r.nbytes++;
1040 else
1041 return invalid_range;
8d2af3a2
DD
1042 }
1043
10dd6dea
DD
1044 if (st != ST_TRAILING_ZEROS && st != ST_INRANGE)
1045 return invalid_range;
1046 return r;
8d2af3a2
DD
1047}
1048\f
1049/* Branches and compares. */
1050
1051/* PRU's ALU does not support signed comparison operations. That's why we
1052 emulate them. By first checking the sign bit and handling every possible
1053 operand sign combination, we can simulate signed comparisons in just
1054 5 instructions. See table below.
1055
1056.-------------------.---------------------------------------------------.
1057| Operand sign bit | Mapping the signed comparison to an unsigned one |
1058|---------+---------+------------+------------+------------+------------|
1059| OP1.b31 | OP2.b31 | OP1 < OP2 | OP1 <= OP2 | OP1 > OP2 | OP1 >= OP2 |
1060|---------+---------+------------+------------+------------+------------|
1061| 0 | 0 | OP1 < OP2 | OP1 <= OP2 | OP1 > OP2 | OP1 >= OP2 |
1062|---------+---------+------------+------------+------------+------------|
1063| 0 | 1 | false | false | true | true |
1064|---------+---------+------------+------------+------------+------------|
1065| 1 | 0 | true | true | false | false |
1066|---------+---------+------------+------------+------------+------------|
1067| 1 | 1 | OP1 < OP2 | OP1 <= OP2 | OP1 > OP2 | OP1 >= OP2 |
1068`---------'---------'------------'------------'------------+------------'
1069
1070
1071Given the table above, here is an example for a concrete op:
1072 LT:
1073 qbbc OP1_POS, OP1, 31
1074 OP1_NEG: qbbc BRANCH_TAKEN_LABEL, OP2, 31
1075 OP1_NEG_OP2_NEG: qblt BRANCH_TAKEN_LABEL, OP2, OP1
1076 ; jmp OUT -> can be eliminated because we'll take the
1077 ; following branch. OP2.b31 is guaranteed to be 1
1078 ; by the time we get here.
1079 OP1_POS: qbbs OUT, OP2, 31
1080 OP1_POS_OP2_POS: qblt BRANCH_TAKEN_LABEL, OP2, OP1
1081#if FAR_JUMP
1082 jmp OUT
1083BRANCH_TAKEN_LABEL: jmp REAL_BRANCH_TAKEN_LABEL
1084#endif
1085 OUT:
1086
1087*/
1088
1089/* Output asm code for a signed-compare LT/LE conditional branch. */
1090static const char *
1091pru_output_ltle_signed_cbranch (rtx *operands, bool is_near)
1092{
1093 static char buf[1024];
1094 enum rtx_code code = GET_CODE (operands[0]);
1095 rtx op1;
1096 rtx op2;
1097 const char *cmp_opstr;
1098 int bufi = 0;
1099
1100 op1 = operands[1];
1101 op2 = operands[2];
1102
1103 gcc_assert (GET_CODE (op1) == REG && GET_CODE (op2) == REG);
1104
1105 /* Determine the comparison operators for positive and negative operands. */
1106 if (code == LT)
1107 cmp_opstr = "qblt";
1108 else if (code == LE)
1109 cmp_opstr = "qble";
1110 else
1111 gcc_unreachable ();
1112
1113 if (is_near)
1114 bufi = snprintf (buf, sizeof (buf),
1115 "qbbc\t.+12, %%1, %d\n\t"
1116 "qbbc\t%%l3, %%2, %d\n\t" /* OP1_NEG. */
1117 "%s\t%%l3, %%2, %%1\n\t" /* OP1_NEG_OP2_NEG. */
1118 "qbbs\t.+8, %%2, %d\n\t" /* OP1_POS. */
1119 "%s\t%%l3, %%2, %%1", /* OP1_POS_OP2_POS. */
1120 sign_bit_position (op1),
1121 sign_bit_position (op2),
1122 cmp_opstr,
1123 sign_bit_position (op2),
1124 cmp_opstr);
1125 else
1126 bufi = snprintf (buf, sizeof (buf),
1127 "qbbc\t.+12, %%1, %d\n\t"
1128 "qbbc\t.+20, %%2, %d\n\t" /* OP1_NEG. */
1129 "%s\t.+16, %%2, %%1\n\t" /* OP1_NEG_OP2_NEG. */
1130 "qbbs\t.+16, %%2, %d\n\t" /* OP1_POS. */
1131 "%s\t.+8, %%2, %%1\n\t" /* OP1_POS_OP2_POS. */
1132 "jmp\t.+8\n\t" /* jmp OUT. */
1133 "jmp\t%%%%label(%%l3)", /* BRANCH_TAKEN_LABEL. */
1134 sign_bit_position (op1),
1135 sign_bit_position (op2),
1136 cmp_opstr,
1137 sign_bit_position (op2),
1138 cmp_opstr);
1139
1140 gcc_assert (bufi > 0);
1141 gcc_assert ((unsigned int) bufi < sizeof (buf));
1142
1143 return buf;
1144}
1145
1146/* Output asm code for a signed-compare GT/GE conditional branch. */
1147static const char *
1148pru_output_gtge_signed_cbranch (rtx *operands, bool is_near)
1149{
1150 static char buf[1024];
1151 enum rtx_code code = GET_CODE (operands[0]);
1152 rtx op1;
1153 rtx op2;
1154 const char *cmp_opstr;
1155 int bufi = 0;
1156
1157 op1 = operands[1];
1158 op2 = operands[2];
1159
1160 gcc_assert (GET_CODE (op1) == REG && GET_CODE (op2) == REG);
1161
1162 /* Determine the comparison operators for positive and negative operands. */
1163 if (code == GT)
1164 cmp_opstr = "qbgt";
1165 else if (code == GE)
1166 cmp_opstr = "qbge";
1167 else
1168 gcc_unreachable ();
1169
1170 if (is_near)
1171 bufi = snprintf (buf, sizeof (buf),
1172 "qbbs\t.+12, %%1, %d\n\t"
1173 "qbbs\t%%l3, %%2, %d\n\t" /* OP1_POS. */
1174 "%s\t%%l3, %%2, %%1\n\t" /* OP1_POS_OP2_POS. */
1175 "qbbc\t.+8, %%2, %d\n\t" /* OP1_NEG. */
1176 "%s\t%%l3, %%2, %%1", /* OP1_NEG_OP2_NEG. */
1177 sign_bit_position (op1),
1178 sign_bit_position (op2),
1179 cmp_opstr,
1180 sign_bit_position (op2),
1181 cmp_opstr);
1182 else
1183 bufi = snprintf (buf, sizeof (buf),
1184 "qbbs\t.+12, %%1, %d\n\t"
1185 "qbbs\t.+20, %%2, %d\n\t" /* OP1_POS. */
1186 "%s\t.+16, %%2, %%1\n\t" /* OP1_POS_OP2_POS. */
1187 "qbbc\t.+16, %%2, %d\n\t" /* OP1_NEG. */
1188 "%s\t.+8, %%2, %%1\n\t" /* OP1_NEG_OP2_NEG. */
1189 "jmp\t.+8\n\t" /* jmp OUT. */
1190 "jmp\t%%%%label(%%l3)", /* BRANCH_TAKEN_LABEL. */
1191 sign_bit_position (op1),
1192 sign_bit_position (op2),
1193 cmp_opstr,
1194 sign_bit_position (op2),
1195 cmp_opstr);
1196
1197 gcc_assert (bufi > 0);
1198 gcc_assert ((unsigned int) bufi < sizeof (buf));
1199
1200 return buf;
1201}
1202
1203/* Output asm code for a signed-compare conditional branch.
1204
1205 If IS_NEAR is true, then QBBx instructions may be used for reaching
1206 the destination label. Otherwise JMP is used, at the expense of
1207 increased code size. */
1208const char *
1209pru_output_signed_cbranch (rtx *operands, bool is_near)
1210{
1211 enum rtx_code code = GET_CODE (operands[0]);
1212
1213 if (code == LT || code == LE)
1214 return pru_output_ltle_signed_cbranch (operands, is_near);
1215 else if (code == GT || code == GE)
1216 return pru_output_gtge_signed_cbranch (operands, is_near);
1217 else
1218 gcc_unreachable ();
1219}
1220
1221/* Optimized version of pru_output_signed_cbranch for constant second
1222 operand. */
1223
1224const char *
1225pru_output_signed_cbranch_ubyteop2 (rtx *operands, bool is_near)
1226{
1227 static char buf[1024];
1228 enum rtx_code code = GET_CODE (operands[0]);
1229 int regop_sign_bit_pos = sign_bit_position (operands[1]);
1230 const char *cmp_opstr;
1231 const char *rcmp_opstr;
1232
1233 /* We must swap operands due to PRU's demand OP1 to be the immediate. */
1234 code = swap_condition (code);
1235
1236 /* Determine normal and reversed comparison operators for both positive
1237 operands. This enables us to go completely unsigned.
1238
1239 NOTE: We cannot use the R print modifier because we convert signed
1240 comparison operators to unsigned ones. */
1241 switch (code)
1242 {
1243 case LT: cmp_opstr = "qblt"; rcmp_opstr = "qbge"; break;
1244 case LE: cmp_opstr = "qble"; rcmp_opstr = "qbgt"; break;
1245 case GT: cmp_opstr = "qbgt"; rcmp_opstr = "qble"; break;
1246 case GE: cmp_opstr = "qbge"; rcmp_opstr = "qblt"; break;
1247 default: gcc_unreachable ();
1248 }
1249
1250 /* OP2 is a constant unsigned byte - utilize this info to generate
1251 optimized code. We can "remove half" of the op table above because
1252 we know that OP2.b31 = 0 (remember that 0 <= OP2 <= 255). */
1253 if (code == LT || code == LE)
1254 {
1255 if (is_near)
1256 snprintf (buf, sizeof (buf),
1257 "qbbs\t.+8, %%1, %d\n\t"
1258 "%s\t%%l3, %%1, %%u2",
1259 regop_sign_bit_pos,
1260 cmp_opstr);
1261 else
1262 snprintf (buf, sizeof (buf),
1263 "qbbs\t.+12, %%1, %d\n\t"
1264 "%s\t.+8, %%1, %%u2\n\t"
1265 "jmp\t%%%%label(%%l3)",
1266 regop_sign_bit_pos,
1267 rcmp_opstr);
1268 }
1269 else if (code == GT || code == GE)
1270 {
1271 if (is_near)
1272 snprintf (buf, sizeof (buf),
1273 "qbbs\t%%l3, %%1, %d\n\t"
1274 "%s\t%%l3, %%1, %%u2",
1275 regop_sign_bit_pos,
1276 cmp_opstr);
1277 else
1278 snprintf (buf, sizeof (buf),
1279 "qbbs\t.+8, %%1, %d\n\t"
1280 "%s\t.+8, %%1, %%u2\n\t"
1281 "jmp\t%%%%label(%%l3)",
1282 regop_sign_bit_pos,
1283 rcmp_opstr);
1284 }
1285 else
1286 gcc_unreachable ();
1287
1288 return buf;
1289}
1290
1291/* Optimized version of pru_output_signed_cbranch_ubyteop2 for constant
1292 zero second operand. */
1293
1294const char *
1295pru_output_signed_cbranch_zeroop2 (rtx *operands, bool is_near)
1296{
1297 static char buf[1024];
1298 enum rtx_code code = GET_CODE (operands[0]);
1299 int regop_sign_bit_pos = sign_bit_position (operands[1]);
1300
1301 /* OP2 is a constant zero - utilize this info to simply check the
1302 OP1 sign bit when comparing for LT or GE. */
1303 if (code == LT)
1304 {
1305 if (is_near)
1306 snprintf (buf, sizeof (buf),
1307 "qbbs\t%%l3, %%1, %d\n\t",
1308 regop_sign_bit_pos);
1309 else
1310 snprintf (buf, sizeof (buf),
1311 "qbbc\t.+8, %%1, %d\n\t"
1312 "jmp\t%%%%label(%%l3)",
1313 regop_sign_bit_pos);
1314 }
1315 else if (code == GE)
1316 {
1317 if (is_near)
1318 snprintf (buf, sizeof (buf),
1319 "qbbc\t%%l3, %%1, %d\n\t",
1320 regop_sign_bit_pos);
1321 else
1322 snprintf (buf, sizeof (buf),
1323 "qbbs\t.+8, %%1, %d\n\t"
1324 "jmp\t%%%%label(%%l3)",
1325 regop_sign_bit_pos);
1326 }
1327 else
1328 gcc_unreachable ();
1329
1330 return buf;
1331}
1332
1333/* Addressing Modes. */
1334
1335/* Return true if register REGNO is a valid base register.
1336 STRICT_P is true if REG_OK_STRICT is in effect. */
1337
1338bool
1339pru_regno_ok_for_base_p (int regno, bool strict_p)
1340{
1341 if (!HARD_REGISTER_NUM_P (regno) && !strict_p)
1342 return true;
1343
1344 /* The fake registers will be eliminated to either the stack or
1345 hard frame pointer, both of which are usually valid base registers.
1346 Reload deals with the cases where the eliminated form isn't valid. */
1347 return (GP_REG_P (regno)
1348 || regno == FRAME_POINTER_REGNUM
1349 || regno == ARG_POINTER_REGNUM);
1350}
1351
1352/* Return true if given xbbo constant OFFSET is valid. */
1353static bool
1354pru_valid_const_ubyte_offset (machine_mode mode, HOST_WIDE_INT offset)
1355{
1356 bool valid = UBYTE_INT (offset);
1357
1358 /* Reload can split multi word accesses, so make sure we can address
1359 the second word in a DI. */
1360 if (valid && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode))
1361 valid = UBYTE_INT (offset + GET_MODE_SIZE (mode) - 1);
1362
1363 return valid;
1364}
1365
1366/* Recognize a CTABLE base address. Return CTABLE entry index, or -1 if
1367 base was not found in the pragma-filled pru_ctable. */
1368int
1369pru_get_ctable_exact_base_index (unsigned HOST_WIDE_INT caddr)
1370{
1371 unsigned int i;
1372
1373 for (i = 0; i < ARRAY_SIZE (pru_ctable); i++)
1374 {
1375 if (pru_ctable[i].valid && pru_ctable[i].base == caddr)
1376 return i;
1377 }
1378 return -1;
1379}
1380
1381
1382/* Check if the given address can be addressed via CTABLE_BASE + UBYTE_OFFS,
1383 and return the base CTABLE index if possible. */
1384int
1385pru_get_ctable_base_index (unsigned HOST_WIDE_INT caddr)
1386{
1387 unsigned int i;
1388
1389 for (i = 0; i < ARRAY_SIZE (pru_ctable); i++)
1390 {
1391 if (pru_ctable[i].valid && IN_RANGE (caddr,
1392 pru_ctable[i].base,
1393 pru_ctable[i].base + 0xff))
1394 return i;
1395 }
1396 return -1;
1397}
1398
1399
1400/* Return the offset from some CTABLE base for this address. */
1401int
1402pru_get_ctable_base_offset (unsigned HOST_WIDE_INT caddr)
1403{
1404 int i;
1405
1406 i = pru_get_ctable_base_index (caddr);
1407 gcc_assert (i >= 0);
1408
1409 return caddr - pru_ctable[i].base;
1410}
1411
1412/* Return true if the address expression formed by BASE + OFFSET is
1413 valid.
1414
1415 Note that the following address is not handled here:
1416 base CTABLE constant base + UBYTE constant offset
1417 The constants will be folded. The ctable_addr_operand predicate will take
1418 care of the validation. The CTABLE base+offset split will happen during
1419 operand printing. */
1420static bool
1421pru_valid_addr_expr_p (machine_mode mode, rtx base, rtx offset, bool strict_p)
1422{
1423 if (!strict_p && GET_CODE (base) == SUBREG)
1424 base = SUBREG_REG (base);
1425 if (!strict_p && GET_CODE (offset) == SUBREG)
1426 offset = SUBREG_REG (offset);
1427
1428 if (REG_P (base)
1429 && pru_regno_ok_for_base_p (REGNO (base), strict_p)
1430 && ((CONST_INT_P (offset)
1431 && pru_valid_const_ubyte_offset (mode, INTVAL (offset)))
1432 || (REG_P (offset)
1433 && pru_regno_ok_for_index_p (REGNO (offset), strict_p))))
1434 /* base register + register offset
1435 * OR base register + UBYTE constant offset. */
1436 return true;
1437 else if (REG_P (base)
1438 && pru_regno_ok_for_index_p (REGNO (base), strict_p)
1439 && ctable_base_operand (offset, VOIDmode))
1440 /* base CTABLE constant base + register offset
1441 * Note: GCC always puts the register as a first operand of PLUS. */
1442 return true;
1443 else
1444 return false;
1445}
1446
8bafc964
DD
1447/* Return register number (either for r30 or r31) which maps to the
1448 corresponding symbol OP's name in the __regio_symbol address namespace.
1449
1450 If no mapping can be established (i.e. symbol name is invalid), then
1451 return -1. */
1452int pru_symref2ioregno (rtx op)
1453{
1454 if (!SYMBOL_REF_P (op))
1455 return -1;
1456
1457 const char *name = XSTR (op, 0);
1458 if (!strcmp (name, "__R30"))
1459 return R30_REGNUM;
1460 else if (!strcmp (name, "__R31"))
1461 return R31_REGNUM;
1462 else
1463 return -1;
1464}
1465
1466/* Implement TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P. */
8d2af3a2 1467static bool
8bafc964 1468pru_addr_space_legitimate_address_p (machine_mode mode, rtx operand,
165b1f6a
KL
1469 bool strict_p, addr_space_t as,
1470 code_helper = ERROR_MARK)
8d2af3a2 1471{
8bafc964
DD
1472 if (as == ADDR_SPACE_REGIO)
1473 {
1474 /* Address space constraints for __regio_symbol have been checked in
1475 TARGET_INSERT_ATTRIBUTES, and some more checks will be done
1476 during RTL expansion of "mov<mode>". */
1477 return true;
1478 }
1479 else if (as != ADDR_SPACE_GENERIC)
1480 {
1481 gcc_unreachable ();
1482 }
1483
8d2af3a2
DD
1484 switch (GET_CODE (operand))
1485 {
1486 /* Direct. */
1487 case SYMBOL_REF:
1488 case LABEL_REF:
1489 case CONST:
1490 case CONST_WIDE_INT:
1491 return false;
1492
1493 case CONST_INT:
1494 return ctable_addr_operand (operand, VOIDmode);
1495
1496 /* Register indirect. */
1497 case REG:
1498 return pru_regno_ok_for_base_p (REGNO (operand), strict_p);
1499
1500 /* Register indirect with displacement. */
1501 case PLUS:
1502 {
1503 rtx op0 = XEXP (operand, 0);
1504 rtx op1 = XEXP (operand, 1);
1505
1506 return pru_valid_addr_expr_p (mode, op0, op1, strict_p);
1507 }
1508
1509 default:
1510 break;
1511 }
1512 return false;
1513}
1514\f
1515/* Output assembly language related definitions. */
1516
1517/* Implement TARGET_ASM_CONSTRUCTOR. */
1518static void
1519pru_elf_asm_constructor (rtx symbol, int priority)
1520{
1521 char buf[23];
1522 section *s;
1523
1524 if (priority == DEFAULT_INIT_PRIORITY)
1525 snprintf (buf, sizeof (buf), ".init_array");
1526 else
1527 {
1528 /* While priority is known to be in range [0, 65535], so 18 bytes
1529 would be enough, the compiler might not know that. To avoid
1530 -Wformat-truncation false positive, use a larger size. */
1531 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
1532 }
1533 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
1534 switch_to_section (s);
1535 assemble_aligned_integer (INIT_ARRAY_ENTRY_BYTES, symbol);
1536}
1537
1538/* Implement TARGET_ASM_DESTRUCTOR. */
1539static void
1540pru_elf_asm_destructor (rtx symbol, int priority)
1541{
1542 char buf[23];
1543 section *s;
1544
1545 if (priority == DEFAULT_INIT_PRIORITY)
1546 snprintf (buf, sizeof (buf), ".fini_array");
1547 else
1548 {
1549 /* While priority is known to be in range [0, 65535], so 18 bytes
1550 would be enough, the compiler might not know that. To avoid
1551 -Wformat-truncation false positive, use a larger size. */
1552 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
1553 }
1554 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
1555 switch_to_section (s);
1556 assemble_aligned_integer (INIT_ARRAY_ENTRY_BYTES, symbol);
1557}
1558
1559/* Map rtx_code to unsigned PRU branch op suffix. Callers must
1560 handle sign comparison themselves for signed operations. */
1561static const char *
1562pru_comparison_str (enum rtx_code cond)
1563{
1564 switch (cond)
1565 {
1566 case NE: return "ne";
1567 case EQ: return "eq";
1568 case GEU: return "ge";
1569 case GTU: return "gt";
1570 case LEU: return "le";
1571 case LTU: return "lt";
1572 default: gcc_unreachable ();
1573 }
1574}
1575
1576/* Access some RTX as INT_MODE. If X is a CONST_FIXED we can get
1577 the bit representation of X by "casting" it to CONST_INT. */
1578
1579static rtx
1580pru_to_int_mode (rtx x)
1581{
1582 machine_mode mode = GET_MODE (x);
1583
1584 return VOIDmode == mode
1585 ? x
1586 : simplify_gen_subreg (int_mode_for_mode (mode).require (), x, mode, 0);
1587}
1588
1589/* Translate between the MachineDescription notion
1590 of 8-bit consecutive registers, to the PRU
1591 assembler syntax of REGWORD[.SUBREG]. */
1592static const char *
1593pru_asm_regname (rtx op)
1594{
1595 static char canon_reg_names[3][LAST_GP_REGNUM][8];
1596 int speci, regi;
1597
1598 gcc_assert (REG_P (op));
1599
1600 if (!canon_reg_names[0][0][0])
1601 {
1602 for (regi = 0; regi < LAST_GP_REGNUM; regi++)
1603 for (speci = 0; speci < 3; speci++)
1604 {
1605 const int sz = (speci == 0) ? 1 : ((speci == 1) ? 2 : 4);
1606 if ((regi + sz) > (32 * 4))
1607 continue; /* Invalid entry. */
1608
1609 /* Construct the lookup table. */
1610 const char *suffix = "";
1611
1612 switch ((sz << 8) | (regi % 4))
1613 {
1614 case (1 << 8) | 0: suffix = ".b0"; break;
1615 case (1 << 8) | 1: suffix = ".b1"; break;
1616 case (1 << 8) | 2: suffix = ".b2"; break;
1617 case (1 << 8) | 3: suffix = ".b3"; break;
1618 case (2 << 8) | 0: suffix = ".w0"; break;
1619 case (2 << 8) | 1: suffix = ".w1"; break;
1620 case (2 << 8) | 2: suffix = ".w2"; break;
1621 case (4 << 8) | 0: suffix = ""; break;
1622 default:
1623 /* Invalid entry. */
1624 continue;
1625 }
1626 sprintf (&canon_reg_names[speci][regi][0],
1627 "r%d%s", regi / 4, suffix);
1628 }
1629 }
1630
1631 switch (GET_MODE_SIZE (GET_MODE (op)))
1632 {
1633 case 1: speci = 0; break;
1634 case 2: speci = 1; break;
1635 case 4: speci = 2; break;
1636 case 8: speci = 2; break; /* Existing GCC test cases are not using %F. */
1637 default: gcc_unreachable ();
1638 }
1639 regi = REGNO (op);
1640 gcc_assert (regi < LAST_GP_REGNUM);
1641 gcc_assert (canon_reg_names[speci][regi][0]);
1642
1643 return &canon_reg_names[speci][regi][0];
1644}
1645
1646/* Print the operand OP to file stream FILE modified by LETTER.
1647 LETTER can be one of:
1648
1649 b: prints the register byte start (used by LBBO/SBBO).
1650 B: prints 'c' or 'b' for CTABLE or REG base in a memory address.
1651 F: Full 32-bit register.
1652 H: Higher 16-bits of a const_int operand.
1653 L: Lower 16-bits of a const_int operand.
1654 N: prints next 32-bit register (upper 32bits of a 64bit REG couple).
1655 P: prints swapped condition.
1656 Q: prints swapped and reversed condition.
1657 R: prints reversed condition.
1658 S: print operand mode size (but do not print the operand itself).
1659 T: print exact_log2 () for const_int operands.
1660 u: print QI constant integer as unsigned. No transformation for regs.
1661 V: print exact_log2 () of negated const_int operands.
1662 w: Lower 32-bits of a const_int operand.
1663 W: Upper 32-bits of a const_int operand.
8d2af3a2
DD
1664*/
/* A LETTER of 0 means "no modifier".  Any operand/LETTER combination
   that is not handled below is reported through
   output_operand_lossage instead of being printed.  */
1665static void
1666pru_print_operand (FILE *file, rtx op, int letter)
1667{
/* 'S' depends only on the operand's mode, so it is handled before the
   operand's rtx code is looked at.  */
1668 switch (letter)
1669 {
1670 case 'S':
1671 fprintf (file, "%d", GET_MODE_SIZE (GET_MODE (op)));
1672 return;
1673
1674 default:
1675 break;
1676 }
1677
/* Comparison operators are printed as a branch op suffix.  Signed
   comparisons must never get here (see the assertion).  A LETTER other
   than 0, 'P', 'Q' or 'R' leaves this block without printing anything
   and continues with the rtx code switch below.  */
1678 if (comparison_operator (op, VOIDmode))
1679 {
1680 enum rtx_code cond = GET_CODE (op);
1681 gcc_assert (!pru_signed_cmp_operator (op, VOIDmode));
1682
1683 switch (letter)
1684 {
1685 case 0:
1686 fprintf (file, "%s", pru_comparison_str (cond));
1687 return;
1688 case 'P':
1689 fprintf (file, "%s", pru_comparison_str (swap_condition (cond)));
1690 return;
1691 case 'Q':
/* 'Q' is 'R' applied to the swapped condition.  */
1692 cond = swap_condition (cond);
3d1ca857 1693 /* Fall through. */
8d2af3a2
DD
1694 case 'R':
1695 fprintf (file, "%s", pru_comparison_str (reverse_condition (cond)));
1696 return;
1697 }
1698 }
1699
1700 switch (GET_CODE (op))
1701 {
1702 case REG:
1703 if (letter == 0 || letter == 'u')
1704 {
1705 fprintf (file, "%s", pru_asm_regname (op));
1706 return;
1707 }
1708 else if (letter == 'b')
1709 {
1710 if (REGNO (op) > LAST_NONIO_GP_REGNUM)
1711 {
1712 output_operand_lossage ("I/O register operand for '%%%c'",
1713 letter);
1714 return;
1715 }
/* Register numbers count bytes: four per 32-bit register.  */
1716 fprintf (file, "r%d.b%d", REGNO (op) / 4, REGNO (op) % 4);
1717 return;
1718 }
1719 else if (letter == 'F' || letter == 'N')
1720 {
1721 if (REGNO (op) > LAST_NONIO_GP_REGNUM - 1)
1722 {
1723 output_operand_lossage ("I/O register operand for '%%%c'",
1724 letter);
1725 return;
1726 }
/* Only a register number that starts a 32-bit register is accepted.  */
1727 if (REGNO (op) % 4 != 0)
1728 {
1729 output_operand_lossage ("non 32 bit register operand for '%%%c'",
1730 letter);
1731 return;
1732 }
1733 fprintf (file, "r%d", REGNO (op) / 4 + (letter == 'N' ? 1 : 0));
1734 return;
1735 }
8d2af3a2
DD
1736 break;
1737
1738 case CONST_INT:
1739 if (letter == 'H')
1740 {
1741 HOST_WIDE_INT val = INTVAL (op);
1742 val = (val >> 16) & 0xFFFF;
1743 output_addr_const (file, gen_int_mode (val, SImode));
1744 return;
1745 }
1746 else if (letter == 'L')
1747 {
1748 HOST_WIDE_INT val = INTVAL (op);
1749 val &= 0xFFFF;
1750 output_addr_const (file, gen_int_mode (val, SImode));
1751 return;
1752 }
1753 else if (letter == 'T')
1754 {
1755 /* The predicate should have already validated the 1-high-bit
1756 requirement. Use CTZ here to deal with constant's sign
1757 extension. */
1758 HOST_WIDE_INT val = wi::ctz (INTVAL (op));
1759 if (val < 0 || val > 31)
1760 {
1761 output_operand_lossage ("invalid operand for '%%%c'", letter);
1762 return;
1763 }
1764 output_addr_const (file, gen_int_mode (val, SImode));
1765 return;
1766 }
1767 else if (letter == 'V')
1768 {
/* Same as 'T', but applied to the bitwise complement of the constant.  */
1769 HOST_WIDE_INT val = wi::ctz (~INTVAL (op));
1770 if (val < 0 || val > 31)
1771 {
1772 output_operand_lossage ("invalid operand for '%%%c'", letter);
1773 return;
1774 }
1775 output_addr_const (file, gen_int_mode (val, SImode));
1776 return;
1777 }
1778 else if (letter == 'w')
1779 {
1780 HOST_WIDE_INT val = INTVAL (op) & 0xffffffff;
1781 output_addr_const (file, gen_int_mode (val, SImode));
1782 return;
1783 }
1784 else if (letter == 'W')
1785 {
1786 HOST_WIDE_INT val = (INTVAL (op) >> 32) & 0xffffffff;
1787 output_addr_const (file, gen_int_mode (val, SImode));
1788 return;
1789 }
1790 else if (letter == 'u')
1791 {
1792 /* Workaround GCC's representation of QI constants in sign-extended
1793 form, and PRU's assembler insistence on unsigned constant
1794 integers. See the notes about O constraint. */
1795 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op) & 0xff);
1796 return;
1797 }
1798 /* Else, fall through. */
1799
/* A CONST_INT with any other LETTER shares the handling below: it is
   printed only when no modifier was given.  */
1800 case CONST:
1801 case LABEL_REF:
1802 case SYMBOL_REF:
1803 if (letter == 0)
1804 {
1805 output_addr_const (file, op);
1806 return;
1807 }
1808 break;
1809
1810 case CONST_FIXED:
1811 {
1812 HOST_WIDE_INT ival = INTVAL (pru_to_int_mode (op));
/* Note that the value is still printed after a modifier letter has
   been reported as unsupported.  */
1813 if (letter != 0)
1814 output_operand_lossage ("unsupported code '%c' for fixed-point:",
1815 letter);
1816 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
1817 return;
1818 }
1819 break;
1820
1821 case CONST_DOUBLE:
1822 if (letter == 0)
1823 {
1824 long val;
1825
/* Without a modifier only SFmode constants can be printed.  */
1826 if (GET_MODE (op) != SFmode)
1827 {
1828 output_operand_lossage ("double constants not supported");
1829 return;
1830 }
1831 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), val);
1832 fprintf (file, "0x%lx", val);
1833 return;
1834 }
1835 else if (letter == 'w' || letter == 'W')
1836 {
/* 'w' prints element 0 and 'W' element 1 of the target double image.  */
1837 long t[2];
1838 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), t);
1839 fprintf (file, "0x%lx", t[letter == 'w' ? 0 : 1]);
1840 return;
1841 }
1842 else
1843 {
1844 output_operand_lossage ("invalid operand for '%%%c'", letter);
1845 return;
1846 }
1847 break;
1848
1849 case SUBREG:
1850 /* Subregs should not appear at so late stage. */
1851 gcc_unreachable ();
1852 break;
1853
1854 case MEM:
1855 if (letter == 0)
1856 {
1857 output_address (VOIDmode, op);
1858 return;
1859 }
1860 else if (letter == 'B')
1861 {
/* Print 'c' for a CTABLE based address and 'b' for a register based
   one.  */
1862 rtx base = XEXP (op, 0);
1863 if (GET_CODE (base) == PLUS)
1864 {
1865 rtx op0 = XEXP (base, 0);
1866 rtx op1 = XEXP (base, 1);
1867
1868 /* PLUS cannot have two constant operands, so first one
1869 of them must be a REG, hence we must check for an
1870 exact base address. */
1871 if (ctable_base_operand (op1, VOIDmode))
1872 {
1873 fprintf (file, "c");
1874 return;
1875 }
1876 else if (REG_P (op0))
1877 {
1878 fprintf (file, "b");
1879 return;
1880 }
1881 else
1882 gcc_unreachable ();
1883 }
1884 else if (REG_P (base))
1885 {
1886 fprintf (file, "b");
1887 return;
1888 }
1889 else if (ctable_addr_operand (base, VOIDmode))
1890 {
1891 fprintf (file, "c");
1892 return;
1893 }
1894 else
1895 gcc_unreachable ();
1896 }
1897 break;
1898
1899 case CODE_LABEL:
1900 if (letter == 0)
1901 {
1902 output_addr_const (file, op);
1903 return;
1904 }
1905 break;
1906
1907 default:
1908 break;
1909 }
1910
/* Reached by every case above that ended in "break" rather than
   "return": the operand/LETTER combination is not supported.  */
1911 output_operand_lossage ("unsupported operand %s for code '%c'",
1912 GET_RTX_NAME (GET_CODE (op)), letter);
1913}
1914
1915/* Implement TARGET_PRINT_OPERAND_ADDRESS. */
1916static void
1917pru_print_operand_address (FILE *file, machine_mode mode, rtx op)
1918{
1919 if (CONSTANT_ADDRESS_P (op) && text_segment_operand (op, VOIDmode))
1920 {
1921 output_operand_lossage ("unexpected text address:");
1922 return;
1923 }
1924
1925 switch (GET_CODE (op))
1926 {
1927 case CONST:
1928 case LABEL_REF:
1929 case CONST_WIDE_INT:
1930 case SYMBOL_REF:
1931 break;
1932
1933 case CONST_INT:
1934 {
1935 unsigned HOST_WIDE_INT caddr = INTVAL (op);
1936 int base = pru_get_ctable_base_index (caddr);
1937 int offs = pru_get_ctable_base_offset (caddr);
1938 if (base < 0)
1939 {
1940 output_operand_lossage ("unsupported constant address:");
1941 return;
1942 }
1943 fprintf (file, "%d, %d", base, offs);
1944 return;
1945 }
1946 break;
1947
1948 case PLUS:
1949 {
1950 int base;
1951 rtx op0 = XEXP (op, 0);
1952 rtx op1 = XEXP (op, 1);
1953
1954 if (REG_P (op0) && CONST_INT_P (op1)
1955 && pru_get_ctable_exact_base_index (INTVAL (op1)) >= 0)
1956 {
1957 base = pru_get_ctable_exact_base_index (INTVAL (op1));
1958 fprintf (file, "%d, %s", base, pru_asm_regname (op0));
1959 return;
1960 }
1961 else if (REG_P (op1) && CONST_INT_P (op0)
1962 && pru_get_ctable_exact_base_index (INTVAL (op0)) >= 0)
1963 {
1964 /* Not a valid RTL. */
1965 gcc_unreachable ();
1966 }
1967 else if (REG_P (op0) && CONSTANT_P (op1))
1968 {
1969 fprintf (file, "%s, ", pru_asm_regname (op0));
1970 output_addr_const (file, op1);
1971 return;
1972 }
1973 else if (REG_P (op1) && CONSTANT_P (op0))
1974 {
1975 /* Not a valid RTL. */
1976 gcc_unreachable ();
1977 }
1978 else if (REG_P (op1) && REG_P (op0))
1979 {
1980 fprintf (file, "%s, %s", pru_asm_regname (op0),
1981 pru_asm_regname (op1));
1982 return;
1983 }
1984 }
1985 break;
1986
1987 case REG:
1988 fprintf (file, "%s, 0", pru_asm_regname (op));
1989 return;
1990
1991 case MEM:
1992 {
1993 rtx base = XEXP (op, 0);
1994 pru_print_operand_address (file, mode, base);
1995 return;
1996 }
1997 default:
1998 break;
1999 }
2000
2001 output_operand_lossage ("unsupported memory expression:");
2002}
2003
2004/* Implement TARGET_ASM_FUNCTION_PROLOGUE. */
2005static void
2006pru_asm_function_prologue (FILE *file)
2007{
2008 if (flag_verbose_asm || flag_debug_asm)
2009 pru_dump_frame_layout (file);
2010}
2011
2012/* Implement `TARGET_ASM_INTEGER'.
2013 Target hook for assembling integer objects. PRU version needs
2014 special handling for references to pmem. Code copied from AVR. */
2015
2016static bool
2017pru_assemble_integer (rtx x, unsigned int size, int aligned_p)
2018{
2019 if (size == POINTER_SIZE / BITS_PER_UNIT
2020 && aligned_p
2021 && text_segment_operand (x, VOIDmode))
2022 {
2023 fputs ("\t.4byte\t%pmem(", asm_out_file);
2024 output_addr_const (asm_out_file, x);
2025 fputs (")\n", asm_out_file);
2026
2027 return true;
2028 }
2029 else if (size == INIT_ARRAY_ENTRY_BYTES
2030 && aligned_p
2031 && text_segment_operand (x, VOIDmode))
2032 {
2033 fputs ("\t.2byte\t%pmem(", asm_out_file);
2034 output_addr_const (asm_out_file, x);
2035 fputs (")\n", asm_out_file);
2036
2037 return true;
2038 }
2039 else
2040 {
2041 return default_assemble_integer (x, size, aligned_p);
2042 }
2043}
2044
dda85bc2
DD
2045/* Implement TARGET_SECTION_TYPE_FLAGS. */
2046
2047static unsigned int
2048pru_section_type_flags (tree decl, const char *name, int reloc)
2049{
2050 unsigned int flags = default_section_type_flags (decl, name, reloc);
2051
2052 /* The .pru_irq_map section is not meant to be loaded into the target
2053 memory. Instead its contents are read by the host remoteproc loader.
2054 To prevent being marked as a loadable (allocated) section, the
2055 .pru_irq_map section is intercepted and marked as a debug section. */
2056 if (!strcmp (name, ".pru_irq_map"))
2057 flags = SECTION_DEBUG | SECTION_RETAIN;
2058
2059 return flags;
2060}
2061
8d2af3a2
DD
2062/* Implement TARGET_ASM_FILE_START. */
2063
2064static void
2065pru_file_start (void)
2066{
2067 default_file_start ();
2068
2069 /* Compiler will take care of placing %label, so there is no
2070 need to confuse users with this warning. */
2071 fprintf (asm_out_file, "\t.set no_warn_regname_label\n");
2072}
8bafc964
DD
2073
2074/* Scan type TYP for pointer references to address space other than
2075 ADDR_SPACE_GENERIC. Return true if such reference is found.
2076 Much of this code was taken from the avr port. */
2077
2078static bool
2079pru_nongeneric_pointer_addrspace (tree typ)
2080{
2081 while (ARRAY_TYPE == TREE_CODE (typ))
2082 typ = TREE_TYPE (typ);
2083
2084 if (POINTER_TYPE_P (typ))
2085 {
2086 addr_space_t as;
2087 tree target = TREE_TYPE (typ);
2088
2089 /* Pointer to function: Test the function's return type. */
2090 if (FUNCTION_TYPE == TREE_CODE (target))
2091 return pru_nongeneric_pointer_addrspace (TREE_TYPE (target));
2092
2093 /* "Ordinary" pointers... */
2094
2095 while (TREE_CODE (target) == ARRAY_TYPE)
2096 target = TREE_TYPE (target);
2097
2098 as = TYPE_ADDR_SPACE (target);
2099
2100 if (!ADDR_SPACE_GENERIC_P (as))
2101 return true;
2102
2103 /* Scan pointer's target type. */
2104 return pru_nongeneric_pointer_addrspace (target);
2105 }
2106
2107 return false;
2108}
2109
2110/* Implement `TARGET_INSERT_ATTRIBUTES'. For PRU it's used as a hook to
2111 provide better diagnostics for some invalid usages of the __regio_symbol
2112 address space.
2113
2114 Any escapes of the following checks are supposed to be caught
2115 during the "mov<mode>" pattern expansion. */
2116
2117static void
2118pru_insert_attributes (tree node, tree *attributes ATTRIBUTE_UNUSED)
2119{
2120
2121 /* Validate __regio_symbol variable declarations. */
2122 if (VAR_P (node))
2123 {
2124 const char *name = DECL_NAME (node)
2125 ? IDENTIFIER_POINTER (DECL_NAME (node))
2126 : "<unknown>";
2127 tree typ = TREE_TYPE (node);
2128 addr_space_t as = TYPE_ADDR_SPACE (typ);
2129
2130 if (as == ADDR_SPACE_GENERIC)
2131 return;
2132
2133 if (AGGREGATE_TYPE_P (typ))
2134 {
2135 error ("aggregate types are prohibited in "
2136 "%<__regio_symbol%> address space");
2137 /* Don't bother anymore. Below checks would pile
2138 meaningless errors, which would confuse user. */
2139 return;
2140 }
2141 if (DECL_INITIAL (node) != NULL_TREE)
2142 error ("variables in %<__regio_symbol%> address space "
2143 "cannot have initial value");
2144 if (DECL_REGISTER (node))
2145 error ("variables in %<__regio_symbol%> address space "
2146 "cannot be declared %<register%>");
2147 if (!TYPE_VOLATILE (typ))
2148 error ("variables in %<__regio_symbol%> address space "
2149 "must be declared %<volatile%>");
2150 if (!DECL_EXTERNAL (node))
2151 error ("variables in %<__regio_symbol%> address space "
2152 "must be declared %<extern%>");
2153 if (TYPE_MODE (typ) != SImode)
2154 error ("only 32-bit access is supported "
2155 "for %<__regio_symbol%> address space");
2156 if (strcmp (name, "__R30") != 0 && strcmp (name, "__R31") != 0)
2157 error ("register name %<%s%> not recognized "
2158 "in %<__regio_symbol%> address space", name);
2159 }
2160
2161 tree typ = NULL_TREE;
2162
2163 switch (TREE_CODE (node))
2164 {
2165 case FUNCTION_DECL:
2166 typ = TREE_TYPE (TREE_TYPE (node));
2167 break;
2168 case TYPE_DECL:
2169 case RESULT_DECL:
2170 case VAR_DECL:
2171 case FIELD_DECL:
2172 case PARM_DECL:
2173 typ = TREE_TYPE (node);
2174 break;
2175 case POINTER_TYPE:
2176 typ = node;
2177 break;
2178 default:
2179 break;
2180 }
2181 if (typ != NULL_TREE && pru_nongeneric_pointer_addrspace (typ))
2182 error ("pointers to %<__regio_symbol%> address space are prohibited");
2183}
8d2af3a2
DD
2184\f
2185/* Function argument related. */
2186
2187/* Return the number of bytes needed for storing an argument with
2188 the given MODE and TYPE. */
2189static int
2190pru_function_arg_size (machine_mode mode, const_tree type)
2191{
2192 HOST_WIDE_INT param_size;
2193
2194 if (mode == BLKmode)
2195 param_size = int_size_in_bytes (type);
2196 else
2197 param_size = GET_MODE_SIZE (mode);
2198
2199 /* Convert to words (round up). */
2200 param_size = (UNITS_PER_WORD - 1 + param_size) / UNITS_PER_WORD;
2201 gcc_assert (param_size >= 0);
2202
2203 return param_size;
2204}
2205
/* Return true if an argument or return value of SZ bytes must be
   passed/returned in a register.

   Reference:
   https://e2e.ti.com/support/development_tools/compiler/f/343/p/650176/2393029

   Arguments other than 8/16/24/32/64bits are passed on stack.  */
static bool
pru_arg_in_reg_bysize (size_t sz)
{
  switch (sz)
    {
    case 1:
    case 2:
    case 3:
    case 4:
    case 8:
      return true;
    default:
      return false;
    }
}
2218
2219/* Helper function to get the starting storage HW register for an argument,
2220 or -1 if it must be passed on stack. The cum_v state is not changed. */
2221static int
2222pru_function_arg_regi (cumulative_args_t cum_v,
2223 machine_mode mode, const_tree type,
2224 bool named)
2225{
2226 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2227 size_t argsize = pru_function_arg_size (mode, type);
2228 size_t i, bi;
2229 int regi = -1;
2230
2231 if (!pru_arg_in_reg_bysize (argsize))
2232 return -1;
2233
2234 if (!named)
2235 return -1;
2236
2237 /* Find the first available slot that fits. Yes, that's the PRU ABI. */
2238 for (i = 0; regi < 0 && i < ARRAY_SIZE (cum->regs_used); i++)
2239 {
2240 /* VLAs and vector types are not defined in the PRU ABI. Let's
2241 handle them the same as their same-sized counterparts. This way
2242 we do not need to treat BLKmode differently, and need only to check
2243 the size. */
2244 gcc_assert (argsize == 1 || argsize == 2 || argsize == 3
2245 || argsize == 4 || argsize == 8);
2246
2247 /* Ensure SI and DI arguments are stored in full registers only. */
2248 if ((argsize >= 4) && (i % 4) != 0)
2249 continue;
2250
2251 /* Structures with size 24 bits are passed starting at a full
2252 register boundary. */
2253 if (argsize == 3 && (i % 4) != 0)
2254 continue;
2255
2256 /* rX.w0/w1/w2 are OK. But avoid spreading the second byte
2257 into a different full register. */
2258 if (argsize == 2 && (i % 4) == 3)
2259 continue;
2260
2261 for (bi = 0;
2262 bi < argsize && (bi + i) < ARRAY_SIZE (cum->regs_used);
2263 bi++)
2264 {
2265 if (cum->regs_used[bi + i])
2266 break;
2267 }
2268 if (bi == argsize)
2269 regi = FIRST_ARG_REGNUM + i;
2270 }
2271
2272 return regi;
2273}
2274
2275/* Mark CUM_V that a function argument will occupy HW register slot starting
2276 at REGI. The number of consecutive 8-bit HW registers marked as occupied
2277 depends on the MODE and TYPE of the argument. */
2278static void
2279pru_function_arg_regi_mark_slot (int regi,
2280 cumulative_args_t cum_v,
2281 machine_mode mode, const_tree type,
2282 bool named)
2283{
2284 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2285 HOST_WIDE_INT param_size = pru_function_arg_size (mode, type);
2286
2287 gcc_assert (named);
2288
2289 /* Mark all byte sub-registers occupied by argument as used. */
2290 while (param_size--)
2291 {
2292 gcc_assert (regi >= FIRST_ARG_REGNUM && regi <= LAST_ARG_REGNUM);
2293 gcc_assert (!cum->regs_used[regi - FIRST_ARG_REGNUM]);
2294 cum->regs_used[regi - FIRST_ARG_REGNUM] = true;
2295 regi++;
2296 }
2297}
2298
2299/* Define where to put the arguments to a function. Value is zero to
2300 push the argument on the stack, or a hard register in which to
2301 store the argument.
2302
8d2af3a2
DD
2303 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2304 the preceding args and about the function being called.
6783fdb7 2305 ARG is a description of the argument. */
8d2af3a2
DD
2306
2307static rtx
6783fdb7 2308pru_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
8d2af3a2
DD
2309{
2310 rtx return_rtx = NULL_RTX;
6783fdb7 2311 int regi = pru_function_arg_regi (cum_v, arg.mode, arg.type, arg.named);
8d2af3a2
DD
2312
2313 if (regi >= 0)
6783fdb7 2314 return_rtx = gen_rtx_REG (arg.mode, regi);
8d2af3a2
DD
2315
2316 return return_rtx;
2317}
2318
2319/* Implement TARGET_ARG_PARTIAL_BYTES. PRU never splits any arguments
2320 between registers and memory, so we can return 0. */
2321
2322static int
a7c81bc1 2323pru_arg_partial_bytes (cumulative_args_t, const function_arg_info &)
8d2af3a2
DD
2324{
2325 return 0;
2326}
2327
6930c98c 2328/* Update the data in CUM to advance over argument ARG. */
8d2af3a2
DD
2329
2330static void
6930c98c
RS
2331pru_function_arg_advance (cumulative_args_t cum_v,
2332 const function_arg_info &arg)
8d2af3a2 2333{
6930c98c 2334 int regi = pru_function_arg_regi (cum_v, arg.mode, arg.type, arg.named);
8d2af3a2
DD
2335
2336 if (regi >= 0)
6930c98c
RS
2337 pru_function_arg_regi_mark_slot (regi, cum_v, arg.mode,
2338 arg.type, arg.named);
8d2af3a2
DD
2339}
2340
2341/* Implement TARGET_FUNCTION_VALUE. */
2342static rtx
2343pru_function_value (const_tree ret_type, const_tree fn ATTRIBUTE_UNUSED,
2344 bool outgoing ATTRIBUTE_UNUSED)
2345{
2346 return gen_rtx_REG (TYPE_MODE (ret_type), FIRST_RETVAL_REGNUM);
2347}
2348
2349/* Implement TARGET_LIBCALL_VALUE. */
2350static rtx
2351pru_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
2352{
2353 return gen_rtx_REG (mode, FIRST_RETVAL_REGNUM);
2354}
2355
2356/* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
2357static bool
2358pru_function_value_regno_p (const unsigned int regno)
2359{
2360 return regno == FIRST_RETVAL_REGNUM;
2361}
2362
2363/* Implement TARGET_RETURN_IN_MEMORY. */
2364bool
2365pru_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
2366{
2367 bool in_memory = (!pru_arg_in_reg_bysize (int_size_in_bytes (type))
2368 || int_size_in_bytes (type) == -1);
2369
2370 return in_memory;
2371}
2372\f
2373/* Implement TARGET_CAN_USE_DOLOOP_P. */
2374
2375static bool
2376pru_can_use_doloop_p (const widest_int &, const widest_int &iterations_max,
2377 unsigned int loop_depth, bool)
2378{
2379 /* Considering limitations in the hardware, only use doloop
2380 for innermost loops which must be entered from the top. */
2381 if (loop_depth > 1)
2382 return false;
2383 /* PRU internal loop counter is 16bits wide. Remember that iterations_max
2384 holds the maximum number of loop latch executions, while PRU loop
2385 instruction needs the count of loop body executions. */
2386 if (iterations_max == 0 || wi::geu_p (iterations_max, 0xffff))
2387 return false;
2388
2389 return true;
2390}
2391
2392/* NULL if INSN insn is valid within a low-overhead loop.
2393 Otherwise return why doloop cannot be applied. */
2394
2395static const char *
2396pru_invalid_within_doloop (const rtx_insn *insn)
2397{
2398 if (CALL_P (insn))
2399 return "Function call in the loop.";
2400
2401 if (JUMP_P (insn) && INSN_CODE (insn) == CODE_FOR_return)
2402 return "Return from a call instruction in the loop.";
2403
2404 if (NONDEBUG_INSN_P (insn)
2405 && INSN_CODE (insn) < 0
2406 && (GET_CODE (PATTERN (insn)) == ASM_INPUT
2407 || asm_noperands (PATTERN (insn)) >= 0))
2408 return "Loop contains asm statement.";
2409
2410 return NULL;
2411}
2412
2413
2414/* Figure out where to put LABEL, which is the label for a repeat loop.
2415 The loop ends just before LAST_INSN. If SHARED, insns other than the
2416 "repeat" might use LABEL to jump to the loop's continuation point.
2417
2418 Return the last instruction in the adjusted loop. */
2419
2420static rtx_insn *
2421pru_insert_loop_label_last (rtx_insn *last_insn, rtx_code_label *label,
2422 bool shared)
2423{
2424 rtx_insn *next, *prev;
2425 int count = 0, code, icode;
2426
2427 if (dump_file)
2428 fprintf (dump_file, "considering end of repeat loop at insn %d\n",
2429 INSN_UID (last_insn));
2430
2431 /* Set PREV to the last insn in the loop. */
2432 prev = PREV_INSN (last_insn);
2433
2434 /* Set NEXT to the next insn after the loop label. */
2435 next = last_insn;
2436 if (!shared)
2437 while (prev != 0)
2438 {
2439 code = GET_CODE (prev);
2440 if (code == CALL_INSN || code == CODE_LABEL || code == BARRIER)
2441 break;
2442
2443 if (INSN_P (prev))
2444 {
2445 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
2446 prev = as_a <rtx_insn *> (XVECEXP (PATTERN (prev), 0, 1));
2447
2448 /* Other insns that should not be in the last two opcodes. */
2449 icode = recog_memoized (prev);
2450 if (icode < 0
2451 || icode == CODE_FOR_pruloophi
2452 || icode == CODE_FOR_pruloopsi)
2453 break;
2454
2455 count++;
2456 next = prev;
2457 if (dump_file)
2458 print_rtl_single (dump_file, next);
2459 if (count == 2)
2460 break;
2461 }
2462 prev = PREV_INSN (prev);
2463 }
2464
2465 /* Insert the nops. */
2466 if (dump_file && count < 2)
2467 fprintf (dump_file, "Adding %d nop%s inside loop\n\n",
2468 2 - count, count == 1 ? "" : "s");
2469
2470 for (; count < 2; count++)
2471 emit_insn_before (gen_nop (), last_insn);
2472
2473 /* Insert the label. */
2474 emit_label_before (label, last_insn);
2475
2476 return last_insn;
2477}
2478
2479/* If IS_END is false, expand a canonical doloop_begin RTL into the
2480 PRU-specific doloop_begin_internal. Otherwise expand doloop_end to
2481 doloop_end_internal. */
2482void
2483pru_emit_doloop (rtx *operands, int is_end)
2484{
2485 rtx tag;
2486
2487 if (cfun->machine->doloop_tags == 0
2488 || cfun->machine->doloop_tag_from_end == is_end)
2489 {
2490 cfun->machine->doloop_tags++;
2491 cfun->machine->doloop_tag_from_end = is_end;
2492 }
2493
2494 tag = GEN_INT (cfun->machine->doloop_tags - 1);
2495 machine_mode opmode = GET_MODE (operands[0]);
f886644b
DD
2496 gcc_assert (opmode == HImode || opmode == SImode);
2497
8d2af3a2 2498 if (is_end)
f886644b
DD
2499 emit_jump_insn (gen_doloop_end_internal (opmode, operands[0],
2500 operands[1], tag));
8d2af3a2 2501 else
f886644b
DD
2502 emit_insn (gen_doloop_begin_internal (opmode, operands[0],
2503 operands[0], tag));
8d2af3a2
DD
2504}
2505
2506
2507/* Code for converting doloop_begins and doloop_ends into valid
2508 PRU instructions. Idea and code snippets borrowed from mep port.
2509
2510 A doloop_begin is just a placeholder:
2511
2512 $count = unspec ($count)
2513
2514 where $count is initially the number of iterations.
2515 doloop_end has the form:
2516
2517 if (--$count != 0) goto label
2518
2519 The counter variable is private to the doloop insns, nothing else
2520 relies on its value.
2521
2522 There are three cases, in decreasing order of preference:
2523
2524 1. A loop has exactly one doloop_begin and one doloop_end.
2525 The doloop_end branches to the first instruction after
2526 the doloop_begin.
2527
2528 In this case we can replace the doloop_begin with a LOOP
2529 instruction and remove the doloop_end. I.e.:
2530
2531 $count1 = unspec ($count1)
2532 label:
2533 ...
2534 if (--$count2 != 0) goto label
2535
2536 becomes:
2537
2538 LOOP end_label,$count1
2539 label:
2540 ...
2541 end_label:
2542 # end loop
2543
2544 2. As for (1), except there are several doloop_ends. One of them
2545 (call it X) falls through to a label L. All the others fall
2546 through to branches to L.
2547
2548 In this case, we remove X and replace the other doloop_ends
2549 with branches to the LOOP label. For example:
2550
2551 $count1 = unspec ($count1)
2552 label:
2553 ...
2554 if (--$count1 != 0) goto label
2555 end_label:
2556 ...
2557 if (--$count2 != 0) goto label
2558 goto end_label
2559
2560 becomes:
2561
2562 LOOP end_label,$count1
2563 label:
2564 ...
2565 end_label:
2566 # end repeat
2567 ...
2568 goto end_label
2569
2570 3. The fallback case. Replace doloop_begins with:
2571
2572 $count = $count
2573
2574 Replace doloop_ends with the equivalent of:
2575
2576 $count = $count - 1
2577 if ($count != 0) goto loop_label
2578
2579 */
2580
2581/* A structure describing one doloop_begin. */
2582struct pru_doloop_begin {
2583 /* The next doloop_begin with the same tag. */
2584 struct pru_doloop_begin *next;
2585
2586 /* The instruction itself. */
2587 rtx_insn *insn;
2588
2589 /* The initial counter value. */
2590 rtx loop_count;
2591
2592 /* The counter register. */
2593 rtx counter;
2594};
2595
2596/* A structure describing a doloop_end. */
2597struct pru_doloop_end {
2598 /* The next doloop_end with the same loop tag. */
2599 struct pru_doloop_end *next;
2600
2601 /* The instruction itself. */
2602 rtx_insn *insn;
2603
2604 /* The first instruction after INSN when the branch isn't taken. */
2605 rtx_insn *fallthrough;
2606
2607 /* The location of the counter value. Since doloop_end_internal is a
2608 jump instruction, it has to allow the counter to be stored anywhere
2609 (any non-fixed register). */
2610 rtx counter;
2611
2612 /* The target label (the place where the insn branches when the counter
2613 isn't zero). */
2614 rtx label;
2615
2616 /* A scratch register. Only available when COUNTER isn't stored
2617 in a general register. */
2618 rtx scratch;
2619};
2620
2621
2622/* One do-while loop. */
2623struct pru_doloop {
2624 /* All the doloop_begins for this loop (in no particular order). */
2625 struct pru_doloop_begin *begin;
2626
2627 /* All the doloop_ends. When there is more than one, arrange things
2628 so that the first one is the most likely to be X in case (2) above. */
2629 struct pru_doloop_end *end;
2630};
2631
2632
2633/* Return true if LOOP can be converted into LOOP form
2634 (that is, if it matches cases (1) or (2) above). */
2635
2636static bool
2637pru_repeat_loop_p (struct pru_doloop *loop)
2638{
2639 struct pru_doloop_end *end;
2640 rtx_insn *fallthrough;
2641
2642 /* There must be exactly one doloop_begin and at least one doloop_end. */
2643 if (loop->begin == 0 || loop->end == 0 || loop->begin->next != 0)
2644 return false;
2645
2646 /* The first doloop_end (X) must branch back to the insn after
2647 the doloop_begin. */
2648 if (prev_real_insn (as_a<rtx_insn *> (loop->end->label)) != loop->begin->insn)
2649 return false;
2650
2651 /* Check that the first doloop_end (X) can actually reach
2652 doloop_begin () with U8_PCREL relocation for LOOP instruction. */
2653 if (get_attr_length (loop->end->insn) != 4)
2654 return false;
2655
2656 /* All the other doloop_ends must branch to the same place as X.
2657 When the branch isn't taken, they must jump to the instruction
2658 after X. */
2659 fallthrough = loop->end->fallthrough;
2660 for (end = loop->end->next; end != 0; end = end->next)
2661 if (end->label != loop->end->label
2662 || !simplejump_p (end->fallthrough)
2663 || fallthrough
2664 != next_real_insn (JUMP_LABEL_AS_INSN (end->fallthrough)))
2665 return false;
2666
2667 return true;
2668}
2669
2670
2671/* The main repeat reorg function. See comment above for details. */
2672
2673static void
2674pru_reorg_loop (rtx_insn *insns)
2675{
2676 rtx_insn *insn;
2677 struct pru_doloop *loops, *loop;
2678 struct pru_doloop_begin *begin;
2679 struct pru_doloop_end *end;
2680 size_t tmpsz;
2681
2682 /* Quick exit if we haven't created any loops. */
2683 if (cfun->machine->doloop_tags == 0)
2684 return;
2685
2686 /* Create an array of pru_doloop structures. */
2687 tmpsz = sizeof (loops[0]) * cfun->machine->doloop_tags;
2688 loops = (struct pru_doloop *) alloca (tmpsz);
2689 memset (loops, 0, sizeof (loops[0]) * cfun->machine->doloop_tags);
2690
2691 /* Search the function for do-while insns and group them by loop tag. */
2692 for (insn = insns; insn; insn = NEXT_INSN (insn))
2693 if (INSN_P (insn))
2694 switch (recog_memoized (insn))
2695 {
2696 case CODE_FOR_doloop_begin_internalhi:
2697 case CODE_FOR_doloop_begin_internalsi:
2698 insn_extract (insn);
2699 loop = &loops[INTVAL (recog_data.operand[2])];
2700
2701 tmpsz = sizeof (struct pru_doloop_begin);
2702 begin = (struct pru_doloop_begin *) alloca (tmpsz);
2703 begin->next = loop->begin;
2704 begin->insn = insn;
2705 begin->loop_count = recog_data.operand[1];
2706 begin->counter = recog_data.operand[0];
2707
2708 loop->begin = begin;
2709 break;
2710
2711 case CODE_FOR_doloop_end_internalhi:
2712 case CODE_FOR_doloop_end_internalsi:
2713 insn_extract (insn);
2714 loop = &loops[INTVAL (recog_data.operand[2])];
2715
2716 tmpsz = sizeof (struct pru_doloop_end);
2717 end = (struct pru_doloop_end *) alloca (tmpsz);
2718 end->insn = insn;
2719 end->fallthrough = next_real_insn (insn);
2720 end->counter = recog_data.operand[0];
2721 end->label = recog_data.operand[1];
2722 end->scratch = recog_data.operand[3];
2723
2724 /* If this insn falls through to an unconditional jump,
2725 give it a lower priority than the others. */
2726 if (loop->end != 0 && simplejump_p (end->fallthrough))
2727 {
2728 end->next = loop->end->next;
2729 loop->end->next = end;
2730 }
2731 else
2732 {
2733 end->next = loop->end;
2734 loop->end = end;
2735 }
2736 break;
2737 }
2738
2739 /* Convert the insns for each loop in turn. */
2740 for (loop = loops; loop < loops + cfun->machine->doloop_tags; loop++)
2741 if (pru_repeat_loop_p (loop))
2742 {
2743 /* Case (1) or (2). */
2744 rtx_code_label *repeat_label;
2745 rtx label_ref;
f886644b 2746 rtx loop_rtx;
8d2af3a2
DD
2747
2748 /* Create a new label for the repeat insn. */
2749 repeat_label = gen_label_rtx ();
2750
2751 /* Replace the doloop_begin with a repeat. We get rid
2752 of the iteration register because LOOP instruction
2753 will utilize an internal for the PRU core LOOP register. */
2754 label_ref = gen_rtx_LABEL_REF (VOIDmode, repeat_label);
2755 machine_mode loop_mode = GET_MODE (loop->begin->loop_count);
f886644b 2756 if (loop_mode == VOIDmode)
8d2af3a2
DD
2757 {
2758 gcc_assert (CONST_INT_P (loop->begin->loop_count));
2759 gcc_assert (UBYTE_INT ( INTVAL (loop->begin->loop_count)));
f886644b 2760 loop_mode = SImode;
8d2af3a2 2761 }
f886644b
DD
2762 gcc_assert (loop_mode == HImode || loop_mode == SImode);
2763 loop_rtx = gen_pruloop (loop_mode, loop->begin->loop_count, label_ref);
2764 emit_insn_before (loop_rtx, loop->begin->insn);
2765
8d2af3a2
DD
2766 delete_insn (loop->begin->insn);
2767
2768 /* Insert the repeat label before the first doloop_end.
2769 Fill the gap with nops if LOOP insn is less than 2
2770 instructions away than loop->end. */
2771 pru_insert_loop_label_last (loop->end->insn, repeat_label,
2772 loop->end->next != 0);
2773
2774 /* Emit a pruloop_end (to improve the readability of the output). */
2775 emit_insn_before (gen_pruloop_end (), loop->end->insn);
2776
2777 /* HACK: TODO: This is usually not needed, but is required for
2778 a few rare cases where a JUMP that breaks the loop
2779 references the LOOP_END address. In other words, since
2780 we're missing a real "loop_end" instruction, a loop "break"
2781 may accidentally reference the loop end itself, and thus
2782 continuing the cycle. */
2783 for (insn = NEXT_INSN (loop->end->insn);
2784 insn != next_real_insn (loop->end->insn);
2785 insn = NEXT_INSN (insn))
2786 {
2787 if (LABEL_P (insn) && LABEL_NUSES (insn) > 0)
2788 emit_insn_before (gen_nop_loop_guard (), loop->end->insn);
2789 }
2790
2791 /* Delete the first doloop_end. */
2792 delete_insn (loop->end->insn);
2793
2794 /* Replace the others with branches to REPEAT_LABEL. */
2795 for (end = loop->end->next; end != 0; end = end->next)
2796 {
2797 rtx_insn *newjmp;
2798 newjmp = emit_jump_insn_before (gen_jump (repeat_label), end->insn);
2799 JUMP_LABEL (newjmp) = repeat_label;
2800 delete_insn (end->insn);
2801 delete_insn (end->fallthrough);
2802 }
2803 }
2804 else
2805 {
2806 /* Case (3). First replace all the doloop_begins with setting
2807 the HW register used for loop counter. */
2808 for (begin = loop->begin; begin != 0; begin = begin->next)
2809 {
2810 insn = gen_move_insn (copy_rtx (begin->counter),
2811 copy_rtx (begin->loop_count));
2812 emit_insn_before (insn, begin->insn);
2813 delete_insn (begin->insn);
2814 }
2815
2816 /* Replace all the doloop_ends with decrement-and-branch sequences. */
2817 for (end = loop->end; end != 0; end = end->next)
2818 {
2819 rtx reg;
2820
2821 start_sequence ();
2822
2823 /* Load the counter value into a general register. */
2824 reg = end->counter;
2825 if (!REG_P (reg) || REGNO (reg) > LAST_NONIO_GP_REGNUM)
2826 {
2827 reg = end->scratch;
2828 emit_move_insn (copy_rtx (reg), copy_rtx (end->counter));
2829 }
2830
2831 /* Decrement the counter. */
2832 emit_insn (gen_add3_insn (copy_rtx (reg), copy_rtx (reg),
2833 constm1_rtx));
2834
2835 /* Copy it back to its original location. */
2836 if (reg != end->counter)
2837 emit_move_insn (copy_rtx (end->counter), copy_rtx (reg));
2838
2839 /* Jump back to the start label. */
2840 insn = emit_jump_insn (gen_cbranchsi4 (gen_rtx_NE (VOIDmode, reg,
2841 const0_rtx),
2842 reg,
2843 const0_rtx,
2844 end->label));
2845
2846 JUMP_LABEL (insn) = end->label;
2847 LABEL_NUSES (end->label)++;
2848
2849 /* Emit the whole sequence before the doloop_end. */
2850 insn = get_insns ();
2851 end_sequence ();
2852 emit_insn_before (insn, end->insn);
2853
2854 /* Delete the doloop_end. */
2855 delete_insn (end->insn);
2856 }
2857 }
2858}
2859
2860/* Implement TARGET_MACHINE_DEPENDENT_REORG. */
2861static void
2862pru_reorg (void)
2863{
2864 rtx_insn *insns = get_insns ();
2865
2866 compute_bb_for_insn ();
2867 df_analyze ();
2868
2869 /* Need correct insn lengths for allowing LOOP instruction
2870 emitting due to U8_PCREL limitations. */
2871 shorten_branches (get_insns ());
2872
2873 /* The generic reorg_loops () is not suitable for PRU because
2874 it doesn't handle doloop_begin/end tying. And we need our
2875 doloop_begin emitted before reload. It is difficult to coalesce
2876 UBYTE constant initial loop values into the LOOP insn during
2877 machine reorg phase. */
2878 pru_reorg_loop (insns);
2879
2880 df_finish_pass (false);
2881}
2882\f
/* Enumerate all PRU-specific builtins.  The values index the
   pru_builtins array and serve as the machine-dependent function codes;
   PRU_BUILTIN_max is the number of builtins.  */
enum pru_builtin
{
  PRU_BUILTIN_DELAY_CYCLES = 0,   /* __delay_cycles  */
  PRU_BUILTIN_HALT = 1,           /* __halt  */
  PRU_BUILTIN_LMBD = 2,           /* __lmbd  */
  PRU_BUILTIN_max = 3
};
2891
2892static GTY(()) tree pru_builtins [(int) PRU_BUILTIN_max];
2893
2894/* Implement TARGET_INIT_BUILTINS. */
2895
2896static void
2897pru_init_builtins (void)
2898{
2899 tree void_ftype_longlong
2900 = build_function_type_list (void_type_node,
2901 long_long_integer_type_node,
2902 NULL);
5ace1776
DD
2903 tree uint_ftype_uint_uint
2904 = build_function_type_list (unsigned_type_node,
2905 unsigned_type_node,
2906 unsigned_type_node,
2907 NULL);
2908
2909 tree void_ftype_void
2910 = build_function_type_list (void_type_node,
2911 void_type_node,
2912 NULL);
8d2af3a2
DD
2913
2914 pru_builtins[PRU_BUILTIN_DELAY_CYCLES]
2915 = add_builtin_function ("__delay_cycles", void_ftype_longlong,
2916 PRU_BUILTIN_DELAY_CYCLES, BUILT_IN_MD, NULL,
2917 NULL_TREE);
5ace1776
DD
2918
2919 pru_builtins[PRU_BUILTIN_HALT]
2920 = add_builtin_function ("__halt", void_ftype_void,
2921 PRU_BUILTIN_HALT, BUILT_IN_MD, NULL,
2922 NULL_TREE);
2923
2924 pru_builtins[PRU_BUILTIN_LMBD]
2925 = add_builtin_function ("__lmbd", uint_ftype_uint_uint,
2926 PRU_BUILTIN_LMBD, BUILT_IN_MD, NULL,
2927 NULL_TREE);
8d2af3a2
DD
2928}
2929
2930/* Implement TARGET_BUILTIN_DECL. */
2931
2932static tree
2933pru_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
2934{
2935 switch (code)
2936 {
2937 case PRU_BUILTIN_DELAY_CYCLES:
5ace1776
DD
2938 case PRU_BUILTIN_HALT:
2939 case PRU_BUILTIN_LMBD:
8d2af3a2
DD
2940 return pru_builtins[code];
2941 default:
2942 return error_mark_node;
2943 }
2944}
2945\f
2946/* Emit a sequence of one or more delay_cycles_X insns, in order to generate
2947 code that delays exactly ARG cycles. */
2948
2949static rtx
2950pru_expand_delay_cycles (rtx arg)
2951{
2952 HOST_WIDE_INT c, n;
2953
2954 if (GET_CODE (arg) != CONST_INT)
2955 {
2956 error ("%<__delay_cycles%> only takes constant arguments");
2957 return NULL_RTX;
2958 }
2959
2960 c = INTVAL (arg);
2961
2962 gcc_assert (HOST_BITS_PER_WIDE_INT > 32);
2963 if (c < 0)
2964 {
2965 error ("%<__delay_cycles%> only takes non-negative cycle counts");
2966 return NULL_RTX;
2967 }
2968
2969 emit_insn (gen_delay_cycles_start (arg));
2970
2971 /* For 32-bit loops, there's 2 + 2x cycles. */
2972 if (c > 2 * 0xffff + 1)
2973 {
2974 n = (c - 2) / 2;
2975 c -= (n * 2) + 2;
2976 if ((unsigned long long) n > 0xffffffffULL)
2977 {
2978 error ("%<__delay_cycles%> is limited to 32-bit loop counts");
2979 return NULL_RTX;
2980 }
2981 emit_insn (gen_delay_cycles_2x_plus2_si (GEN_INT (n)));
2982 }
2983
2984 /* For 16-bit loops, there's 1 + 2x cycles. */
2985 if (c > 2)
2986 {
2987 n = (c - 1) / 2;
2988 c -= (n * 2) + 1;
2989
2990 emit_insn (gen_delay_cycles_2x_plus1_hi (GEN_INT (n)));
2991 }
2992
2993 while (c > 0)
2994 {
2995 emit_insn (gen_delay_cycles_1 ());
2996 c -= 1;
2997 }
2998
2999 emit_insn (gen_delay_cycles_end (arg));
3000
3001 return NULL_RTX;
3002}
3003
3004
3005/* Implement TARGET_EXPAND_BUILTIN. Expand an expression EXP that calls
3006 a built-in function, with result going to TARGET if that's convenient
3007 (and in mode MODE if that's convenient).
3008 SUBTARGET may be used as the target for computing one of EXP's operands.
3009 IGNORE is nonzero if the value is to be ignored. */
3010
3011static rtx
5ace1776 3012pru_expand_builtin (tree exp, rtx target,
8d2af3a2 3013 rtx subtarget ATTRIBUTE_UNUSED,
5ace1776 3014 machine_mode mode,
8d2af3a2
DD
3015 int ignore ATTRIBUTE_UNUSED)
3016{
3017 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
4d732405 3018 unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
8d2af3a2 3019
5ace1776
DD
3020 switch (fcode)
3021 {
3022 case PRU_BUILTIN_DELAY_CYCLES:
3023 {
3024 rtx arg1 = expand_normal (CALL_EXPR_ARG (exp, 0));
3025 return pru_expand_delay_cycles (arg1);
3026 }
3027 break;
3028 case PRU_BUILTIN_HALT:
3029 {
3030 emit_insn (gen_pru_halt ());
3031 return NULL_RTX;
3032 }
3033 break;
3034 case PRU_BUILTIN_LMBD:
3035 {
3036 rtx arg1 = expand_normal (CALL_EXPR_ARG (exp, 0));
3037 rtx arg2 = expand_normal (CALL_EXPR_ARG (exp, 1));
3038
3039 if (target == NULL_RTX || GET_MODE (target) != mode)
3040 {
3041 target = gen_reg_rtx (mode);
3042 }
8d2af3a2 3043
5ace1776
DD
3044 emit_insn (gen_pru_lmbd (mode, target, arg1, arg2));
3045 return target;
3046 }
3047 break;
3048 default:
3049 internal_error ("bad builtin code");
3050 }
8d2af3a2
DD
3051
3052 return NULL_RTX;
3053}
3054\f
3055/* Remember the last target of pru_set_current_function. */
3056static GTY(()) tree pru_previous_fndecl;
3057
3058/* Establish appropriate back-end context for processing the function
3059 FNDECL. The argument might be NULL to indicate processing at top
3060 level, outside of any function scope. */
3061static void
3062pru_set_current_function (tree fndecl)
3063{
3064 tree old_tree = (pru_previous_fndecl
3065 ? DECL_FUNCTION_SPECIFIC_TARGET (pru_previous_fndecl)
3066 : NULL_TREE);
3067
3068 tree new_tree = (fndecl
3069 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3070 : NULL_TREE);
3071
3072 if (fndecl && fndecl != pru_previous_fndecl)
3073 {
3074 pru_previous_fndecl = fndecl;
3075 if (old_tree == new_tree)
3076 ;
3077
3078 else if (new_tree)
3079 {
ba948b37 3080 cl_target_option_restore (&global_options, &global_options_set,
8d2af3a2
DD
3081 TREE_TARGET_OPTION (new_tree));
3082 target_reinit ();
3083 }
3084
3085 else if (old_tree)
3086 {
3087 struct cl_target_option *def
3088 = TREE_TARGET_OPTION (target_option_current_node);
3089
ba948b37 3090 cl_target_option_restore (&global_options, &global_options_set, def);
8d2af3a2
DD
3091 target_reinit ();
3092 }
3093 }
3094}
3095\f
3096/* Implement TARGET_UNWIND_WORD_MODE.
3097
3098 Since PRU is really a 32-bit CPU, the default word_mode is not suitable. */
3099static scalar_int_mode
3100pru_unwind_word_mode (void)
3101{
3102 return SImode;
3103}
3104\f
3105
3106/* Initialize the GCC target structure. */
3107#undef TARGET_ASM_FUNCTION_PROLOGUE
3108#define TARGET_ASM_FUNCTION_PROLOGUE pru_asm_function_prologue
3109#undef TARGET_ASM_INTEGER
3110#define TARGET_ASM_INTEGER pru_assemble_integer
dda85bc2
DD
3111#undef TARGET_SECTION_TYPE_FLAGS
3112#define TARGET_SECTION_TYPE_FLAGS pru_section_type_flags
8d2af3a2
DD
3113
3114#undef TARGET_ASM_FILE_START
3115#define TARGET_ASM_FILE_START pru_file_start
3116
8bafc964
DD
3117#undef TARGET_INSERT_ATTRIBUTES
3118#define TARGET_INSERT_ATTRIBUTES pru_insert_attributes
3119
8d2af3a2
DD
3120#undef TARGET_INIT_BUILTINS
3121#define TARGET_INIT_BUILTINS pru_init_builtins
3122#undef TARGET_EXPAND_BUILTIN
3123#define TARGET_EXPAND_BUILTIN pru_expand_builtin
3124#undef TARGET_BUILTIN_DECL
3125#define TARGET_BUILTIN_DECL pru_builtin_decl
3126
3127#undef TARGET_COMPUTE_FRAME_LAYOUT
3128#define TARGET_COMPUTE_FRAME_LAYOUT pru_compute_frame_layout
3129
3130#undef TARGET_FUNCTION_OK_FOR_SIBCALL
3131#define TARGET_FUNCTION_OK_FOR_SIBCALL hook_bool_tree_tree_true
3132
3133#undef TARGET_CAN_ELIMINATE
3134#define TARGET_CAN_ELIMINATE pru_can_eliminate
3135
3136#undef TARGET_HARD_REGNO_MODE_OK
3137#define TARGET_HARD_REGNO_MODE_OK pru_hard_regno_mode_ok
3138
3139#undef TARGET_HARD_REGNO_SCRATCH_OK
3140#define TARGET_HARD_REGNO_SCRATCH_OK pru_hard_regno_scratch_ok
8d2af3a2
DD
3141
3142#undef TARGET_FUNCTION_ARG
3143#define TARGET_FUNCTION_ARG pru_function_arg
3144
3145#undef TARGET_FUNCTION_ARG_ADVANCE
3146#define TARGET_FUNCTION_ARG_ADVANCE pru_function_arg_advance
3147
3148#undef TARGET_ARG_PARTIAL_BYTES
3149#define TARGET_ARG_PARTIAL_BYTES pru_arg_partial_bytes
3150
3151#undef TARGET_FUNCTION_VALUE
3152#define TARGET_FUNCTION_VALUE pru_function_value
3153
3154#undef TARGET_LIBCALL_VALUE
3155#define TARGET_LIBCALL_VALUE pru_libcall_value
3156
3157#undef TARGET_FUNCTION_VALUE_REGNO_P
3158#define TARGET_FUNCTION_VALUE_REGNO_P pru_function_value_regno_p
3159
3160#undef TARGET_RETURN_IN_MEMORY
3161#define TARGET_RETURN_IN_MEMORY pru_return_in_memory
3162
3163#undef TARGET_MUST_PASS_IN_STACK
3164#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
3165
8bafc964
DD
3166#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
3167#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
3168 pru_addr_space_legitimate_address_p
8d2af3a2
DD
3169
3170#undef TARGET_INIT_LIBFUNCS
3171#define TARGET_INIT_LIBFUNCS pru_init_libfuncs
3172#undef TARGET_LIBFUNC_GNU_PREFIX
3173#define TARGET_LIBFUNC_GNU_PREFIX true
3174
3175#undef TARGET_RTX_COSTS
3176#define TARGET_RTX_COSTS pru_rtx_costs
3177
3178#undef TARGET_PRINT_OPERAND
3179#define TARGET_PRINT_OPERAND pru_print_operand
3180
3181#undef TARGET_PRINT_OPERAND_ADDRESS
3182#define TARGET_PRINT_OPERAND_ADDRESS pru_print_operand_address
3183
3184#undef TARGET_OPTION_OVERRIDE
3185#define TARGET_OPTION_OVERRIDE pru_option_override
3186
3187#undef TARGET_SET_CURRENT_FUNCTION
3188#define TARGET_SET_CURRENT_FUNCTION pru_set_current_function
3189
3190#undef TARGET_MACHINE_DEPENDENT_REORG
3191#define TARGET_MACHINE_DEPENDENT_REORG pru_reorg
3192
3193#undef TARGET_CAN_USE_DOLOOP_P
3194#define TARGET_CAN_USE_DOLOOP_P pru_can_use_doloop_p
3195
3196#undef TARGET_INVALID_WITHIN_DOLOOP
3197#define TARGET_INVALID_WITHIN_DOLOOP pru_invalid_within_doloop
3198
3199#undef TARGET_UNWIND_WORD_MODE
3200#define TARGET_UNWIND_WORD_MODE pru_unwind_word_mode
3201
3202#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
3203#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
3204
3205struct gcc_target targetm = TARGET_INITIALIZER;
3206
3207#include "gt-pru.h"