1/* Target machine subroutines for TI PRU.
 2 Copyright (C) 2014-2023 Free Software Foundation, Inc.
3 Dimitar Dimitrov <dimitar@dinux.eu>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21#define IN_TARGET_CODE 1
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "backend.h"
27#include "target.h"
28#include "rtl.h"
29#include "tree.h"
30#include "stringpool.h"
31#include "attribs.h"
32#include "df.h"
33#include "memmodel.h"
34#include "tm_p.h"
35#include "optabs.h"
36#include "regs.h"
37#include "emit-rtl.h"
38#include "recog.h"
39#include "diagnostic-core.h"
40#include "output.h"
41#include "insn-attr.h"
42#include "flags.h"
43#include "explow.h"
44#include "calls.h"
45#include "varasm.h"
46#include "expr.h"
47#include "toplev.h"
48#include "langhooks.h"
49#include "cfgrtl.h"
50#include "stor-layout.h"
51#include "dumpfile.h"
52#include "builtins.h"
53#include "pru-protos.h"
54
55/* This file should be included last. */
56#include "target-def.h"
57
58#define INIT_ARRAY_ENTRY_BYTES 2
59
60/* Global PRU CTABLE entries, filled in by pragmas, and used for fast
61 addressing via LBCO/SBCO instructions. */
62struct pru_ctable_entry pru_ctable[32];
63
64/* Forward function declarations. */
65static bool prologue_saved_reg_p (int);
66static void pru_reorg_loop (rtx_insn *);
67
68struct GTY (()) machine_function
69{
70 /* Current frame information, to be filled in by pru_compute_frame_layout
71 with register save masks, and offsets for the current function. */
72
73 /* Mask of registers to save. */
74 HARD_REG_SET save_mask;
75 /* Number of bytes that the entire frame takes up. */
76 int total_size;
77 /* Number of bytes that variables take up. */
78 int var_size;
79 /* Number of bytes that outgoing arguments take up. */
80 int out_args_size;
81 /* Number of bytes needed to store registers in frame. */
82 int save_reg_size;
83 /* Offset from new stack pointer to store registers. */
84 int save_regs_offset;
85 /* True if final frame layout is already calculated. */
86 bool initialized;
87 /* Number of doloop tags used so far. */
88 int doloop_tags;
89 /* True if the last tag was allocated to a doloop_end. */
90 bool doloop_tag_from_end;
91};
92\f
93/* Stack layout and calling conventions.
94
95 The PRU ABI defines r4 as Argument Pointer. GCC implements the same
96 semantics, but represents it with HARD_FRAME_POINTER_REGNUM and
97 names it FP. The stack layout is shown below:
98
99 ---------------------- high address
100 | incoming args
101 ------call-boundary---
102 | pretend_args ^
103 FP ---------------- | total
104 | save_regs | frame
105 --------------- | size
106 | local vars |
107 --------------- |
108 | outgoing args V
109 SP ---------------------- low address
110
111 */
112
113#define PRU_STACK_ALIGN(LOC) ROUND_UP ((LOC), STACK_BOUNDARY / BITS_PER_UNIT)
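/* An illustrative expansion of the macro above, assuming a hypothetical
   4-byte stack alignment (the real value is STACK_BOUNDARY / BITS_PER_UNIT
   from pru.h):

     PRU_STACK_ALIGN (0) == 0
     PRU_STACK_ALIGN (5) == 8
     PRU_STACK_ALIGN (8) == 8

   Byte counts are rounded up to the next multiple of the stack alignment
   before being added into the frame size.  */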
114
115/* Implement TARGET_COMPUTE_FRAME_LAYOUT. */
116static void
117pru_compute_frame_layout (void)
118{
119 int regno;
120 HARD_REG_SET *save_mask;
121 int total_size;
122 int var_size;
123 int out_args_size;
124 int save_reg_size;
125
126 gcc_assert (!cfun->machine->initialized);
127
128 save_mask = &cfun->machine->save_mask;
129 CLEAR_HARD_REG_SET (*save_mask);
130
131 var_size = PRU_STACK_ALIGN ((HOST_WIDE_INT) get_frame_size ());
132 out_args_size = PRU_STACK_ALIGN ((HOST_WIDE_INT) crtl->outgoing_args_size);
133 total_size = var_size + out_args_size;
134
135 /* Calculate space needed for gp registers. */
136 save_reg_size = 0;
137 for (regno = 0; regno <= LAST_GP_REGNUM; regno++)
138 if (prologue_saved_reg_p (regno))
139 {
140 SET_HARD_REG_BIT (*save_mask, regno);
141 save_reg_size += 1;
142 }
143
144 save_reg_size = PRU_STACK_ALIGN (save_reg_size);
145 total_size += save_reg_size;
146 total_size += PRU_STACK_ALIGN (crtl->args.pretend_args_size);
147
148 /* Save other computed information. */
149 cfun->machine->total_size = total_size;
150 cfun->machine->var_size = var_size;
151 cfun->machine->out_args_size = out_args_size;
152 cfun->machine->save_reg_size = save_reg_size;
153 cfun->machine->initialized = reload_completed;
154 cfun->machine->save_regs_offset = out_args_size + var_size;
155}
156
157/* Emit efficient RTL equivalent of ADD3 with the given const_int for
158 frame-related registers.
159 op0 - Destination register.
160 op1 - First addendum operand (a register).
161 addendum - Second addendum operand (a constant).
162 kind - Note kind. REG_NOTE_MAX if no note must be added.
163 */
164static rtx
165pru_add3_frame_adjust (rtx op0, rtx op1, int addendum,
166 const enum reg_note kind)
167{
168 rtx insn;
169
170 rtx op0_adjust = gen_rtx_SET (op0, plus_constant (Pmode, op1, addendum));
171
172 if (UBYTE_INT (addendum) || UBYTE_INT (-addendum))
173 insn = emit_insn (op0_adjust);
174 else
175 {
176 /* Help the compiler to cope with an arbitrary integer constant.
177 Reload has finished so we can't expect the compiler to
178 auto-allocate a temporary register. But we know that call-saved
179 registers are not live yet, so we utilize them. */
180 rtx tmpreg = gen_rtx_REG (Pmode, PROLOGUE_TEMP_REGNUM);
181 if (addendum < 0)
182 {
183 emit_insn (gen_rtx_SET (tmpreg, gen_int_mode (-addendum, Pmode)));
184 insn = emit_insn (gen_sub3_insn (op0, op1, tmpreg));
185 }
186 else
187 {
188 emit_insn (gen_rtx_SET (tmpreg, gen_int_mode (addendum, Pmode)));
189 insn = emit_insn (gen_add3_insn (op0, op1, tmpreg));
190 }
191 }
192
193 /* Attach a note indicating what happened. */
194 if (kind != REG_NOTE_MAX)
195 add_reg_note (insn, kind, copy_rtx (op0_adjust));
196
197 RTX_FRAME_RELATED_P (insn) = 1;
198
199 return insn;
200}
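/* A sketch of the two code paths above, with hypothetical operands and
   illustrative mnemonics (the real instructions come from the add/sub
   patterns in pru.md):

     ; UBYTE addendum, e.g. sp = sp - 16:
     sub   sp, sp, 16

     ; addendum too large for UBYTE, e.g. sp = sp - 4096, goes through
     ; the prologue temporary register:
     ldi32 tmp, 4096
     sub   sp, sp, tmp

   Here "sp" stands for stack_pointer_rtx and "tmp" for the register
   numbered PROLOGUE_TEMP_REGNUM.  */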
201
202/* Add a const_int to the stack pointer register. */
203static rtx
204pru_add_to_sp (int addendum, const enum reg_note kind)
205{
206 return pru_add3_frame_adjust (stack_pointer_rtx, stack_pointer_rtx,
207 addendum, kind);
208}
209
210/* Helper function used during prologue/epilogue. Emits a single LBBO/SBBO
211 instruction for load/store of the next group of consecutive registers. */
212static int
213xbbo_next_reg_cluster (int regno_start, int *sp_offset, bool do_store)
214{
215 int regno, nregs, i;
216 rtx addr;
217 rtx_insn *insn;
218
219 nregs = 0;
220
221 /* Skip the empty slots. */
222 for (; regno_start <= LAST_GP_REGNUM;)
223 if (TEST_HARD_REG_BIT (cfun->machine->save_mask, regno_start))
224 break;
225 else
226 regno_start++;
227
228 /* Find the largest consecutive group of registers to save. */
229 for (regno = regno_start; regno <= LAST_GP_REGNUM;)
230 if (TEST_HARD_REG_BIT (cfun->machine->save_mask, regno))
231 {
232 regno++;
233 nregs++;
234 }
235 else
236 break;
237
238 if (!nregs)
239 return -1;
240
241 gcc_assert (UBYTE_INT (*sp_offset));
242
243 /* Ok, save this bunch. */
244 addr = plus_constant (Pmode, stack_pointer_rtx, *sp_offset);
245
246 if (do_store)
247 insn = targetm.gen_store_multiple (gen_frame_mem (BLKmode, addr),
248 gen_rtx_REG (QImode, regno_start),
249 GEN_INT (nregs));
250 else
251 insn = targetm.gen_load_multiple (gen_rtx_REG (QImode, regno_start),
252 gen_frame_mem (BLKmode, addr),
253 GEN_INT (nregs));
254
255 gcc_assert (reload_completed);
256 gcc_assert (insn);
257 emit_insn (insn);
258
259 /* Tag as frame-related. */
260 RTX_FRAME_RELATED_P (insn) = 1;
261
262 if (!do_store)
263 {
264 /* Tag epilogue unwind notes. */
265 for (i = regno_start; i < (regno_start + nregs); i++)
266 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (QImode, i));
267 }
268
269 /* Increment and save offset in anticipation of the next register group. */
270 *sp_offset += nregs * UNITS_PER_WORD;
271
272 return regno_start + nregs;
273}
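/* A worked example for the helper above, with a hypothetical save_mask
   covering byte registers 12..15 and 56..57 (r3 and r14.w0):

     1st call: regno_start = 0   -> one store/load for regs 12..15, returns 16
     2nd call: regno_start = 16  -> one store/load for regs 56..57, returns 58
     3rd call: regno_start = 58  -> no further bits set, returns -1

   This is why the prologue and epilogue loop until a negative value is
   returned.  */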
274
275/* Emit function prologue. */
276void
277pru_expand_prologue (void)
278{
279 int regno_start;
280 int total_frame_size;
281 int sp_offset; /* Offset from base_reg to final stack value. */
282 int save_regs_base; /* Offset from base_reg to register save area. */
283 int save_offset; /* Temporary offset to currently saved register group. */
284
285 total_frame_size = cfun->machine->total_size;
286
287 if (flag_stack_usage_info)
288 current_function_static_stack_size = total_frame_size;
289
290 /* Decrement the stack pointer. */
291 if (!UBYTE_INT (total_frame_size))
292 {
 293 /* We need an intermediate point; it will point at the spill block. */
294 pru_add_to_sp (cfun->machine->save_regs_offset - total_frame_size,
295 REG_NOTE_MAX);
296 save_regs_base = 0;
297 sp_offset = -cfun->machine->save_regs_offset;
298 }
299 else if (total_frame_size)
300 {
301 pru_add_to_sp (- total_frame_size, REG_NOTE_MAX);
302 save_regs_base = cfun->machine->save_regs_offset;
303 sp_offset = 0;
304 }
305 else
306 save_regs_base = sp_offset = 0;
307
308 regno_start = 0;
309 save_offset = save_regs_base;
310 do
311 regno_start = xbbo_next_reg_cluster (regno_start, &save_offset, true);
312 while (regno_start >= 0);
313
 314 /* Set FP before adjusting SP. This way fp_offset has a
 315 better chance of fitting in a UBYTE. */
316 if (frame_pointer_needed)
317 {
318 int fp_offset = total_frame_size
319 - crtl->args.pretend_args_size
320 + sp_offset;
321
322 pru_add3_frame_adjust (hard_frame_pointer_rtx, stack_pointer_rtx,
323 fp_offset, REG_NOTE_MAX);
324 }
325
326 if (sp_offset)
327 pru_add_to_sp (sp_offset, REG_FRAME_RELATED_EXPR);
328
329 /* If we are profiling, make sure no instructions are scheduled before
330 the call to mcount. */
331 if (crtl->profile)
332 emit_insn (gen_blockage ());
333}
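/* A summary of the prologue paths above, for two hypothetical frame sizes
   (assuming UBYTE_INT accepts the range 0..255):

     total_frame_size = 32   -> one SP decrement by 32; registers are then
                                stored at save_regs_offset from the new SP.
     total_frame_size = 1024 -> SP is first lowered only as far as the
                                register spill block, the registers are
                                stored at offset 0, and the remaining
                                (negative) sp_offset is applied afterwards.  */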
334
335/* Emit function epilogue. */
336void
337pru_expand_epilogue (bool sibcall_p)
338{
339 int total_frame_size;
340 int sp_adjust, save_offset;
341 int regno_start;
342
343 if (!sibcall_p && pru_can_use_return_insn ())
344 {
345 emit_jump_insn (gen_return ());
346 return;
347 }
348
349 emit_insn (gen_blockage ());
350
351 total_frame_size = cfun->machine->total_size;
352
353 if (frame_pointer_needed)
354 {
355 /* Recover the stack pointer. */
356 pru_add3_frame_adjust (stack_pointer_rtx, hard_frame_pointer_rtx,
357 - cfun->machine->save_reg_size,
358 REG_CFA_ADJUST_CFA);
359
360 save_offset = 0;
361 sp_adjust = total_frame_size - cfun->machine->save_regs_offset;
362 }
363 else if (!UBYTE_INT (total_frame_size))
364 {
365 pru_add_to_sp (cfun->machine->save_regs_offset, REG_CFA_ADJUST_CFA);
366 save_offset = 0;
367 sp_adjust = total_frame_size - cfun->machine->save_regs_offset;
368 }
369 else
370 {
371 save_offset = cfun->machine->save_regs_offset;
372 sp_adjust = total_frame_size;
373 }
374
375 regno_start = 0;
376 do
377 regno_start = xbbo_next_reg_cluster (regno_start, &save_offset, false);
378 while (regno_start >= 0);
379
380 /* Emit a blockage insn here to keep these insns from being moved to
381 an earlier spot in the epilogue.
382
383 This is necessary as we must not cut the stack back before all the
384 restores are finished. */
385 emit_insn (gen_blockage ());
386
387 if (sp_adjust)
388 pru_add_to_sp (sp_adjust, REG_CFA_ADJUST_CFA);
389
390 if (!sibcall_p)
391 emit_jump_insn (gen_simple_return ());
392}
393
394/* Implement RETURN_ADDR_RTX. Note, we do not support moving
395 back to a previous frame. */
396rtx
397pru_get_return_address (int count)
398{
399 if (count != 0)
400 return NULL_RTX;
401
402 /* Return r3.w2. */
403 return get_hard_reg_initial_val (HImode, RA_REGNUM);
404}
405
406/* Implement FUNCTION_PROFILER macro. */
407void
408pru_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
409{
410 fprintf (file, "\tmov\tr1, ra\n");
411 fprintf (file, "\tcall\t_mcount\n");
412 fprintf (file, "\tmov\tra, r1\n");
413}
414
415/* Dump stack layout. */
416static void
417pru_dump_frame_layout (FILE *file)
418{
419 fprintf (file, "\t%s Current Frame Info\n", ASM_COMMENT_START);
420 fprintf (file, "\t%s total_size = %d\n", ASM_COMMENT_START,
421 cfun->machine->total_size);
422 fprintf (file, "\t%s var_size = %d\n", ASM_COMMENT_START,
423 cfun->machine->var_size);
424 fprintf (file, "\t%s out_args_size = %d\n", ASM_COMMENT_START,
425 cfun->machine->out_args_size);
426 fprintf (file, "\t%s save_reg_size = %d\n", ASM_COMMENT_START,
427 cfun->machine->save_reg_size);
428 fprintf (file, "\t%s initialized = %d\n", ASM_COMMENT_START,
429 cfun->machine->initialized);
430 fprintf (file, "\t%s save_regs_offset = %d\n", ASM_COMMENT_START,
431 cfun->machine->save_regs_offset);
432 fprintf (file, "\t%s is_leaf = %d\n", ASM_COMMENT_START,
433 crtl->is_leaf);
434 fprintf (file, "\t%s frame_pointer_needed = %d\n", ASM_COMMENT_START,
435 frame_pointer_needed);
436 fprintf (file, "\t%s pretend_args_size = %d\n", ASM_COMMENT_START,
437 crtl->args.pretend_args_size);
438}
439
440/* Return true if REGNO should be saved in the prologue. */
441static bool
442prologue_saved_reg_p (int regno)
443{
444 gcc_assert (GP_REG_P (regno));
445
 446 if (df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
447 return true;
448
449 /* 32-bit FP. */
450 if (frame_pointer_needed
451 && regno >= HARD_FRAME_POINTER_REGNUM
452 && regno < HARD_FRAME_POINTER_REGNUM + GET_MODE_SIZE (Pmode))
453 return true;
454
455 /* 16-bit RA. */
456 if (regno == RA_REGNUM && df_regs_ever_live_p (RA_REGNUM))
457 return true;
458 if (regno == RA_REGNUM + 1 && df_regs_ever_live_p (RA_REGNUM + 1))
459 return true;
460
461 return false;
462}
463
464/* Implement TARGET_CAN_ELIMINATE. */
465static bool
466pru_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
467{
468 if (to == STACK_POINTER_REGNUM)
469 return !frame_pointer_needed;
470 return true;
471}
472
473/* Implement INITIAL_ELIMINATION_OFFSET macro. */
474int
475pru_initial_elimination_offset (int from, int to)
476{
477 int offset;
478
479 /* Set OFFSET to the offset from the stack pointer. */
480 switch (from)
481 {
482 case FRAME_POINTER_REGNUM:
483 offset = cfun->machine->out_args_size;
484 break;
485
486 case ARG_POINTER_REGNUM:
487 offset = cfun->machine->total_size;
488 offset -= crtl->args.pretend_args_size;
489 break;
490
491 default:
492 gcc_unreachable ();
493 }
494
495 /* If we are asked for the frame pointer offset, then adjust OFFSET
496 by the offset from the frame pointer to the stack pointer. */
497 if (to == HARD_FRAME_POINTER_REGNUM)
498 offset -= cfun->machine->total_size - crtl->args.pretend_args_size;
499
500
501 return offset;
502}
503
504/* Return nonzero if this function is known to have a null epilogue.
505 This allows the optimizer to omit jumps to jumps if no stack
506 was created. */
507int
508pru_can_use_return_insn (void)
509{
510 if (!reload_completed || crtl->profile)
511 return 0;
512
513 return cfun->machine->total_size == 0;
514}
515\f
516/* Implement TARGET_HARD_REGNO_MODE_OK. */
517
518static bool
519pru_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
520{
521 switch (GET_MODE_SIZE (mode))
522 {
523 case 1: return true;
524 case 2: return (regno % 4) <= 2;
525 case 4: return (regno % 4) == 0;
526 case 8: return (regno % 4) == 0;
527 case 16: return (regno % 4) == 0; /* Not sure why TImode is used. */
528 case 32: return (regno % 4) == 0; /* Not sure why CTImode is used. */
529 default:
530 /* TODO: Find out why VOIDmode and BLKmode are passed. */
531 gcc_assert (mode == BLKmode || mode == VOIDmode);
532 return (regno % 4) == 0;
533 }
534}
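/* A few concrete cases of the rule above, using the r<N>.b<M> numbering
   from pru_asm_regname later in this file:

     regno 12 (r3.b0): OK for QImode, HImode and SImode  (12 % 4 == 0)
     regno 14 (r3.b2): OK for QImode and HImode          (14 % 4 == 2)
     regno 15 (r3.b3): OK for QImode only                (15 % 4 == 3)  */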
535
536/* Implement `TARGET_HARD_REGNO_SCRATCH_OK'.
537 Returns true if REGNO is safe to be allocated as a scratch
538 register (for a define_peephole2) in the current function. */
539
540static bool
541pru_hard_regno_scratch_ok (unsigned int regno)
542{
543 /* Don't allow hard registers that might be part of the frame pointer.
544 Some places in the compiler just test for [HARD_]FRAME_POINTER_REGNUM
545 and don't handle a frame pointer that spans more than one register.
546 TODO: Fix those faulty places. */
547
548 if ((!reload_completed || frame_pointer_needed)
549 && (IN_RANGE (regno, HARD_FRAME_POINTER_REGNUM,
550 HARD_FRAME_POINTER_REGNUM + 3)
551 || IN_RANGE (regno, FRAME_POINTER_REGNUM,
552 FRAME_POINTER_REGNUM + 3)))
553 return false;
554
555 return true;
556}
557
558
559/* Worker function for `HARD_REGNO_RENAME_OK'.
560 Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
561
562int
563pru_hard_regno_rename_ok (unsigned int old_reg,
564 unsigned int new_reg)
565{
566 /* Don't allow hard registers that might be part of the frame pointer.
567 Some places in the compiler just test for [HARD_]FRAME_POINTER_REGNUM
568 and don't care for a frame pointer that spans more than one register.
569 TODO: Fix those faulty places. */
570 if ((!reload_completed || frame_pointer_needed)
571 && (IN_RANGE (old_reg, HARD_FRAME_POINTER_REGNUM,
572 HARD_FRAME_POINTER_REGNUM + 3)
573 || IN_RANGE (old_reg, FRAME_POINTER_REGNUM,
574 FRAME_POINTER_REGNUM + 3)
575 || IN_RANGE (new_reg, HARD_FRAME_POINTER_REGNUM,
576 HARD_FRAME_POINTER_REGNUM + 3)
577 || IN_RANGE (new_reg, FRAME_POINTER_REGNUM,
578 FRAME_POINTER_REGNUM + 3)))
579 return 0;
580
581 return 1;
582}
583\f
584/* Allocate a chunk of memory for per-function machine-dependent data. */
585static struct machine_function *
586pru_init_machine_status (void)
587{
588 return ggc_cleared_alloc<machine_function> ();
589}
590
591/* Implement TARGET_OPTION_OVERRIDE. */
592static void
593pru_option_override (void)
594{
595#ifdef SUBTARGET_OVERRIDE_OPTIONS
596 SUBTARGET_OVERRIDE_OPTIONS;
597#endif
598
599 /* Check for unsupported options. */
600 if (flag_pic == 1)
601 warning (OPT_fpic, "%<-fpic%> is not supported");
602 if (flag_pic == 2)
603 warning (OPT_fPIC, "%<-fPIC%> is not supported");
604 if (flag_pie == 1)
605 warning (OPT_fpie, "%<-fpie%> is not supported");
606 if (flag_pie == 2)
607 warning (OPT_fPIE, "%<-fPIE%> is not supported");
608
609 /* QBxx conditional branching cannot cope with block reordering. */
610 if (flag_reorder_blocks_and_partition)
611 {
612 inform (input_location, "%<-freorder-blocks-and-partition%> "
613 "not supported on this architecture");
614 flag_reorder_blocks_and_partition = 0;
615 flag_reorder_blocks = 1;
616 }
617
618 /* Function to allocate machine-dependent function status. */
619 init_machine_status = &pru_init_machine_status;
620
621 /* Save the initial options in case the user does function specific
622 options. */
623 target_option_default_node = target_option_current_node
 624 = build_target_option_node (&global_options, &global_options_set);
625
626 /* Due to difficulties in implementing the TI ABI with GCC,
627 at least check and error-out if GCC cannot compile a
628 compliant output. */
629 pru_register_abicheck_pass ();
630}
631\f
632/* Compute a (partial) cost for rtx X. Return true if the complete
633 cost has been computed, and false if subexpressions should be
634 scanned. In either case, *TOTAL contains the cost result. */
635static bool
636pru_rtx_costs (rtx x, machine_mode mode,
637 int outer_code, int opno ATTRIBUTE_UNUSED,
638 int *total, bool speed ATTRIBUTE_UNUSED)
639{
640 const int code = GET_CODE (x);
641
642 switch (code)
643 {
644 case CONST_INT:
645 if ((mode == VOIDmode && UBYTE_INT (INTVAL (x)))
646 || (mode != VOIDmode && const_ubyte_operand (x, mode)))
647 {
648 *total = COSTS_N_INSNS (0);
649 return true;
650 }
651 else if ((mode == VOIDmode && UHWORD_INT (INTVAL (x)))
652 || (mode != VOIDmode && const_uhword_operand (x, mode)))
653 {
654 *total = COSTS_N_INSNS (1);
655 return true;
656 }
657 else if (outer_code == MEM && ctable_addr_operand (x, VOIDmode))
658 {
659 *total = COSTS_N_INSNS (0);
660 return true;
661 }
662 else
663 {
664 *total = COSTS_N_INSNS (2);
665 return true;
666 }
667
668 case LABEL_REF:
669 case SYMBOL_REF:
670 case CONST:
671 {
672 *total = COSTS_N_INSNS (1);
673 return true;
674 }
675 case CONST_DOUBLE:
676 {
677 *total = COSTS_N_INSNS (2);
678 return true;
679 }
680 case CONST_WIDE_INT:
681 {
682 /* PRU declares no vector or very large integer types. */
683 gcc_unreachable ();
684 return true;
685 }
686 case SET:
687 {
688 int factor;
689
690 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
691 the mode for the factor. */
692 mode = GET_MODE (SET_DEST (x));
693
694 /* SI move has the same cost as a QI move. Moves larger than
695 64 bits are costly. */
696 factor = CEIL (GET_MODE_SIZE (mode), GET_MODE_SIZE (SImode));
697 *total = factor * COSTS_N_INSNS (1);
698
699 return false;
700 }
701
702 case MULT:
703 {
704 /* Factor in that "mul" requires fixed registers, which
705 would likely require register moves. */
706 *total = COSTS_N_INSNS (7);
707 return false;
708 }
709 case PLUS:
710 {
711 rtx op0 = XEXP (x, 0);
712 rtx op1 = XEXP (x, 1);
713 machine_mode op1_mode = GET_MODE (op1);
714
715 /* Generic RTL address expressions do not enforce mode for
716 offsets, yet our UBYTE constraint requires it. Fix it here. */
717 if (op1_mode == VOIDmode && CONST_INT_P (op1) && outer_code == MEM)
718 op1_mode = Pmode;
719 if (outer_code == MEM
720 && ((REG_P (op0) && reg_or_ubyte_operand (op1, op1_mode))
721 || ctable_addr_operand (op0, VOIDmode)
722 || ctable_addr_operand (op1, VOIDmode)
723 || (ctable_base_operand (op0, VOIDmode) && REG_P (op1))
724 || (ctable_base_operand (op1, VOIDmode) && REG_P (op0))))
725 {
726 /* CTABLE or REG base addressing - PLUS comes for free. */
727 *total = COSTS_N_INSNS (0);
728 return true;
729 }
730 else
731 {
732 *total = COSTS_N_INSNS (1);
733 return false;
734 }
735 }
736 case SIGN_EXTEND:
737 {
738 *total = COSTS_N_INSNS (3);
739 return false;
740 }
741 case ASHIFTRT:
742 {
743 rtx op1 = XEXP (x, 1);
744 if (const_1_operand (op1, VOIDmode))
745 *total = COSTS_N_INSNS (3);
746 else
747 *total = COSTS_N_INSNS (7);
748 return false;
749 }
750 case ZERO_EXTRACT:
751 {
752 rtx op2 = XEXP (x, 2);
753 if ((outer_code == EQ || outer_code == NE)
754 && CONST_INT_P (op2)
755 && INTVAL (op2) == 1)
756 {
757 /* Branch if bit is set/clear is a single instruction. */
758 *total = COSTS_N_INSNS (0);
759 return true;
760 }
761 else
762 {
763 *total = COSTS_N_INSNS (2);
764 return false;
765 }
766 }
767 case ZERO_EXTEND:
768 {
769 /* 64-bit zero extensions actually have a cost because they
770 require setting a register to zero.
771 32-bit and smaller are free. */
772 int factor = (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (SImode)) ? 0 : 1;
773 *total = factor * COSTS_N_INSNS (1);
774 return false;
775 }
776
777 default:
778 {
779 /* PRU ALU is 32 bit, despite GCC's UNITS_PER_WORD=1. */
780 int factor = CEIL (GET_MODE_SIZE (mode), GET_MODE_SIZE (SImode));
781 *total = factor * COSTS_N_INSNS (1);
782 return false;
783 }
784 }
785}
786\f
787static GTY(()) rtx eqdf_libfunc;
788static GTY(()) rtx nedf_libfunc;
789static GTY(()) rtx ledf_libfunc;
790static GTY(()) rtx ltdf_libfunc;
791static GTY(()) rtx gedf_libfunc;
792static GTY(()) rtx gtdf_libfunc;
793static GTY(()) rtx eqsf_libfunc;
794static GTY(()) rtx nesf_libfunc;
795static GTY(()) rtx lesf_libfunc;
796static GTY(()) rtx ltsf_libfunc;
797static GTY(()) rtx gesf_libfunc;
798static GTY(()) rtx gtsf_libfunc;
799
800/* Implement the TARGET_INIT_LIBFUNCS macro. We use this to rename library
801 functions to match the PRU ABI. */
802
803static void
804pru_init_libfuncs (void)
805{
806 /* Double-precision floating-point arithmetic. */
807 set_optab_libfunc (add_optab, DFmode, "__pruabi_addd");
808 set_optab_libfunc (sdiv_optab, DFmode, "__pruabi_divd");
809 set_optab_libfunc (smul_optab, DFmode, "__pruabi_mpyd");
810 set_optab_libfunc (neg_optab, DFmode, "__pruabi_negd");
811 set_optab_libfunc (sub_optab, DFmode, "__pruabi_subd");
812
813 /* Single-precision floating-point arithmetic. */
814 set_optab_libfunc (add_optab, SFmode, "__pruabi_addf");
815 set_optab_libfunc (sdiv_optab, SFmode, "__pruabi_divf");
816 set_optab_libfunc (smul_optab, SFmode, "__pruabi_mpyf");
817 set_optab_libfunc (neg_optab, SFmode, "__pruabi_negf");
818 set_optab_libfunc (sub_optab, SFmode, "__pruabi_subf");
819
820 /* Floating-point comparisons. */
821 eqsf_libfunc = init_one_libfunc ("__pruabi_eqf");
822 nesf_libfunc = init_one_libfunc ("__pruabi_neqf");
823 lesf_libfunc = init_one_libfunc ("__pruabi_lef");
824 ltsf_libfunc = init_one_libfunc ("__pruabi_ltf");
825 gesf_libfunc = init_one_libfunc ("__pruabi_gef");
826 gtsf_libfunc = init_one_libfunc ("__pruabi_gtf");
827 eqdf_libfunc = init_one_libfunc ("__pruabi_eqd");
828 nedf_libfunc = init_one_libfunc ("__pruabi_neqd");
829 ledf_libfunc = init_one_libfunc ("__pruabi_led");
830 ltdf_libfunc = init_one_libfunc ("__pruabi_ltd");
831 gedf_libfunc = init_one_libfunc ("__pruabi_ged");
832 gtdf_libfunc = init_one_libfunc ("__pruabi_gtd");
833
 834 /* In the PRU ABI, much like on other TI processors, floating point
835 comparisons return non-standard values. This quirk is handled
836 by disabling the optab library functions, and handling the
837 comparison during RTL expansion. */
838 set_optab_libfunc (eq_optab, SFmode, NULL);
839 set_optab_libfunc (ne_optab, SFmode, NULL);
840 set_optab_libfunc (gt_optab, SFmode, NULL);
841 set_optab_libfunc (ge_optab, SFmode, NULL);
842 set_optab_libfunc (lt_optab, SFmode, NULL);
843 set_optab_libfunc (le_optab, SFmode, NULL);
844 set_optab_libfunc (eq_optab, DFmode, NULL);
845 set_optab_libfunc (ne_optab, DFmode, NULL);
846 set_optab_libfunc (gt_optab, DFmode, NULL);
847 set_optab_libfunc (ge_optab, DFmode, NULL);
848 set_optab_libfunc (lt_optab, DFmode, NULL);
849 set_optab_libfunc (le_optab, DFmode, NULL);
850
851 /* The isunordered function appears to be supported only by GCC. */
852 set_optab_libfunc (unord_optab, SFmode, "__pruabi_unordf");
853 set_optab_libfunc (unord_optab, DFmode, "__pruabi_unordd");
854
855 /* Floating-point to integer conversions. */
856 set_conv_libfunc (sfix_optab, SImode, DFmode, "__pruabi_fixdi");
857 set_conv_libfunc (ufix_optab, SImode, DFmode, "__pruabi_fixdu");
858 set_conv_libfunc (sfix_optab, DImode, DFmode, "__pruabi_fixdlli");
859 set_conv_libfunc (ufix_optab, DImode, DFmode, "__pruabi_fixdull");
860 set_conv_libfunc (sfix_optab, SImode, SFmode, "__pruabi_fixfi");
861 set_conv_libfunc (ufix_optab, SImode, SFmode, "__pruabi_fixfu");
862 set_conv_libfunc (sfix_optab, DImode, SFmode, "__pruabi_fixflli");
863 set_conv_libfunc (ufix_optab, DImode, SFmode, "__pruabi_fixfull");
864
865 /* Conversions between floating types. */
866 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__pruabi_cvtdf");
867 set_conv_libfunc (sext_optab, DFmode, SFmode, "__pruabi_cvtfd");
868
869 /* Integer to floating-point conversions. */
870 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__pruabi_fltid");
871 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__pruabi_fltud");
872 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__pruabi_fltllid");
873 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__pruabi_fltulld");
874 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__pruabi_fltif");
875 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__pruabi_fltuf");
876 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__pruabi_fltllif");
877 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__pruabi_fltullf");
878
879 /* Long long. */
880 set_optab_libfunc (ashr_optab, DImode, "__pruabi_asrll");
881 set_optab_libfunc (smul_optab, DImode, "__pruabi_mpyll");
882 set_optab_libfunc (ashl_optab, DImode, "__pruabi_lslll");
883 set_optab_libfunc (lshr_optab, DImode, "__pruabi_lsrll");
884
885 set_optab_libfunc (sdiv_optab, SImode, "__pruabi_divi");
886 set_optab_libfunc (udiv_optab, SImode, "__pruabi_divu");
887 set_optab_libfunc (smod_optab, SImode, "__pruabi_remi");
888 set_optab_libfunc (umod_optab, SImode, "__pruabi_remu");
889 set_optab_libfunc (sdivmod_optab, SImode, "__pruabi_divremi");
890 set_optab_libfunc (udivmod_optab, SImode, "__pruabi_divremu");
891 set_optab_libfunc (sdiv_optab, DImode, "__pruabi_divlli");
892 set_optab_libfunc (udiv_optab, DImode, "__pruabi_divull");
893 set_optab_libfunc (smod_optab, DImode, "__pruabi_remlli");
894 set_optab_libfunc (umod_optab, DImode, "__pruabi_remull");
895 set_optab_libfunc (udivmod_optab, DImode, "__pruabi_divremull");
896}
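/* An illustration of the renaming above (a sketch, not part of this file's
   logic): with the DFmode add optab pointing at "__pruabi_addd", a plain C
   function such as

     double f (double a, double b) { return a + b; }

   ends up calling __pruabi_addd rather than the default libgcc routine
   __adddf3.  */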
897
898/* Given a comparison CODE, return a similar comparison but without
899 the "equals" condition. In other words, it strips GE/GEU/LE/LEU
900 and instead returns GT/GTU/LT/LTU. */
901
902enum rtx_code
903pru_noteq_condition (enum rtx_code code)
904{
905 switch (code)
906 {
907 case GT: return GT;
908 case GTU: return GTU;
909 case GE: return GT;
910 case GEU: return GTU;
911 case LT: return LT;
912 case LTU: return LTU;
913 case LE: return LT;
914 case LEU: return LTU;
915 default:
916 gcc_unreachable ();
917 }
918}
919
920/* Emit comparison instruction if necessary, returning the expression
921 that holds the compare result in the proper mode. Return the comparison
922 that should be used in the jump insn. */
923
924rtx
925pru_expand_fp_compare (rtx comparison, machine_mode mode)
926{
927 enum rtx_code code = GET_CODE (comparison);
928 rtx op0 = XEXP (comparison, 0);
929 rtx op1 = XEXP (comparison, 1);
930 rtx cmp;
931 enum rtx_code jump_code = code;
932 machine_mode op_mode = GET_MODE (op0);
933 rtx_insn *insns;
934 rtx libfunc;
935
936 gcc_assert (op_mode == DFmode || op_mode == SFmode);
937
938 /* FP exceptions are not raised by PRU's softfp implementation. So the
939 following transformations are safe. */
940 if (code == UNGE)
941 {
942 code = LT;
943 jump_code = EQ;
944 }
945 else if (code == UNLE)
946 {
947 code = GT;
948 jump_code = EQ;
949 }
950 else
951 jump_code = NE;
952
953 switch (code)
954 {
955 case EQ:
956 libfunc = op_mode == DFmode ? eqdf_libfunc : eqsf_libfunc;
957 break;
958 case NE:
959 libfunc = op_mode == DFmode ? nedf_libfunc : nesf_libfunc;
960 break;
961 case GT:
962 libfunc = op_mode == DFmode ? gtdf_libfunc : gtsf_libfunc;
963 break;
964 case GE:
965 libfunc = op_mode == DFmode ? gedf_libfunc : gesf_libfunc;
966 break;
967 case LT:
968 libfunc = op_mode == DFmode ? ltdf_libfunc : ltsf_libfunc;
969 break;
970 case LE:
971 libfunc = op_mode == DFmode ? ledf_libfunc : lesf_libfunc;
972 break;
973 default:
974 gcc_unreachable ();
975 }
976 start_sequence ();
977
978 cmp = emit_library_call_value (libfunc, 0, LCT_CONST, SImode,
979 op0, op_mode, op1, op_mode);
980 insns = get_insns ();
981 end_sequence ();
982
983 emit_libcall_block (insns, cmp, cmp,
984 gen_rtx_fmt_ee (code, SImode, op0, op1));
985
986 return gen_rtx_fmt_ee (jump_code, mode, cmp, const0_rtx);
987}
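/* A worked example of the transformation above.  For (UNGE:SF a b), code
   becomes LT and jump_code becomes EQ, so the emitted sequence is roughly
   (illustrative only):

     cmp = __pruabi_ltf (a, b);     libcall returning an SImode flag
     if (cmp == 0) goto label;      taken when "a < b" is false, i.e. when
                                    a >= b or the operands are unordered

   This is valid only because the softfp routines raise no FP exceptions,
   as noted above.  */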
988\f
989/* Return the sign bit position for given OP's mode. */
990static int
991sign_bit_position (const rtx op)
992{
993 const int sz = GET_MODE_SIZE (GET_MODE (op));
994
995 return sz * 8 - 1;
996}
997
998/* Parse the given CVAL integer value, and extract the "filling" byte
 999 range of consecutive 0xff byte values. The rest of the bytes must be 0x00.
1000 There must be only one range in the given value. This range would
1001 typically be used to calculate the parameters of
1002 PRU instructions ZERO and FILL.
1003
1004 The parameter MODE determines the maximum byte range to consider
1005 in the given input constant.
1006
1007 Example input:
1008 cval = 0xffffffffffffff00 = -256
1009 mode = SImode
1010 Return value:
1011 start = 1
1012 nbytes = 3
1013
1014 On error, return a range with -1 for START and NBYTES. */
1015pru_byterange
1016pru_calc_byterange (HOST_WIDE_INT cval, machine_mode mode)
1017{
1018 const pru_byterange invalid_range = { -1, -1 };
1019 pru_byterange r = invalid_range;
1020 enum { ST_FFS, ST_INRANGE, ST_TRAILING_ZEROS } st = ST_FFS;
1021 int i;
 1022
 1023 for (i = 0; i < GET_MODE_SIZE (mode); i++)
 1024 {
1025 const int b = cval & ((1U << BITS_PER_UNIT) - 1);
1026 cval >>= BITS_PER_UNIT;
1027
1028 if (b == 0x00 && (st == ST_FFS || st == ST_TRAILING_ZEROS))
1029 /* No action. */;
1030 else if (b == 0x00 && st == ST_INRANGE)
1031 st = ST_TRAILING_ZEROS;
1032 else if (b == 0xff && st == ST_FFS)
1033 {
1034 st = ST_INRANGE;
1035 r.start = i;
1036 r.nbytes = 1;
1037 }
1038 else if (b == 0xff && st == ST_INRANGE)
1039 r.nbytes++;
1040 else
1041 return invalid_range;
1042 }
1043
1044 if (st != ST_TRAILING_ZEROS && st != ST_INRANGE)
1045 return invalid_range;
1046 return r;
1047}
1048\f
1049/* Branches and compares. */
1050
1051/* PRU's ALU does not support signed comparison operations. That's why we
1052 emulate them. By first checking the sign bit and handling every possible
1053 operand sign combination, we can simulate signed comparisons in just
1054 5 instructions. See table below.
1055
1056.-------------------.---------------------------------------------------.
1057| Operand sign bit | Mapping the signed comparison to an unsigned one |
1058|---------+---------+------------+------------+------------+------------|
1059| OP1.b31 | OP2.b31 | OP1 < OP2 | OP1 <= OP2 | OP1 > OP2 | OP1 >= OP2 |
1060|---------+---------+------------+------------+------------+------------|
1061| 0 | 0 | OP1 < OP2 | OP1 <= OP2 | OP1 > OP2 | OP1 >= OP2 |
1062|---------+---------+------------+------------+------------+------------|
1063| 0 | 1 | false | false | true | true |
1064|---------+---------+------------+------------+------------+------------|
1065| 1 | 0 | true | true | false | false |
1066|---------+---------+------------+------------+------------+------------|
1067| 1 | 1 | OP1 < OP2 | OP1 <= OP2 | OP1 > OP2 | OP1 >= OP2 |
1068`---------'---------'------------'------------'------------+------------'
1069
1070
1071Given the table above, here is an example for a concrete op:
1072 LT:
1073 qbbc OP1_POS, OP1, 31
1074 OP1_NEG: qbbc BRANCH_TAKEN_LABEL, OP2, 31
1075 OP1_NEG_OP2_NEG: qblt BRANCH_TAKEN_LABEL, OP2, OP1
1076 ; jmp OUT -> can be eliminated because we'll take the
1077 ; following branch. OP2.b31 is guaranteed to be 1
1078 ; by the time we get here.
1079 OP1_POS: qbbs OUT, OP2, 31
1080 OP1_POS_OP2_POS: qblt BRANCH_TAKEN_LABEL, OP2, OP1
1081#if FAR_JUMP
1082 jmp OUT
1083BRANCH_TAKEN_LABEL: jmp REAL_BRANCH_TAKEN_LABEL
1084#endif
1085 OUT:
1086
1087*/
1088
1089/* Output asm code for a signed-compare LT/LE conditional branch. */
1090static const char *
1091pru_output_ltle_signed_cbranch (rtx *operands, bool is_near)
1092{
1093 static char buf[1024];
1094 enum rtx_code code = GET_CODE (operands[0]);
1095 rtx op1;
1096 rtx op2;
1097 const char *cmp_opstr;
1098 int bufi = 0;
1099
1100 op1 = operands[1];
1101 op2 = operands[2];
1102
1103 gcc_assert (GET_CODE (op1) == REG && GET_CODE (op2) == REG);
1104
1105 /* Determine the comparison operators for positive and negative operands. */
1106 if (code == LT)
1107 cmp_opstr = "qblt";
1108 else if (code == LE)
1109 cmp_opstr = "qble";
1110 else
1111 gcc_unreachable ();
1112
1113 if (is_near)
1114 bufi = snprintf (buf, sizeof (buf),
1115 "qbbc\t.+12, %%1, %d\n\t"
1116 "qbbc\t%%l3, %%2, %d\n\t" /* OP1_NEG. */
1117 "%s\t%%l3, %%2, %%1\n\t" /* OP1_NEG_OP2_NEG. */
1118 "qbbs\t.+8, %%2, %d\n\t" /* OP1_POS. */
1119 "%s\t%%l3, %%2, %%1", /* OP1_POS_OP2_POS. */
1120 sign_bit_position (op1),
1121 sign_bit_position (op2),
1122 cmp_opstr,
1123 sign_bit_position (op2),
1124 cmp_opstr);
1125 else
1126 bufi = snprintf (buf, sizeof (buf),
1127 "qbbc\t.+12, %%1, %d\n\t"
1128 "qbbc\t.+20, %%2, %d\n\t" /* OP1_NEG. */
1129 "%s\t.+16, %%2, %%1\n\t" /* OP1_NEG_OP2_NEG. */
1130 "qbbs\t.+16, %%2, %d\n\t" /* OP1_POS. */
1131 "%s\t.+8, %%2, %%1\n\t" /* OP1_POS_OP2_POS. */
1132 "jmp\t.+8\n\t" /* jmp OUT. */
1133 "jmp\t%%%%label(%%l3)", /* BRANCH_TAKEN_LABEL. */
1134 sign_bit_position (op1),
1135 sign_bit_position (op2),
1136 cmp_opstr,
1137 sign_bit_position (op2),
1138 cmp_opstr);
1139
1140 gcc_assert (bufi > 0);
1141 gcc_assert ((unsigned int) bufi < sizeof (buf));
1142
1143 return buf;
1144}
1145
1146/* Output asm code for a signed-compare GT/GE conditional branch. */
1147static const char *
1148pru_output_gtge_signed_cbranch (rtx *operands, bool is_near)
1149{
1150 static char buf[1024];
1151 enum rtx_code code = GET_CODE (operands[0]);
1152 rtx op1;
1153 rtx op2;
1154 const char *cmp_opstr;
1155 int bufi = 0;
1156
1157 op1 = operands[1];
1158 op2 = operands[2];
1159
1160 gcc_assert (GET_CODE (op1) == REG && GET_CODE (op2) == REG);
1161
1162 /* Determine the comparison operators for positive and negative operands. */
1163 if (code == GT)
1164 cmp_opstr = "qbgt";
1165 else if (code == GE)
1166 cmp_opstr = "qbge";
1167 else
1168 gcc_unreachable ();
1169
1170 if (is_near)
1171 bufi = snprintf (buf, sizeof (buf),
1172 "qbbs\t.+12, %%1, %d\n\t"
1173 "qbbs\t%%l3, %%2, %d\n\t" /* OP1_POS. */
1174 "%s\t%%l3, %%2, %%1\n\t" /* OP1_POS_OP2_POS. */
1175 "qbbc\t.+8, %%2, %d\n\t" /* OP1_NEG. */
1176 "%s\t%%l3, %%2, %%1", /* OP1_NEG_OP2_NEG. */
1177 sign_bit_position (op1),
1178 sign_bit_position (op2),
1179 cmp_opstr,
1180 sign_bit_position (op2),
1181 cmp_opstr);
1182 else
1183 bufi = snprintf (buf, sizeof (buf),
1184 "qbbs\t.+12, %%1, %d\n\t"
1185 "qbbs\t.+20, %%2, %d\n\t" /* OP1_POS. */
1186 "%s\t.+16, %%2, %%1\n\t" /* OP1_POS_OP2_POS. */
1187 "qbbc\t.+16, %%2, %d\n\t" /* OP1_NEG. */
1188 "%s\t.+8, %%2, %%1\n\t" /* OP1_NEG_OP2_NEG. */
1189 "jmp\t.+8\n\t" /* jmp OUT. */
1190 "jmp\t%%%%label(%%l3)", /* BRANCH_TAKEN_LABEL. */
1191 sign_bit_position (op1),
1192 sign_bit_position (op2),
1193 cmp_opstr,
1194 sign_bit_position (op2),
1195 cmp_opstr);
1196
1197 gcc_assert (bufi > 0);
1198 gcc_assert ((unsigned int) bufi < sizeof (buf));
1199
1200 return buf;
1201}
1202
1203/* Output asm code for a signed-compare conditional branch.
1204
1205 If IS_NEAR is true, then QBBx instructions may be used for reaching
1206 the destination label. Otherwise JMP is used, at the expense of
1207 increased code size. */
1208const char *
1209pru_output_signed_cbranch (rtx *operands, bool is_near)
1210{
1211 enum rtx_code code = GET_CODE (operands[0]);
1212
1213 if (code == LT || code == LE)
1214 return pru_output_ltle_signed_cbranch (operands, is_near);
1215 else if (code == GT || code == GE)
1216 return pru_output_gtge_signed_cbranch (operands, is_near);
1217 else
1218 gcc_unreachable ();
1219}
1220
1221/* Optimized version of pru_output_signed_cbranch for constant second
1222 operand. */
1223
1224const char *
1225pru_output_signed_cbranch_ubyteop2 (rtx *operands, bool is_near)
1226{
1227 static char buf[1024];
1228 enum rtx_code code = GET_CODE (operands[0]);
1229 int regop_sign_bit_pos = sign_bit_position (operands[1]);
1230 const char *cmp_opstr;
1231 const char *rcmp_opstr;
1232
 1233 /* We must swap operands due to PRU's demand that OP1 be the immediate. */
1234 code = swap_condition (code);
1235
1236 /* Determine normal and reversed comparison operators for both positive
1237 operands. This enables us to go completely unsigned.
1238
1239 NOTE: We cannot use the R print modifier because we convert signed
1240 comparison operators to unsigned ones. */
1241 switch (code)
1242 {
1243 case LT: cmp_opstr = "qblt"; rcmp_opstr = "qbge"; break;
1244 case LE: cmp_opstr = "qble"; rcmp_opstr = "qbgt"; break;
1245 case GT: cmp_opstr = "qbgt"; rcmp_opstr = "qble"; break;
1246 case GE: cmp_opstr = "qbge"; rcmp_opstr = "qblt"; break;
1247 default: gcc_unreachable ();
1248 }
1249
1250 /* OP2 is a constant unsigned byte - utilize this info to generate
1251 optimized code. We can "remove half" of the op table above because
1252 we know that OP2.b31 = 0 (remember that 0 <= OP2 <= 255). */
1253 if (code == LT || code == LE)
1254 {
1255 if (is_near)
1256 snprintf (buf, sizeof (buf),
1257 "qbbs\t.+8, %%1, %d\n\t"
1258 "%s\t%%l3, %%1, %%u2",
1259 regop_sign_bit_pos,
1260 cmp_opstr);
1261 else
1262 snprintf (buf, sizeof (buf),
1263 "qbbs\t.+12, %%1, %d\n\t"
1264 "%s\t.+8, %%1, %%u2\n\t"
1265 "jmp\t%%%%label(%%l3)",
1266 regop_sign_bit_pos,
1267 rcmp_opstr);
1268 }
1269 else if (code == GT || code == GE)
1270 {
1271 if (is_near)
1272 snprintf (buf, sizeof (buf),
1273 "qbbs\t%%l3, %%1, %d\n\t"
1274 "%s\t%%l3, %%1, %%u2",
1275 regop_sign_bit_pos,
1276 cmp_opstr);
1277 else
1278 snprintf (buf, sizeof (buf),
1279 "qbbs\t.+8, %%1, %d\n\t"
1280 "%s\t.+8, %%1, %%u2\n\t"
1281 "jmp\t%%%%label(%%l3)",
1282 regop_sign_bit_pos,
1283 rcmp_opstr);
1284 }
1285 else
1286 gcc_unreachable ();
1287
1288 return buf;
1289}
1290
1291/* Optimized version of pru_output_signed_cbranch_ubyteop2 for constant
1292 zero second operand. */
1293
1294const char *
1295pru_output_signed_cbranch_zeroop2 (rtx *operands, bool is_near)
1296{
1297 static char buf[1024];
1298 enum rtx_code code = GET_CODE (operands[0]);
1299 int regop_sign_bit_pos = sign_bit_position (operands[1]);
1300
1301 /* OP2 is a constant zero - utilize this info to simply check the
1302 OP1 sign bit when comparing for LT or GE. */
1303 if (code == LT)
1304 {
1305 if (is_near)
1306 snprintf (buf, sizeof (buf),
1307 "qbbs\t%%l3, %%1, %d\n\t",
1308 regop_sign_bit_pos);
1309 else
1310 snprintf (buf, sizeof (buf),
1311 "qbbc\t.+8, %%1, %d\n\t"
1312 "jmp\t%%%%label(%%l3)",
1313 regop_sign_bit_pos);
1314 }
1315 else if (code == GE)
1316 {
1317 if (is_near)
1318 snprintf (buf, sizeof (buf),
1319 "qbbc\t%%l3, %%1, %d\n\t",
1320 regop_sign_bit_pos);
1321 else
1322 snprintf (buf, sizeof (buf),
1323 "qbbs\t.+8, %%1, %d\n\t"
1324 "jmp\t%%%%label(%%l3)",
1325 regop_sign_bit_pos);
1326 }
1327 else
1328 gcc_unreachable ();
1329
1330 return buf;
1331}
1332
1333/* Addressing Modes. */
1334
1335/* Return true if register REGNO is a valid base register.
1336 STRICT_P is true if REG_OK_STRICT is in effect. */
1337
1338bool
1339pru_regno_ok_for_base_p (int regno, bool strict_p)
1340{
1341 if (!HARD_REGISTER_NUM_P (regno) && !strict_p)
1342 return true;
1343
1344 /* The fake registers will be eliminated to either the stack or
1345 hard frame pointer, both of which are usually valid base registers.
1346 Reload deals with the cases where the eliminated form isn't valid. */
1347 return (GP_REG_P (regno)
1348 || regno == FRAME_POINTER_REGNUM
1349 || regno == ARG_POINTER_REGNUM);
1350}
1351
1352/* Return true if given xbbo constant OFFSET is valid. */
1353static bool
1354pru_valid_const_ubyte_offset (machine_mode mode, HOST_WIDE_INT offset)
1355{
1356 bool valid = UBYTE_INT (offset);
1357
1358 /* Reload can split multi word accesses, so make sure we can address
1359 the second word in a DI. */
1360 if (valid && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode))
1361 valid = UBYTE_INT (offset + GET_MODE_SIZE (mode) - 1);
1362
1363 return valid;
1364}
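/* Assuming UBYTE_INT accepts the range 0..255, the check above means that
   an SImode access at offset 252 is still valid (only the start offset is
   checked), while a DImode access at offset 252 is rejected because its
   last byte would land at offset 259.  */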
1365
1366/* Recognize a CTABLE base address. Return CTABLE entry index, or -1 if
1367 base was not found in the pragma-filled pru_ctable. */
1368int
1369pru_get_ctable_exact_base_index (unsigned HOST_WIDE_INT caddr)
1370{
1371 unsigned int i;
1372
1373 for (i = 0; i < ARRAY_SIZE (pru_ctable); i++)
1374 {
1375 if (pru_ctable[i].valid && pru_ctable[i].base == caddr)
1376 return i;
1377 }
1378 return -1;
1379}
1380
1381
1382/* Check if the given address can be addressed via CTABLE_BASE + UBYTE_OFFS,
1383 and return the base CTABLE index if possible. */
1384int
1385pru_get_ctable_base_index (unsigned HOST_WIDE_INT caddr)
1386{
1387 unsigned int i;
1388
1389 for (i = 0; i < ARRAY_SIZE (pru_ctable); i++)
1390 {
1391 if (pru_ctable[i].valid && IN_RANGE (caddr,
1392 pru_ctable[i].base,
1393 pru_ctable[i].base + 0xff))
1394 return i;
1395 }
1396 return -1;
1397}
1398
1399
1400/* Return the offset from some CTABLE base for this address. */
1401int
1402pru_get_ctable_base_offset (unsigned HOST_WIDE_INT caddr)
1403{
1404 int i;
1405
1406 i = pru_get_ctable_base_index (caddr);
1407 gcc_assert (i >= 0);
1408
1409 return caddr - pru_ctable[i].base;
1410}
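/* An example for the three CTABLE helpers above, with a hypothetical
   pragma-filled entry pru_ctable[4] = { base = 0x26000, valid = true }:

     pru_get_ctable_exact_base_index (0x26000) == 4
     pru_get_ctable_base_index (0x26010)       == 4    (within base + 0xff)
     pru_get_ctable_base_offset (0x26010)      == 0x10  */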
1411
1412/* Return true if the address expression formed by BASE + OFFSET is
1413 valid.
1414
1415 Note that the following address is not handled here:
1416 base CTABLE constant base + UBYTE constant offset
1417 The constants will be folded. The ctable_addr_operand predicate will take
1418 care of the validation. The CTABLE base+offset split will happen during
1419 operand printing. */
1420static bool
1421pru_valid_addr_expr_p (machine_mode mode, rtx base, rtx offset, bool strict_p)
1422{
1423 if (!strict_p && GET_CODE (base) == SUBREG)
1424 base = SUBREG_REG (base);
1425 if (!strict_p && GET_CODE (offset) == SUBREG)
1426 offset = SUBREG_REG (offset);
1427
1428 if (REG_P (base)
1429 && pru_regno_ok_for_base_p (REGNO (base), strict_p)
1430 && ((CONST_INT_P (offset)
1431 && pru_valid_const_ubyte_offset (mode, INTVAL (offset)))
1432 || (REG_P (offset)
1433 && pru_regno_ok_for_index_p (REGNO (offset), strict_p))))
1434 /* base register + register offset
1435 * OR base register + UBYTE constant offset. */
1436 return true;
1437 else if (REG_P (base)
1438 && pru_regno_ok_for_index_p (REGNO (base), strict_p)
1439 && ctable_base_operand (offset, VOIDmode))
1440 /* base CTABLE constant base + register offset
1441 * Note: GCC always puts the register as a first operand of PLUS. */
1442 return true;
1443 else
1444 return false;
1445}
1446
1447/* Return register number (either for r30 or r31) which maps to the
1448 corresponding symbol OP's name in the __regio_symbol address namespace.
1449
1450 If no mapping can be established (i.e. symbol name is invalid), then
1451 return -1. */
1452int pru_symref2ioregno (rtx op)
1453{
1454 if (!SYMBOL_REF_P (op))
1455 return -1;
1456
1457 const char *name = XSTR (op, 0);
1458 if (!strcmp (name, "__R30"))
1459 return R30_REGNUM;
1460 else if (!strcmp (name, "__R31"))
1461 return R31_REGNUM;
1462 else
1463 return -1;
1464}
1465
1466/* Implement TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P. */
1467static bool
1468pru_addr_space_legitimate_address_p (machine_mode mode, rtx operand,
1469 bool strict_p, addr_space_t as)
1470{
1471 if (as == ADDR_SPACE_REGIO)
1472 {
1473 /* Address space constraints for __regio_symbol have been checked in
1474 TARGET_INSERT_ATTRIBUTES, and some more checks will be done
1475 during RTL expansion of "mov<mode>". */
1476 return true;
1477 }
1478 else if (as != ADDR_SPACE_GENERIC)
1479 {
1480 gcc_unreachable ();
1481 }
1482
1483 switch (GET_CODE (operand))
1484 {
1485 /* Direct. */
1486 case SYMBOL_REF:
1487 case LABEL_REF:
1488 case CONST:
1489 case CONST_WIDE_INT:
1490 return false;
1491
1492 case CONST_INT:
1493 return ctable_addr_operand (operand, VOIDmode);
1494
1495 /* Register indirect. */
1496 case REG:
1497 return pru_regno_ok_for_base_p (REGNO (operand), strict_p);
1498
1499 /* Register indirect with displacement. */
1500 case PLUS:
1501 {
1502 rtx op0 = XEXP (operand, 0);
1503 rtx op1 = XEXP (operand, 1);
1504
1505 return pru_valid_addr_expr_p (mode, op0, op1, strict_p);
1506 }
1507
1508 default:
1509 break;
1510 }
1511 return false;
1512}
1513\f
1514/* Output assembly language related definitions. */
1515
1516/* Implement TARGET_ASM_CONSTRUCTOR. */
1517static void
1518pru_elf_asm_constructor (rtx symbol, int priority)
1519{
1520 char buf[23];
1521 section *s;
1522
1523 if (priority == DEFAULT_INIT_PRIORITY)
1524 snprintf (buf, sizeof (buf), ".init_array");
1525 else
1526 {
 1527 /* Priority is known to be in range [0, 65535], so 18 bytes would
 1528 be enough; but the compiler might not know that. To avoid a
 1529 -Wformat-truncation false positive, use a larger size. */
1530 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
1531 }
1532 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
1533 switch_to_section (s);
1534 assemble_aligned_integer (INIT_ARRAY_ENTRY_BYTES, symbol);
1535}
1536
1537/* Implement TARGET_ASM_DESTRUCTOR. */
1538static void
1539pru_elf_asm_destructor (rtx symbol, int priority)
1540{
1541 char buf[23];
1542 section *s;
1543
1544 if (priority == DEFAULT_INIT_PRIORITY)
1545 snprintf (buf, sizeof (buf), ".fini_array");
1546 else
1547 {
1548 /* While priority is known to be in range [0, 65535], so 18 bytes
1549 would be enough, the compiler might not know that. To avoid
1550 -Wformat-truncation false positive, use a larger size. */
1551 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
1552 }
1553 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
1554 switch_to_section (s);
1555 assemble_aligned_integer (INIT_ARRAY_ENTRY_BYTES, symbol);
1556}
1557
1558/* Map rtx_code to unsigned PRU branch op suffix. Callers must
1559 handle sign comparison themselves for signed operations. */
1560static const char *
1561pru_comparison_str (enum rtx_code cond)
1562{
1563 switch (cond)
1564 {
1565 case NE: return "ne";
1566 case EQ: return "eq";
1567 case GEU: return "ge";
1568 case GTU: return "gt";
1569 case LEU: return "le";
1570 case LTU: return "lt";
1571 default: gcc_unreachable ();
1572 }
1573}
1574
1575/* Access some RTX as INT_MODE. If X is a CONST_FIXED we can get
1576 the bit representation of X by "casting" it to CONST_INT. */
1577
1578static rtx
1579pru_to_int_mode (rtx x)
1580{
1581 machine_mode mode = GET_MODE (x);
1582
1583 return VOIDmode == mode
1584 ? x
1585 : simplify_gen_subreg (int_mode_for_mode (mode).require (), x, mode, 0);
1586}
1587
1588/* Translate from the MachineDescription notion
 1589 of 8-bit consecutive registers to the PRU
 1590 assembler syntax of REGWORD[.SUBREG]. */
1591static const char *
1592pru_asm_regname (rtx op)
1593{
1594 static char canon_reg_names[3][LAST_GP_REGNUM][8];
1595 int speci, regi;
1596
1597 gcc_assert (REG_P (op));
1598
1599 if (!canon_reg_names[0][0][0])
1600 {
1601 for (regi = 0; regi < LAST_GP_REGNUM; regi++)
1602 for (speci = 0; speci < 3; speci++)
1603 {
1604 const int sz = (speci == 0) ? 1 : ((speci == 1) ? 2 : 4);
1605 if ((regi + sz) > (32 * 4))
1606 continue; /* Invalid entry. */
1607
1608 /* Construct the lookup table. */
1609 const char *suffix = "";
1610
1611 switch ((sz << 8) | (regi % 4))
1612 {
1613 case (1 << 8) | 0: suffix = ".b0"; break;
1614 case (1 << 8) | 1: suffix = ".b1"; break;
1615 case (1 << 8) | 2: suffix = ".b2"; break;
1616 case (1 << 8) | 3: suffix = ".b3"; break;
1617 case (2 << 8) | 0: suffix = ".w0"; break;
1618 case (2 << 8) | 1: suffix = ".w1"; break;
1619 case (2 << 8) | 2: suffix = ".w2"; break;
1620 case (4 << 8) | 0: suffix = ""; break;
1621 default:
1622 /* Invalid entry. */
1623 continue;
1624 }
1625 sprintf (&canon_reg_names[speci][regi][0],
1626 "r%d%s", regi / 4, suffix);
1627 }
1628 }
1629
1630 switch (GET_MODE_SIZE (GET_MODE (op)))
1631 {
1632 case 1: speci = 0; break;
1633 case 2: speci = 1; break;
1634 case 4: speci = 2; break;
1635 case 8: speci = 2; break; /* Existing GCC test cases are not using %F. */
1636 default: gcc_unreachable ();
1637 }
1638 regi = REGNO (op);
1639 gcc_assert (regi < LAST_GP_REGNUM);
1640 gcc_assert (canon_reg_names[speci][regi][0]);
1641
1642 return &canon_reg_names[speci][regi][0];
1643}
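/* A few sample translations produced by the lookup table above (byte
   register numbers on the left, assembler spellings on the right):

     QImode reg 5  -> "r1.b1"
     HImode reg 14 -> "r3.w2"
     SImode reg 12 -> "r3"  */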
1644
1645/* Print the operand OP to file stream FILE modified by LETTER.
1646 LETTER can be one of:
1647
1648 b: prints the register byte start (used by LBBO/SBBO).
1649 B: prints 'c' or 'b' for CTABLE or REG base in a memory address.
1650 F: Full 32-bit register.
1651 H: Higher 16-bits of a const_int operand.
1652 L: Lower 16-bits of a const_int operand.
1653 N: prints next 32-bit register (upper 32bits of a 64bit REG couple).
1654 P: prints swapped condition.
1655 Q: prints swapped and reversed condition.
1656 R: prints reversed condition.
1657 S: print operand mode size (but do not print the operand itself).
1658 T: print exact_log2 () for const_int operands.
1659 u: print QI constant integer as unsigned. No transformation for regs.
1660 V: print exact_log2 () of negated const_int operands.
1661 w: Lower 32-bits of a const_int operand.
1662 W: Upper 32-bits of a const_int operand.
1663*/
1664static void
1665pru_print_operand (FILE *file, rtx op, int letter)
1666{
1667 switch (letter)
1668 {
1669 case 'S':
1670 fprintf (file, "%d", GET_MODE_SIZE (GET_MODE (op)));
1671 return;
1672
1673 default:
1674 break;
1675 }
1676
1677 if (comparison_operator (op, VOIDmode))
1678 {
1679 enum rtx_code cond = GET_CODE (op);
1680 gcc_assert (!pru_signed_cmp_operator (op, VOIDmode));
1681
1682 switch (letter)
1683 {
1684 case 0:
1685 fprintf (file, "%s", pru_comparison_str (cond));
1686 return;
1687 case 'P':
1688 fprintf (file, "%s", pru_comparison_str (swap_condition (cond)));
1689 return;
1690 case 'Q':
1691 cond = swap_condition (cond);
 1692 /* Fall through. */
1693 case 'R':
1694 fprintf (file, "%s", pru_comparison_str (reverse_condition (cond)));
1695 return;
1696 }
1697 }
1698
1699 switch (GET_CODE (op))
1700 {
1701 case REG:
1702 if (letter == 0 || letter == 'u')
1703 {
1704 fprintf (file, "%s", pru_asm_regname (op));
1705 return;
1706 }
1707 else if (letter == 'b')
1708 {
1709 if (REGNO (op) > LAST_NONIO_GP_REGNUM)
1710 {
1711 output_operand_lossage ("I/O register operand for '%%%c'",
1712 letter);
1713 return;
1714 }
1715 fprintf (file, "r%d.b%d", REGNO (op) / 4, REGNO (op) % 4);
1716 return;
1717 }
1718 else if (letter == 'F' || letter == 'N')
1719 {
1720 if (REGNO (op) > LAST_NONIO_GP_REGNUM - 1)
1721 {
1722 output_operand_lossage ("I/O register operand for '%%%c'",
1723 letter);
1724 return;
1725 }
1726 if (REGNO (op) % 4 != 0)
1727 {
1728 output_operand_lossage ("non 32 bit register operand for '%%%c'",
1729 letter);
1730 return;
1731 }
1732 fprintf (file, "r%d", REGNO (op) / 4 + (letter == 'N' ? 1 : 0));
1733 return;
1734 }
1735 break;
1736
1737 case CONST_INT:
1738 if (letter == 'H')
1739 {
1740 HOST_WIDE_INT val = INTVAL (op);
1741 val = (val >> 16) & 0xFFFF;
1742 output_addr_const (file, gen_int_mode (val, SImode));
1743 return;
1744 }
1745 else if (letter == 'L')
1746 {
1747 HOST_WIDE_INT val = INTVAL (op);
1748 val &= 0xFFFF;
1749 output_addr_const (file, gen_int_mode (val, SImode));
1750 return;
1751 }
1752 else if (letter == 'T')
1753 {
1754 /* The predicate should have already validated the 1-high-bit
1755 requirement. Use CTZ here to deal with constant's sign
1756 extension. */
1757 HOST_WIDE_INT val = wi::ctz (INTVAL (op));
1758 if (val < 0 || val > 31)
1759 {
1760 output_operand_lossage ("invalid operand for '%%%c'", letter);
1761 return;
1762 }
1763 output_addr_const (file, gen_int_mode (val, SImode));
1764 return;
1765 }
1766 else if (letter == 'V')
1767 {
1768 HOST_WIDE_INT val = wi::ctz (~INTVAL (op));
1769 if (val < 0 || val > 31)
1770 {
1771 output_operand_lossage ("invalid operand for '%%%c'", letter);
1772 return;
1773 }
1774 output_addr_const (file, gen_int_mode (val, SImode));
1775 return;
1776 }
1777 else if (letter == 'w')
1778 {
1779 HOST_WIDE_INT val = INTVAL (op) & 0xffffffff;
1780 output_addr_const (file, gen_int_mode (val, SImode));
1781 return;
1782 }
1783 else if (letter == 'W')
1784 {
1785 HOST_WIDE_INT val = (INTVAL (op) >> 32) & 0xffffffff;
1786 output_addr_const (file, gen_int_mode (val, SImode));
1787 return;
1788 }
1789 else if (letter == 'u')
1790 {
 1791 /* Work around GCC's representation of QI constants in sign-extended
 1792 form, and the PRU assembler's insistence on unsigned constant
 1793 integers. See the notes about the O constraint. */
1794 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op) & 0xff);
1795 return;
1796 }
1797 /* Else, fall through. */
1798
1799 case CONST:
1800 case LABEL_REF:
1801 case SYMBOL_REF:
1802 if (letter == 0)
1803 {
1804 output_addr_const (file, op);
1805 return;
1806 }
1807 break;
1808
1809 case CONST_FIXED:
1810 {
1811 HOST_WIDE_INT ival = INTVAL (pru_to_int_mode (op));
1812 if (letter != 0)
1813 output_operand_lossage ("unsupported code '%c' for fixed-point:",
1814 letter);
1815 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
1816 return;
1817 }
1818 break;
1819
1820 case CONST_DOUBLE:
1821 if (letter == 0)
1822 {
1823 long val;
1824
1825 if (GET_MODE (op) != SFmode)
1826 {
1827 output_operand_lossage ("double constants not supported");
1828 return;
1829 }
1830 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), val);
1831 fprintf (file, "0x%lx", val);
1832 return;
1833 }
1834 else if (letter == 'w' || letter == 'W')
1835 {
1836 long t[2];
1837 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), t);
1838 fprintf (file, "0x%lx", t[letter == 'w' ? 0 : 1]);
1839 return;
1840 }
1841 else
1842 {
1843 output_operand_lossage ("invalid operand for '%%%c'", letter);
1844 return;
1845 }
1846 break;
1847
1848 case SUBREG:
 1849 /* Subregs should not appear at such a late stage. */
1850 gcc_unreachable ();
1851 break;
1852
1853 case MEM:
1854 if (letter == 0)
1855 {
1856 output_address (VOIDmode, op);
1857 return;
1858 }
1859 else if (letter == 'B')
1860 {
1861 rtx base = XEXP (op, 0);
1862 if (GET_CODE (base) == PLUS)
1863 {
1864 rtx op0 = XEXP (base, 0);
1865 rtx op1 = XEXP (base, 1);
1866
1867 /* PLUS cannot have two constant operands, so one of them
1868 must be a REG. Hence it suffices to check whether the
1869 other operand is an exact CTABLE base address. */
1870 if (ctable_base_operand (op1, VOIDmode))
1871 {
1872 fprintf (file, "c");
1873 return;
1874 }
1875 else if (REG_P (op0))
1876 {
1877 fprintf (file, "b");
1878 return;
1879 }
1880 else
1881 gcc_unreachable ();
1882 }
1883 else if (REG_P (base))
1884 {
1885 fprintf (file, "b");
1886 return;
1887 }
1888 else if (ctable_addr_operand (base, VOIDmode))
1889 {
1890 fprintf (file, "c");
1891 return;
1892 }
1893 else
1894 gcc_unreachable ();
1895 }
1896 break;
1897
1898 case CODE_LABEL:
1899 if (letter == 0)
1900 {
1901 output_addr_const (file, op);
1902 return;
1903 }
1904 break;
1905
1906 default:
1907 break;
1908 }
1909
1910 output_operand_lossage ("unsupported operand %s for code '%c'",
1911 GET_RTX_NAME (GET_CODE (op)), letter);
1912}
1913
1914/* Implement TARGET_PRINT_OPERAND_ADDRESS. */
1915static void
1916pru_print_operand_address (FILE *file, machine_mode mode, rtx op)
1917{
1918 if (CONSTANT_ADDRESS_P (op) && text_segment_operand (op, VOIDmode))
1919 {
1920 output_operand_lossage ("unexpected text address:");
1921 return;
1922 }
1923
1924 switch (GET_CODE (op))
1925 {
1926 case CONST:
1927 case LABEL_REF:
1928 case CONST_WIDE_INT:
1929 case SYMBOL_REF:
1930 break;
1931
1932 case CONST_INT:
1933 {
1934 unsigned HOST_WIDE_INT caddr = INTVAL (op);
1935 int base = pru_get_ctable_base_index (caddr);
1936 int offs = pru_get_ctable_base_offset (caddr);
1937 if (base < 0)
1938 {
1939 output_operand_lossage ("unsupported constant address:");
1940 return;
1941 }
1942 fprintf (file, "%d, %d", base, offs);
1943 return;
1944 }
1945 break;
1946
1947 case PLUS:
1948 {
1949 int base;
1950 rtx op0 = XEXP (op, 0);
1951 rtx op1 = XEXP (op, 1);
1952
1953 if (REG_P (op0) && CONST_INT_P (op1)
1954 && pru_get_ctable_exact_base_index (INTVAL (op1)) >= 0)
1955 {
1956 base = pru_get_ctable_exact_base_index (INTVAL (op1));
1957 fprintf (file, "%d, %s", base, pru_asm_regname (op0));
1958 return;
1959 }
1960 else if (REG_P (op1) && CONST_INT_P (op0)
1961 && pru_get_ctable_exact_base_index (INTVAL (op0)) >= 0)
1962 {
1963 /* Not a valid RTL. */
1964 gcc_unreachable ();
1965 }
1966 else if (REG_P (op0) && CONSTANT_P (op1))
1967 {
1968 fprintf (file, "%s, ", pru_asm_regname (op0));
1969 output_addr_const (file, op1);
1970 return;
1971 }
1972 else if (REG_P (op1) && CONSTANT_P (op0))
1973 {
1974 /* Not a valid RTL. */
1975 gcc_unreachable ();
1976 }
1977 else if (REG_P (op1) && REG_P (op0))
1978 {
1979 fprintf (file, "%s, %s", pru_asm_regname (op0),
1980 pru_asm_regname (op1));
1981 return;
1982 }
1983 }
1984 break;
1985
1986 case REG:
1987 fprintf (file, "%s, 0", pru_asm_regname (op));
1988 return;
1989
1990 case MEM:
1991 {
1992 rtx base = XEXP (op, 0);
1993 pru_print_operand_address (file, mode, base);
1994 return;
1995 }
1996 default:
1997 break;
1998 }
1999
2000 output_operand_lossage ("unsupported memory expression:");
2001}
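/* Sample outputs of the address cases above, assuming pru_asm_regname
   yields names like "r14" and that the constant address falls inside a
   CTABLE region declared elsewhere: a plain REG address prints as
   "r14, 0"; REG plus a constant offset prints as "r14, 16"; a bare
   constant address prints as "<ctable base index>, <offset from that
   base>".  */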
2002
2003/* Implement TARGET_ASM_FUNCTION_PROLOGUE. */
2004static void
2005pru_asm_function_prologue (FILE *file)
2006{
2007 if (flag_verbose_asm || flag_debug_asm)
2008 pru_dump_frame_layout (file);
2009}
2010
2011/* Implement `TARGET_ASM_INTEGER'.
2012 Target hook for assembling integer objects. The PRU version needs
2013 special handling for references to pmem. Code copied from the AVR port. */
2014
2015static bool
2016pru_assemble_integer (rtx x, unsigned int size, int aligned_p)
2017{
2018 if (size == POINTER_SIZE / BITS_PER_UNIT
2019 && aligned_p
2020 && text_segment_operand (x, VOIDmode))
2021 {
2022 fputs ("\t.4byte\t%pmem(", asm_out_file);
2023 output_addr_const (asm_out_file, x);
2024 fputs (")\n", asm_out_file);
2025
2026 return true;
2027 }
2028 else if (size == INIT_ARRAY_ENTRY_BYTES
2029 && aligned_p
2030 && text_segment_operand (x, VOIDmode))
2031 {
2032 fputs ("\t.2byte\t%pmem(", asm_out_file);
2033 output_addr_const (asm_out_file, x);
2034 fputs (")\n", asm_out_file);
2035
2036 return true;
2037 }
2038 else
2039 {
2040 return default_assemble_integer (x, size, aligned_p);
2041 }
2042}
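/* For example, given a code-address initializer such as

     extern void isr (void);
     void (*const handler) (void) = isr;

   (illustrative names), the hook above emits

     .4byte  %pmem(isr)

   instead of a plain .4byte, so the value is encoded as a program
   memory reference rather than a raw data address.  */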
2043
2044/* Implement TARGET_SECTION_TYPE_FLAGS. */
2045
2046static unsigned int
2047pru_section_type_flags (tree decl, const char *name, int reloc)
2048{
2049 unsigned int flags = default_section_type_flags (decl, name, reloc);
2050
2051 /* The .pru_irq_map section is not meant to be loaded into the target
2052 memory. Instead its contents are read by the host remoteproc loader.
2053 To prevent being marked as a loadable (allocated) section, the
2054 .pru_irq_map section is intercepted and marked as a debug section. */
2055 if (!strcmp (name, ".pru_irq_map"))
2056 flags = SECTION_DEBUG | SECTION_RETAIN;
2057
2058 return flags;
2059}
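/* Typical usage from PRU firmware code (an illustrative declaration;
   the structure layout is dictated by the host remoteproc loader, not
   by GCC):

     struct my_irq_map map __attribute__((section (".pru_irq_map")));

   With the flags above, the section's contents stay in the ELF image
   for the loader to parse, but are not allocated in target memory.  */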
2060
2061/* Implement TARGET_ASM_FILE_START. */
2062
2063static void
2064pru_file_start (void)
2065{
2066 default_file_start ();
2067
2068 /* The compiler will take care of placing %label, so there is no
2069 need to confuse users with this warning. */
2070 fprintf (asm_out_file, "\t.set no_warn_regname_label\n");
2071}
2072
2073/* Scan type TYP for pointer references to an address space other than
2074 ADDR_SPACE_GENERIC. Return true if such a reference is found.
2075 Much of this code was taken from the avr port. */
2076
2077static bool
2078pru_nongeneric_pointer_addrspace (tree typ)
2079{
2080 while (ARRAY_TYPE == TREE_CODE (typ))
2081 typ = TREE_TYPE (typ);
2082
2083 if (POINTER_TYPE_P (typ))
2084 {
2085 addr_space_t as;
2086 tree target = TREE_TYPE (typ);
2087
2088 /* Pointer to function: Test the function's return type. */
2089 if (FUNCTION_TYPE == TREE_CODE (target))
2090 return pru_nongeneric_pointer_addrspace (TREE_TYPE (target));
2091
2092 /* "Ordinary" pointers... */
2093
2094 while (TREE_CODE (target) == ARRAY_TYPE)
2095 target = TREE_TYPE (target);
2096
2097 as = TYPE_ADDR_SPACE (target);
2098
2099 if (!ADDR_SPACE_GENERIC_P (as))
2100 return true;
2101
2102 /* Scan pointer's target type. */
2103 return pru_nongeneric_pointer_addrspace (target);
2104 }
2105
2106 return false;
2107}
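/* For example, for a hypothetical declaration
   "__regio_symbol unsigned int *p;" (a generic-space pointer whose
   target type lives in __regio_symbol), the scan above reaches the
   pointed-to type, finds its non-generic address space, and returns
   true; for a plain "unsigned int *p;" it returns false.  */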
2108
2109/* Implement `TARGET_INSERT_ATTRIBUTES'. For PRU it's used as a hook to
2110 provide better diagnostics for some invalid usages of the __regio_symbol
2111 address space.
2112
2113 Anything that escapes the following checks is supposed to be caught
2114 during the "mov<mode>" pattern expansion. */
2115
2116static void
2117pru_insert_attributes (tree node, tree *attributes ATTRIBUTE_UNUSED)
2118{
2119
2120 /* Validate __regio_symbol variable declarations. */
2121 if (VAR_P (node))
2122 {
2123 const char *name = DECL_NAME (node)
2124 ? IDENTIFIER_POINTER (DECL_NAME (node))
2125 : "<unknown>";
2126 tree typ = TREE_TYPE (node);
2127 addr_space_t as = TYPE_ADDR_SPACE (typ);
2128
2129 if (as == ADDR_SPACE_GENERIC)
2130 return;
2131
2132 if (AGGREGATE_TYPE_P (typ))
2133 {
2134 error ("aggregate types are prohibited in "
2135 "%<__regio_symbol%> address space");
2136 /* Don't bother with the checks below. They would only pile
2137 up meaningless errors and confuse the user. */
2138 return;
2139 }
2140 if (DECL_INITIAL (node) != NULL_TREE)
2141 error ("variables in %<__regio_symbol%> address space "
2142 "cannot have initial value");
2143 if (DECL_REGISTER (node))
2144 error ("variables in %<__regio_symbol%> address space "
2145 "cannot be declared %<register%>");
2146 if (!TYPE_VOLATILE (typ))
2147 error ("variables in %<__regio_symbol%> address space "
2148 "must be declared %<volatile%>");
2149 if (!DECL_EXTERNAL (node))
2150 error ("variables in %<__regio_symbol%> address space "
2151 "must be declared %<extern%>");
2152 if (TYPE_MODE (typ) != SImode)
2153 error ("only 32-bit access is supported "
2154 "for %<__regio_symbol%> address space");
2155 if (strcmp (name, "__R30") != 0 && strcmp (name, "__R31") != 0)
2156 error ("register name %<%s%> not recognized "
2157 "in %<__regio_symbol%> address space", name);
2158 }
2159
2160 tree typ = NULL_TREE;
2161
2162 switch (TREE_CODE (node))
2163 {
2164 case FUNCTION_DECL:
2165 typ = TREE_TYPE (TREE_TYPE (node));
2166 break;
2167 case TYPE_DECL:
2168 case RESULT_DECL:
2169 case VAR_DECL:
2170 case FIELD_DECL:
2171 case PARM_DECL:
2172 typ = TREE_TYPE (node);
2173 break;
2174 case POINTER_TYPE:
2175 typ = node;
2176 break;
2177 default:
2178 break;
2179 }
2180 if (typ != NULL_TREE && pru_nongeneric_pointer_addrspace (typ))
2181 error ("pointers to %<__regio_symbol%> address space are prohibited");
2182}
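/* By way of illustration, a declaration that satisfies every check
   above (a sketch using unsigned int for the 32-bit type):

     extern volatile __regio_symbol unsigned int __R30;
     extern volatile __regio_symbol unsigned int __R31;

   whereas, e.g., "__regio_symbol unsigned int x = 1;" would be
   diagnosed: it is not volatile, not extern, has an initializer, and
   does not name __R30 or __R31.  */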
2183\f
2184/* Function argument related. */
2185
2186/* Return the number of bytes needed for storing an argument with
2187 the given MODE and TYPE. */
2188static int
2189pru_function_arg_size (machine_mode mode, const_tree type)
2190{
2191 HOST_WIDE_INT param_size;
2192
2193 if (mode == BLKmode)
2194 param_size = int_size_in_bytes (type);
2195 else
2196 param_size = GET_MODE_SIZE (mode);
2197
2198 /* Convert to words (round up). */
2199 param_size = (UNITS_PER_WORD - 1 + param_size) / UNITS_PER_WORD;
2200 gcc_assert (param_size >= 0);
2201
2202 return param_size;
2203}
2204
2205/* Check if argument with the given size must be
2206 passed/returned in a register.
2207
2208 Reference:
2209 https://e2e.ti.com/support/development_tools/compiler/f/343/p/650176/2393029
2210
2211 Arguments other than 8/16/24/32/64 bits are passed on the stack. */
2212static bool
2213pru_arg_in_reg_bysize (size_t sz)
2214{
2215 return sz == 1 || sz == 2 || sz == 3 || sz == 4 || sz == 8;
2216}
2217
2218/* Helper function to get the starting storage HW register for an argument,
2219 or -1 if it must be passed on the stack. The cum_v state is not changed. */
2220static int
2221pru_function_arg_regi (cumulative_args_t cum_v,
2222 machine_mode mode, const_tree type,
2223 bool named)
2224{
2225 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2226 size_t argsize = pru_function_arg_size (mode, type);
2227 size_t i, bi;
2228 int regi = -1;
2229
2230 if (!pru_arg_in_reg_bysize (argsize))
2231 return -1;
2232
2233 if (!named)
2234 return -1;
2235
2236 /* Find the first available slot that fits. Yes, that's the PRU ABI. */
2237 for (i = 0; regi < 0 && i < ARRAY_SIZE (cum->regs_used); i++)
2238 {
2239 /* VLAs and vector types are not defined in the PRU ABI. Let's
2240 handle them the same as their same-sized counterparts. This way
2241 we do not need to treat BLKmode differently, and need only to check
2242 the size. */
2243 gcc_assert (argsize == 1 || argsize == 2 || argsize == 3
2244 || argsize == 4 || argsize == 8);
2245
2246 /* Ensure SI and DI arguments are stored in full registers only. */
2247 if ((argsize >= 4) && (i % 4) != 0)
2248 continue;
2249
2250 /* Structures with size 24 bits are passed starting at a full
2251 register boundary. */
2252 if (argsize == 3 && (i % 4) != 0)
2253 continue;
2254
2255 /* rX.w0/w1/w2 are OK. But avoid spreading the second byte
2256 into a different full register. */
2257 if (argsize == 2 && (i % 4) == 3)
2258 continue;
2259
2260 for (bi = 0;
2261 bi < argsize && (bi + i) < ARRAY_SIZE (cum->regs_used);
2262 bi++)
2263 {
2264 if (cum->regs_used[bi + i])
2265 break;
2266 }
2267 if (bi == argsize)
2268 regi = FIRST_ARG_REGNUM + i;
2269 }
2270
2271 return regi;
2272}
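/* An illustration of the first-fit rules above (slot indexes are 8-bit
   sub-registers counted from FIRST_ARG_REGNUM; the mapping to concrete
   rX.bN names depends on the register numbering): for
   "f (char a, short b, int c)", A lands in slot 0, B in slots 1-2 (a
   2-byte argument may start at byte 1 or 2 of a register, just not at
   byte 3), and C must start at a full-register boundary, so it takes
   slots 4-7.  */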
2273
2274/* Mark CUM_V that a function argument will occupy HW register slot starting
2275 at REGI. The number of consecutive 8-bit HW registers marked as occupied
2276 depends on the MODE and TYPE of the argument. */
2277static void
2278pru_function_arg_regi_mark_slot (int regi,
2279 cumulative_args_t cum_v,
2280 machine_mode mode, const_tree type,
2281 bool named)
2282{
2283 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2284 HOST_WIDE_INT param_size = pru_function_arg_size (mode, type);
2285
2286 gcc_assert (named);
2287
2288 /* Mark all byte sub-registers occupied by argument as used. */
2289 while (param_size--)
2290 {
2291 gcc_assert (regi >= FIRST_ARG_REGNUM && regi <= LAST_ARG_REGNUM);
2292 gcc_assert (!cum->regs_used[regi - FIRST_ARG_REGNUM]);
2293 cum->regs_used[regi - FIRST_ARG_REGNUM] = true;
2294 regi++;
2295 }
2296}
2297
2298/* Define where to put the arguments to a function. Value is zero to
2299 push the argument on the stack, or a hard register in which to
2300 store the argument.
2301
 2302 CUM is a variable of type CUMULATIVE_ARGS which gives info about
 2303 the preceding args and about the function being called.
 2304 ARG is a description of the argument. */
 2305
 2306static rtx
 2307pru_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
 2308{
 2309 rtx return_rtx = NULL_RTX;
 2310 int regi = pru_function_arg_regi (cum_v, arg.mode, arg.type, arg.named);
 2311
 2312 if (regi >= 0)
 2313 return_rtx = gen_rtx_REG (arg.mode, regi);
2314
2315 return return_rtx;
2316}
2317
2318/* Implement TARGET_ARG_PARTIAL_BYTES. PRU never splits any arguments
2319 between registers and memory, so we can return 0. */
2320
2321static int
 2322 pru_arg_partial_bytes (cumulative_args_t, const function_arg_info &)
2323{
2324 return 0;
2325}
2326
 2327 /* Update the data in CUM to advance over argument ARG. */
2328
2329static void
 2330 pru_function_arg_advance (cumulative_args_t cum_v,
 2331 const function_arg_info &arg)
 2332 {
 2333 int regi = pru_function_arg_regi (cum_v, arg.mode, arg.type, arg.named);
2334
2335 if (regi >= 0)
 2336 pru_function_arg_regi_mark_slot (regi, cum_v, arg.mode,
 2337 arg.type, arg.named);
2338}
2339
2340/* Implement TARGET_FUNCTION_VALUE. */
2341static rtx
2342pru_function_value (const_tree ret_type, const_tree fn ATTRIBUTE_UNUSED,
2343 bool outgoing ATTRIBUTE_UNUSED)
2344{
2345 return gen_rtx_REG (TYPE_MODE (ret_type), FIRST_RETVAL_REGNUM);
2346}
2347
2348/* Implement TARGET_LIBCALL_VALUE. */
2349static rtx
2350pru_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
2351{
2352 return gen_rtx_REG (mode, FIRST_RETVAL_REGNUM);
2353}
2354
2355/* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
2356static bool
2357pru_function_value_regno_p (const unsigned int regno)
2358{
2359 return regno == FIRST_RETVAL_REGNUM;
2360}
2361
2362/* Implement TARGET_RETURN_IN_MEMORY. */
2363bool
2364pru_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
2365{
2366 bool in_memory = (!pru_arg_in_reg_bysize (int_size_in_bytes (type))
2367 || int_size_in_bytes (type) == -1);
2368
2369 return in_memory;
2370}
2371\f
2372/* Implement TARGET_CAN_USE_DOLOOP_P. */
2373
2374static bool
2375pru_can_use_doloop_p (const widest_int &, const widest_int &iterations_max,
2376 unsigned int loop_depth, bool)
2377{
2378 /* Considering hardware limitations, use doloop only for
2379 innermost loops, which must be entered from the top. */
2380 if (loop_depth > 1)
2381 return false;
2382 /* The PRU internal loop counter is 16 bits wide. Remember that
2383 iterations_max holds the maximum number of loop latch executions, while
2384 the PRU LOOP instruction needs the count of loop body executions. */
2385 if (iterations_max == 0 || wi::geu_p (iterations_max, 0xffff))
2386 return false;
2387
2388 return true;
2389}
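/* For example, iterations_max == 0xfffe is still acceptable: the loop
   body then executes at most 0xffff times, which fits in the 16-bit
   counter.  iterations_max == 0xffff would require 0x10000 body
   executions, hence the rejection of values >= 0xffff above.  */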
2390
2391/* Return NULL if INSN is valid within a low-overhead loop.
2392 Otherwise return a reason why doloop cannot be applied. */
2393
2394static const char *
2395pru_invalid_within_doloop (const rtx_insn *insn)
2396{
2397 if (CALL_P (insn))
2398 return "Function call in the loop.";
2399
2400 if (JUMP_P (insn) && INSN_CODE (insn) == CODE_FOR_return)
2401 return "Return from a call instruction in the loop.";
2402
2403 if (NONDEBUG_INSN_P (insn)
2404 && INSN_CODE (insn) < 0
2405 && (GET_CODE (PATTERN (insn)) == ASM_INPUT
2406 || asm_noperands (PATTERN (insn)) >= 0))
2407 return "Loop contains asm statement.";
2408
2409 return NULL;
2410}
2411
2412
2413/* Figure out where to put LABEL, which is the label for a repeat loop.
2414 The loop ends just before LAST_INSN. If SHARED, insns other than the
2415 "repeat" might use LABEL to jump to the loop's continuation point.
2416
2417 Return the last instruction in the adjusted loop. */
2418
2419static rtx_insn *
2420pru_insert_loop_label_last (rtx_insn *last_insn, rtx_code_label *label,
2421 bool shared)
2422{
2423 rtx_insn *next, *prev;
2424 int count = 0, code, icode;
2425
2426 if (dump_file)
2427 fprintf (dump_file, "considering end of repeat loop at insn %d\n",
2428 INSN_UID (last_insn));
2429
2430 /* Set PREV to the last insn in the loop. */
2431 prev = PREV_INSN (last_insn);
2432
2433 /* Set NEXT to the next insn after the loop label. */
2434 next = last_insn;
2435 if (!shared)
2436 while (prev != 0)
2437 {
2438 code = GET_CODE (prev);
2439 if (code == CALL_INSN || code == CODE_LABEL || code == BARRIER)
2440 break;
2441
2442 if (INSN_P (prev))
2443 {
2444 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
2445 prev = as_a <rtx_insn *> (XVECEXP (PATTERN (prev), 0, 1));
2446
2447 /* Other insns that should not be in the last two opcodes. */
2448 icode = recog_memoized (prev);
2449 if (icode < 0
2450 || icode == CODE_FOR_pruloophi
2451 || icode == CODE_FOR_pruloopsi)
2452 break;
2453
2454 count++;
2455 next = prev;
2456 if (dump_file)
2457 print_rtl_single (dump_file, next);
2458 if (count == 2)
2459 break;
2460 }
2461 prev = PREV_INSN (prev);
2462 }
2463
2464 /* Insert the nops. */
2465 if (dump_file && count < 2)
2466 fprintf (dump_file, "Adding %d nop%s inside loop\n\n",
2467 2 - count, count == 1 ? "" : "s");
2468
2469 for (; count < 2; count++)
2470 emit_insn_before (gen_nop (), last_insn);
2471
2472 /* Insert the label. */
2473 emit_label_before (label, last_insn);
2474
2475 return last_insn;
2476}
2477
2478/* If IS_END is false, expand a canonical doloop_begin RTL into the
2479 PRU-specific doloop_begin_internal. Otherwise expand doloop_end to
2480 doloop_end_internal. */
2481void
2482pru_emit_doloop (rtx *operands, int is_end)
2483{
2484 rtx tag;
2485
2486 if (cfun->machine->doloop_tags == 0
2487 || cfun->machine->doloop_tag_from_end == is_end)
2488 {
2489 cfun->machine->doloop_tags++;
2490 cfun->machine->doloop_tag_from_end = is_end;
2491 }
2492
2493 tag = GEN_INT (cfun->machine->doloop_tags - 1);
2494 machine_mode opmode = GET_MODE (operands[0]);
 2495 gcc_assert (opmode == HImode || opmode == SImode);
 2496
 2497 if (is_end)
 2498 emit_jump_insn (gen_doloop_end_internal (opmode, operands[0],
 2499 operands[1], tag));
 2500 else
 2501 emit_insn (gen_doloop_begin_internal (opmode, operands[0],
 2502 operands[0], tag));
2503}
2504
2505
2506/* Code for converting doloop_begins and doloop_ends into valid
2507 PRU instructions. Idea and code snippets borrowed from mep port.
2508
2509 A doloop_begin is just a placeholder:
2510
2511 $count = unspec ($count)
2512
2513 where $count is initially the number of iterations.
2514 doloop_end has the form:
2515
2516 if (--$count == 0) goto label
2517
2518 The counter variable is private to the doloop insns, nothing else
2519 relies on its value.
2520
2521 There are three cases, in decreasing order of preference:
2522
2523 1. A loop has exactly one doloop_begin and one doloop_end.
2524 The doloop_end branches to the first instruction after
2525 the doloop_begin.
2526
2527 In this case we can replace the doloop_begin with a LOOP
2528 instruction and remove the doloop_end. I.e.:
2529
2530 $count1 = unspec ($count1)
2531 label:
2532 ...
2533 if (--$count2 != 0) goto label
2534
2535 becomes:
2536
2537 LOOP end_label,$count1
2538 label:
2539 ...
2540 end_label:
2541 # end loop
2542
2543 2. As for (1), except there are several doloop_ends. One of them
2544 (call it X) falls through to a label L. All the others fall
2545 through to branches to L.
2546
2547 In this case, we remove X and replace the other doloop_ends
2548 with branches to the LOOP label. For example:
2549
2550 $count1 = unspec ($count1)
2551 label:
2552 ...
2553 if (--$count1 != 0) goto label
2554 end_label:
2555 ...
2556 if (--$count2 != 0) goto label
2557 goto end_label
2558
2559 becomes:
2560
2561 LOOP end_label,$count1
2562 label:
2563 ...
2564 end_label:
2565 # end repeat
2566 ...
2567 goto end_label
2568
2569 3. The fallback case. Replace doloop_begins with:
2570
2571 $count = $count
2572
2573 Replace doloop_ends with the equivalent of:
2574
2575 $count = $count - 1
2576 if ($count != 0) goto loop_label
2577
2578 */
2579
2580/* A structure describing one doloop_begin. */
2581struct pru_doloop_begin {
2582 /* The next doloop_begin with the same tag. */
2583 struct pru_doloop_begin *next;
2584
2585 /* The instruction itself. */
2586 rtx_insn *insn;
2587
2588 /* The initial counter value. */
2589 rtx loop_count;
2590
2591 /* The counter register. */
2592 rtx counter;
2593};
2594
2595/* A structure describing a doloop_end. */
2596struct pru_doloop_end {
2597 /* The next doloop_end with the same loop tag. */
2598 struct pru_doloop_end *next;
2599
2600 /* The instruction itself. */
2601 rtx_insn *insn;
2602
2603 /* The first instruction after INSN when the branch isn't taken. */
2604 rtx_insn *fallthrough;
2605
2606 /* The location of the counter value. Since doloop_end_internal is a
2607 jump instruction, it has to allow the counter to be stored anywhere
2608 (any non-fixed register). */
2609 rtx counter;
2610
2611 /* The target label (the place where the insn branches when the counter
2612 isn't zero). */
2613 rtx label;
2614
2615 /* A scratch register. Only available when COUNTER isn't stored
2616 in a general register. */
2617 rtx scratch;
2618};
2619
2620
2621/* One do-while loop. */
2622struct pru_doloop {
2623 /* All the doloop_begins for this loop (in no particular order). */
2624 struct pru_doloop_begin *begin;
2625
2626 /* All the doloop_ends. When there is more than one, arrange things
2627 so that the first one is the most likely to be X in case (2) above. */
2628 struct pru_doloop_end *end;
2629};
2630
2631
2632/* Return true if LOOP can be converted into LOOP form
2633 (that is, if it matches cases (1) or (2) above). */
2634
2635static bool
2636pru_repeat_loop_p (struct pru_doloop *loop)
2637{
2638 struct pru_doloop_end *end;
2639 rtx_insn *fallthrough;
2640
2641 /* There must be exactly one doloop_begin and at least one doloop_end. */
2642 if (loop->begin == 0 || loop->end == 0 || loop->begin->next != 0)
2643 return false;
2644
2645 /* The first doloop_end (X) must branch back to the insn after
2646 the doloop_begin. */
2647 if (prev_real_insn (as_a<rtx_insn *> (loop->end->label)) != loop->begin->insn)
2648 return false;
2649
2650 /* Check that the first doloop_end (X) can actually reach the
2651 doloop_begin with a U8_PCREL relocation for the LOOP instruction. */
2652 if (get_attr_length (loop->end->insn) != 4)
2653 return false;
2654
2655 /* All the other doloop_ends must branch to the same place as X.
2656 When the branch isn't taken, they must jump to the instruction
2657 after X. */
2658 fallthrough = loop->end->fallthrough;
2659 for (end = loop->end->next; end != 0; end = end->next)
2660 if (end->label != loop->end->label
2661 || !simplejump_p (end->fallthrough)
2662 || fallthrough
2663 != next_real_insn (JUMP_LABEL_AS_INSN (end->fallthrough)))
2664 return false;
2665
2666 return true;
2667}
2668
2669
2670/* The main repeat reorg function. See comment above for details. */
2671
2672static void
2673pru_reorg_loop (rtx_insn *insns)
2674{
2675 rtx_insn *insn;
2676 struct pru_doloop *loops, *loop;
2677 struct pru_doloop_begin *begin;
2678 struct pru_doloop_end *end;
2679 size_t tmpsz;
2680
2681 /* Quick exit if we haven't created any loops. */
2682 if (cfun->machine->doloop_tags == 0)
2683 return;
2684
2685 /* Create an array of pru_doloop structures. */
2686 tmpsz = sizeof (loops[0]) * cfun->machine->doloop_tags;
2687 loops = (struct pru_doloop *) alloca (tmpsz);
2688 memset (loops, 0, sizeof (loops[0]) * cfun->machine->doloop_tags);
2689
2690 /* Search the function for do-while insns and group them by loop tag. */
2691 for (insn = insns; insn; insn = NEXT_INSN (insn))
2692 if (INSN_P (insn))
2693 switch (recog_memoized (insn))
2694 {
2695 case CODE_FOR_doloop_begin_internalhi:
2696 case CODE_FOR_doloop_begin_internalsi:
2697 insn_extract (insn);
2698 loop = &loops[INTVAL (recog_data.operand[2])];
2699
2700 tmpsz = sizeof (struct pru_doloop_begin);
2701 begin = (struct pru_doloop_begin *) alloca (tmpsz);
2702 begin->next = loop->begin;
2703 begin->insn = insn;
2704 begin->loop_count = recog_data.operand[1];
2705 begin->counter = recog_data.operand[0];
2706
2707 loop->begin = begin;
2708 break;
2709
2710 case CODE_FOR_doloop_end_internalhi:
2711 case CODE_FOR_doloop_end_internalsi:
2712 insn_extract (insn);
2713 loop = &loops[INTVAL (recog_data.operand[2])];
2714
2715 tmpsz = sizeof (struct pru_doloop_end);
2716 end = (struct pru_doloop_end *) alloca (tmpsz);
2717 end->insn = insn;
2718 end->fallthrough = next_real_insn (insn);
2719 end->counter = recog_data.operand[0];
2720 end->label = recog_data.operand[1];
2721 end->scratch = recog_data.operand[3];
2722
2723 /* If this insn falls through to an unconditional jump,
2724 give it a lower priority than the others. */
2725 if (loop->end != 0 && simplejump_p (end->fallthrough))
2726 {
2727 end->next = loop->end->next;
2728 loop->end->next = end;
2729 }
2730 else
2731 {
2732 end->next = loop->end;
2733 loop->end = end;
2734 }
2735 break;
2736 }
2737
2738 /* Convert the insns for each loop in turn. */
2739 for (loop = loops; loop < loops + cfun->machine->doloop_tags; loop++)
2740 if (pru_repeat_loop_p (loop))
2741 {
2742 /* Case (1) or (2). */
2743 rtx_code_label *repeat_label;
2744 rtx label_ref;
 2745 rtx loop_rtx;
2746
2747 /* Create a new label for the repeat insn. */
2748 repeat_label = gen_label_rtx ();
2749
2750 /* Replace the doloop_begin with a repeat. We get rid
2751 of the iteration register because the LOOP instruction
2752 will utilize the PRU core's internal LOOP register. */
2753 label_ref = gen_rtx_LABEL_REF (VOIDmode, repeat_label);
2754 machine_mode loop_mode = GET_MODE (loop->begin->loop_count);
 2755 if (loop_mode == VOIDmode)
2756 {
2757 gcc_assert (CONST_INT_P (loop->begin->loop_count));
2758 gcc_assert (UBYTE_INT ( INTVAL (loop->begin->loop_count)));
 2759 loop_mode = SImode;
 2760 }
2761 gcc_assert (loop_mode == HImode || loop_mode == SImode);
2762 loop_rtx = gen_pruloop (loop_mode, loop->begin->loop_count, label_ref);
2763 emit_insn_before (loop_rtx, loop->begin->insn);
2764
2765 delete_insn (loop->begin->insn);
2766
2767 /* Insert the repeat label before the first doloop_end.
2768 Fill the gap with nops if the LOOP insn is less than 2
2769 instructions away from loop->end. */
2770 pru_insert_loop_label_last (loop->end->insn, repeat_label,
2771 loop->end->next != 0);
2772
2773 /* Emit a pruloop_end (to improve the readability of the output). */
2774 emit_insn_before (gen_pruloop_end (), loop->end->insn);
2775
2776 /* HACK: TODO: This is usually not needed, but is required for
2777 a few rare cases where a JUMP that breaks out of the loop
2778 references the LOOP_END address. In other words, since
2779 we're missing a real "loop_end" instruction, a loop "break"
2780 may accidentally reference the loop end itself, and thus
2781 continue looping. */
2782 for (insn = NEXT_INSN (loop->end->insn);
2783 insn != next_real_insn (loop->end->insn);
2784 insn = NEXT_INSN (insn))
2785 {
2786 if (LABEL_P (insn) && LABEL_NUSES (insn) > 0)
2787 emit_insn_before (gen_nop_loop_guard (), loop->end->insn);
2788 }
2789
2790 /* Delete the first doloop_end. */
2791 delete_insn (loop->end->insn);
2792
2793 /* Replace the others with branches to REPEAT_LABEL. */
2794 for (end = loop->end->next; end != 0; end = end->next)
2795 {
2796 rtx_insn *newjmp;
2797 newjmp = emit_jump_insn_before (gen_jump (repeat_label), end->insn);
2798 JUMP_LABEL (newjmp) = repeat_label;
2799 delete_insn (end->insn);
2800 delete_insn (end->fallthrough);
2801 }
2802 }
2803 else
2804 {
2805 /* Case (3). First replace all the doloop_begins with plain
2806 sets of the HW register used as the loop counter. */
2807 for (begin = loop->begin; begin != 0; begin = begin->next)
2808 {
2809 insn = gen_move_insn (copy_rtx (begin->counter),
2810 copy_rtx (begin->loop_count));
2811 emit_insn_before (insn, begin->insn);
2812 delete_insn (begin->insn);
2813 }
2814
2815 /* Replace all the doloop_ends with decrement-and-branch sequences. */
2816 for (end = loop->end; end != 0; end = end->next)
2817 {
2818 rtx reg;
2819
2820 start_sequence ();
2821
2822 /* Load the counter value into a general register. */
2823 reg = end->counter;
2824 if (!REG_P (reg) || REGNO (reg) > LAST_NONIO_GP_REGNUM)
2825 {
2826 reg = end->scratch;
2827 emit_move_insn (copy_rtx (reg), copy_rtx (end->counter));
2828 }
2829
2830 /* Decrement the counter. */
2831 emit_insn (gen_add3_insn (copy_rtx (reg), copy_rtx (reg),
2832 constm1_rtx));
2833
2834 /* Copy it back to its original location. */
2835 if (reg != end->counter)
2836 emit_move_insn (copy_rtx (end->counter), copy_rtx (reg));
2837
2838 /* Jump back to the start label. */
2839 insn = emit_jump_insn (gen_cbranchsi4 (gen_rtx_NE (VOIDmode, reg,
2840 const0_rtx),
2841 reg,
2842 const0_rtx,
2843 end->label));
2844
2845 JUMP_LABEL (insn) = end->label;
2846 LABEL_NUSES (end->label)++;
2847
2848 /* Emit the whole sequence before the doloop_end. */
2849 insn = get_insns ();
2850 end_sequence ();
2851 emit_insn_before (insn, end->insn);
2852
2853 /* Delete the doloop_end. */
2854 delete_insn (end->insn);
2855 }
2856 }
2857}
2858
2859/* Implement TARGET_MACHINE_DEPENDENT_REORG. */
2860static void
2861pru_reorg (void)
2862{
2863 rtx_insn *insns = get_insns ();
2864
2865 compute_bb_for_insn ();
2866 df_analyze ();
2867
2868 /* Correct insn lengths are needed to decide whether a LOOP
2869 instruction can be emitted within the U8_PCREL range. */
2870 shorten_branches (get_insns ());
2871
2872 /* The generic reorg_loops () is not suitable for PRU because
2873 it doesn't handle doloop_begin/end tying. And we need our
2874 doloop_begin emitted before reload. It is difficult to coalesce
2875 UBYTE constant initial loop values into the LOOP insn during
2876 the machine reorg phase. */
2877 pru_reorg_loop (insns);
2878
2879 df_finish_pass (false);
2880}
2881\f
2882/* Enumerate all PRU-specific builtins. */
2883enum pru_builtin
2884{
2885 PRU_BUILTIN_DELAY_CYCLES,
 2886 PRU_BUILTIN_HALT,
 2887 PRU_BUILTIN_LMBD,
2888 PRU_BUILTIN_max
2889};
2890
2891static GTY(()) tree pru_builtins [(int) PRU_BUILTIN_max];
2892
2893/* Implement TARGET_INIT_BUILTINS. */
2894
2895static void
2896pru_init_builtins (void)
2897{
2898 tree void_ftype_longlong
2899 = build_function_type_list (void_type_node,
2900 long_long_integer_type_node,
2901 NULL);
2902 tree uint_ftype_uint_uint
2903 = build_function_type_list (unsigned_type_node,
2904 unsigned_type_node,
2905 unsigned_type_node,
2906 NULL);
2907
2908 tree void_ftype_void
2909 = build_function_type_list (void_type_node,
2910 void_type_node,
2911 NULL);
2912
2913 pru_builtins[PRU_BUILTIN_DELAY_CYCLES]
2914 = add_builtin_function ("__delay_cycles", void_ftype_longlong,
2915 PRU_BUILTIN_DELAY_CYCLES, BUILT_IN_MD, NULL,
2916 NULL_TREE);
2917
2918 pru_builtins[PRU_BUILTIN_HALT]
2919 = add_builtin_function ("__halt", void_ftype_void,
2920 PRU_BUILTIN_HALT, BUILT_IN_MD, NULL,
2921 NULL_TREE);
2922
2923 pru_builtins[PRU_BUILTIN_LMBD]
2924 = add_builtin_function ("__lmbd", uint_ftype_uint_uint,
2925 PRU_BUILTIN_LMBD, BUILT_IN_MD, NULL,
2926 NULL_TREE);
2927}
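/* A sketch of how these builtins are used from PRU C code (the cycle
   count and variable names are arbitrary examples):

     unsigned int example (unsigned int v)
     {
       unsigned int pos = __lmbd (v, 1);  // left-most bit detect
       __delay_cycles (1000);             // constant busy-wait
       if (pos == 32)                     // 32 means no matching bit found
	 __halt ();
       return pos;
     }
*/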
2928
2929/* Implement TARGET_BUILTIN_DECL. */
2930
2931static tree
2932pru_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
2933{
2934 switch (code)
2935 {
2936 case PRU_BUILTIN_DELAY_CYCLES:
 2937 case PRU_BUILTIN_HALT:
 2938 case PRU_BUILTIN_LMBD:
2939 return pru_builtins[code];
2940 default:
2941 return error_mark_node;
2942 }
2943}
2944\f
2945/* Emit a sequence of one or more delay_cycles_X insns, in order to generate
2946 code that delays exactly ARG cycles. */
2947
2948static rtx
2949pru_expand_delay_cycles (rtx arg)
2950{
2951 HOST_WIDE_INT c, n;
2952
2953 if (GET_CODE (arg) != CONST_INT)
2954 {
2955 error ("%<__delay_cycles%> only takes constant arguments");
2956 return NULL_RTX;
2957 }
2958
2959 c = INTVAL (arg);
2960
2961 gcc_assert (HOST_BITS_PER_WIDE_INT > 32);
2962 if (c < 0)
2963 {
2964 error ("%<__delay_cycles%> only takes non-negative cycle counts");
2965 return NULL_RTX;
2966 }
2967
2968 emit_insn (gen_delay_cycles_start (arg));
2969
2970 /* For the 32-bit loop, the delay is 2 + 2x cycles. */
2971 if (c > 2 * 0xffff + 1)
2972 {
2973 n = (c - 2) / 2;
2974 c -= (n * 2) + 2;
2975 if ((unsigned long long) n > 0xffffffffULL)
2976 {
2977 error ("%<__delay_cycles%> is limited to 32-bit loop counts");
2978 return NULL_RTX;
2979 }
2980 emit_insn (gen_delay_cycles_2x_plus2_si (GEN_INT (n)));
2981 }
2982
2983 /* For the 16-bit loop, the delay is 1 + 2x cycles. */
2984 if (c > 2)
2985 {
2986 n = (c - 1) / 2;
2987 c -= (n * 2) + 1;
2988
2989 emit_insn (gen_delay_cycles_2x_plus1_hi (GEN_INT (n)));
2990 }
2991
2992 while (c > 0)
2993 {
2994 emit_insn (gen_delay_cycles_1 ());
2995 c -= 1;
2996 }
2997
2998 emit_insn (gen_delay_cycles_end (arg));
2999
3000 return NULL_RTX;
3001}
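/* A worked example of the decomposition above: for ARG = 1000 the value
   is too small for the 32-bit loop (1000 <= 2 * 0xffff + 1), so a
   16-bit loop with n = (1000 - 1) / 2 = 499 iterations covers
   1 + 2 * 499 = 999 cycles and one single-cycle insn covers the
   remainder.  For ARG = 200000, a single 32-bit loop with
   n = (200000 - 2) / 2 = 99999 iterations covers exactly
   2 + 2 * 99999 = 200000 cycles.  */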
3002
3003
3004/* Implement TARGET_EXPAND_BUILTIN. Expand an expression EXP that calls
3005 a built-in function, with result going to TARGET if that's convenient
3006 (and in mode MODE if that's convenient).
3007 SUBTARGET may be used as the target for computing one of EXP's operands.
3008 IGNORE is nonzero if the value is to be ignored. */
3009
3010static rtx
 3011pru_expand_builtin (tree exp, rtx target,
 3012 rtx subtarget ATTRIBUTE_UNUSED,
 3013 machine_mode mode,
 3014 int ignore ATTRIBUTE_UNUSED)
 3015{
 3016 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
 3017 unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
 3018
3019 switch (fcode)
3020 {
3021 case PRU_BUILTIN_DELAY_CYCLES:
3022 {
3023 rtx arg1 = expand_normal (CALL_EXPR_ARG (exp, 0));
3024 return pru_expand_delay_cycles (arg1);
3025 }
3026 break;
3027 case PRU_BUILTIN_HALT:
3028 {
3029 emit_insn (gen_pru_halt ());
3030 return NULL_RTX;
3031 }
3032 break;
3033 case PRU_BUILTIN_LMBD:
3034 {
3035 rtx arg1 = expand_normal (CALL_EXPR_ARG (exp, 0));
3036 rtx arg2 = expand_normal (CALL_EXPR_ARG (exp, 1));
3037
3038 if (target == NULL_RTX || GET_MODE (target) != mode)
3039 {
3040 target = gen_reg_rtx (mode);
3041 }
 3042
3043 emit_insn (gen_pru_lmbd (mode, target, arg1, arg2));
3044 return target;
3045 }
3046 break;
3047 default:
3048 internal_error ("bad builtin code");
3049 }
3050
3051 return NULL_RTX;
3052}
3053\f
3054/* Remember the last target of pru_set_current_function. */
3055static GTY(()) tree pru_previous_fndecl;
3056
3057/* Establish appropriate back-end context for processing the function
3058 FNDECL. The argument might be NULL to indicate processing at top
3059 level, outside of any function scope. */
3060static void
3061pru_set_current_function (tree fndecl)
3062{
3063 tree old_tree = (pru_previous_fndecl
3064 ? DECL_FUNCTION_SPECIFIC_TARGET (pru_previous_fndecl)
3065 : NULL_TREE);
3066
3067 tree new_tree = (fndecl
3068 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3069 : NULL_TREE);
3070
3071 if (fndecl && fndecl != pru_previous_fndecl)
3072 {
3073 pru_previous_fndecl = fndecl;
3074 if (old_tree == new_tree)
3075 ;
3076
3077 else if (new_tree)
3078 {
 3079 cl_target_option_restore (&global_options, &global_options_set,
3080 TREE_TARGET_OPTION (new_tree));
3081 target_reinit ();
3082 }
3083
3084 else if (old_tree)
3085 {
3086 struct cl_target_option *def
3087 = TREE_TARGET_OPTION (target_option_current_node);
3088
 3089 cl_target_option_restore (&global_options, &global_options_set, def);
3090 target_reinit ();
3091 }
3092 }
3093}
3094\f
3095/* Implement TARGET_UNWIND_WORD_MODE.
3096
3097 Since PRU is really a 32-bit CPU, the default word_mode is not suitable. */
3098static scalar_int_mode
3099pru_unwind_word_mode (void)
3100{
3101 return SImode;
3102}
3103\f
3104
3105/* Initialize the GCC target structure. */
3106#undef TARGET_ASM_FUNCTION_PROLOGUE
3107#define TARGET_ASM_FUNCTION_PROLOGUE pru_asm_function_prologue
3108#undef TARGET_ASM_INTEGER
3109#define TARGET_ASM_INTEGER pru_assemble_integer
3110#undef TARGET_SECTION_TYPE_FLAGS
3111#define TARGET_SECTION_TYPE_FLAGS pru_section_type_flags
3112
3113#undef TARGET_ASM_FILE_START
3114#define TARGET_ASM_FILE_START pru_file_start
3115
3116#undef TARGET_INSERT_ATTRIBUTES
3117#define TARGET_INSERT_ATTRIBUTES pru_insert_attributes
3118
3119#undef TARGET_INIT_BUILTINS
3120#define TARGET_INIT_BUILTINS pru_init_builtins
3121#undef TARGET_EXPAND_BUILTIN
3122#define TARGET_EXPAND_BUILTIN pru_expand_builtin
3123#undef TARGET_BUILTIN_DECL
3124#define TARGET_BUILTIN_DECL pru_builtin_decl
3125
3126#undef TARGET_COMPUTE_FRAME_LAYOUT
3127#define TARGET_COMPUTE_FRAME_LAYOUT pru_compute_frame_layout
3128
3129#undef TARGET_FUNCTION_OK_FOR_SIBCALL
3130#define TARGET_FUNCTION_OK_FOR_SIBCALL hook_bool_tree_tree_true
3131
3132#undef TARGET_CAN_ELIMINATE
3133#define TARGET_CAN_ELIMINATE pru_can_eliminate
3134
3135#undef TARGET_HARD_REGNO_MODE_OK
3136#define TARGET_HARD_REGNO_MODE_OK pru_hard_regno_mode_ok
3137
3138#undef TARGET_HARD_REGNO_SCRATCH_OK
3139#define TARGET_HARD_REGNO_SCRATCH_OK pru_hard_regno_scratch_ok
3140
3141#undef TARGET_FUNCTION_ARG
3142#define TARGET_FUNCTION_ARG pru_function_arg
3143
3144#undef TARGET_FUNCTION_ARG_ADVANCE
3145#define TARGET_FUNCTION_ARG_ADVANCE pru_function_arg_advance
3146
3147#undef TARGET_ARG_PARTIAL_BYTES
3148#define TARGET_ARG_PARTIAL_BYTES pru_arg_partial_bytes
3149
3150#undef TARGET_FUNCTION_VALUE
3151#define TARGET_FUNCTION_VALUE pru_function_value
3152
3153#undef TARGET_LIBCALL_VALUE
3154#define TARGET_LIBCALL_VALUE pru_libcall_value
3155
3156#undef TARGET_FUNCTION_VALUE_REGNO_P
3157#define TARGET_FUNCTION_VALUE_REGNO_P pru_function_value_regno_p
3158
3159#undef TARGET_RETURN_IN_MEMORY
3160#define TARGET_RETURN_IN_MEMORY pru_return_in_memory
3161
3162#undef TARGET_MUST_PASS_IN_STACK
3163#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
3164
3165#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
3166#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
3167 pru_addr_space_legitimate_address_p
3168
3169#undef TARGET_INIT_LIBFUNCS
3170#define TARGET_INIT_LIBFUNCS pru_init_libfuncs
3171#undef TARGET_LIBFUNC_GNU_PREFIX
3172#define TARGET_LIBFUNC_GNU_PREFIX true
3173
3174#undef TARGET_RTX_COSTS
3175#define TARGET_RTX_COSTS pru_rtx_costs
3176
3177#undef TARGET_PRINT_OPERAND
3178#define TARGET_PRINT_OPERAND pru_print_operand
3179
3180#undef TARGET_PRINT_OPERAND_ADDRESS
3181#define TARGET_PRINT_OPERAND_ADDRESS pru_print_operand_address
3182
3183#undef TARGET_OPTION_OVERRIDE
3184#define TARGET_OPTION_OVERRIDE pru_option_override
3185
3186#undef TARGET_SET_CURRENT_FUNCTION
3187#define TARGET_SET_CURRENT_FUNCTION pru_set_current_function
3188
3189#undef TARGET_MACHINE_DEPENDENT_REORG
3190#define TARGET_MACHINE_DEPENDENT_REORG pru_reorg
3191
3192#undef TARGET_CAN_USE_DOLOOP_P
3193#define TARGET_CAN_USE_DOLOOP_P pru_can_use_doloop_p
3194
3195#undef TARGET_INVALID_WITHIN_DOLOOP
3196#define TARGET_INVALID_WITHIN_DOLOOP pru_invalid_within_doloop
3197
3198#undef TARGET_UNWIND_WORD_MODE
3199#define TARGET_UNWIND_WORD_MODE pru_unwind_word_mode
3200
3201#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
3202#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
3203
3204struct gcc_target targetm = TARGET_INITIALIZER;
3205
3206#include "gt-pru.h"