]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/pru/pru.cc
Update copyright years.
[thirdparty/gcc.git] / gcc / config / pru / pru.cc
1 /* Target machine subroutines for TI PRU.
2 Copyright (C) 2014-2024 Free Software Foundation, Inc.
3 Dimitar Dimitrov <dimitar@dinux.eu>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #define IN_TARGET_CODE 1
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "attribs.h"
32 #include "df.h"
33 #include "memmodel.h"
34 #include "tm_p.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "diagnostic-core.h"
40 #include "output.h"
41 #include "insn-attr.h"
42 #include "flags.h"
43 #include "explow.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "expr.h"
47 #include "toplev.h"
48 #include "langhooks.h"
49 #include "cfgrtl.h"
50 #include "stor-layout.h"
51 #include "dumpfile.h"
52 #include "builtins.h"
53 #include "pru-protos.h"
54
55 /* This file should be included last. */
56 #include "target-def.h"
57
/* NOTE(review): not referenced in the visible part of this file;
   presumably the size in bytes of one init-array entry -- confirm
   against its users.  */
#define INIT_ARRAY_ENTRY_BYTES 2

/* Global PRU CTABLE entries, filled in by pragmas, and used for fast
   addressing via LBCO/SBCO instructions.  The table holds 32 entries.  */
struct pru_ctable_entry pru_ctable[32];

/* Forward function declarations.  prologue_saved_reg_p is called by
   pru_compute_frame_layout ahead of its definition.  */
static bool prologue_saved_reg_p (int);
static void pru_reorg_loop (rtx_insn *);
67
/* Per-function machine-dependent data.  Instances are allocated zeroed
   by pru_init_machine_status.  */
struct GTY (()) machine_function
{
  /* Current frame information, to be filled in by pru_compute_frame_layout
     with register save masks, and offsets for the current function.  */

  /* Mask of registers to save.  */
  HARD_REG_SET save_mask;
  /* Number of bytes that the entire frame takes up.  This is the sum of
     the aligned sizes of local variables, outgoing arguments, saved
     registers and pretend arguments.  */
  int total_size;
  /* Number of bytes that variables take up.  */
  int var_size;
  /* Number of bytes that outgoing arguments take up.  */
  int out_args_size;
  /* Number of bytes needed to store registers in frame.  */
  int save_reg_size;
  /* Offset from new stack pointer to store registers.  Computed as
     out_args_size + var_size.  */
  int save_regs_offset;
  /* True if final frame layout is already calculated.  Assigned from
     reload_completed whenever the layout is computed.  */
  bool initialized;
  /* Number of doloop tags used so far.  */
  int doloop_tags;
  /* True if the last tag was allocated to a doloop_end.  */
  bool doloop_tag_from_end;
};
92 \f
93 /* Stack layout and calling conventions.
94
95 The PRU ABI defines r4 as Argument Pointer. GCC implements the same
96 semantics, but represents it with HARD_FRAME_POINTER_REGNUM and
97 names it FP. The stack layout is shown below:
98
99 ---------------------- high address
100 | incoming args
101 ------call-boundary---
102 | pretend_args ^
103 FP ---------------- | total
104 | save_regs | frame
105 --------------- | size
106 | local vars |
107 --------------- |
108 | outgoing args V
109 SP ---------------------- low address
110
111 */
112
/* Round LOC up to a multiple of the stack boundary expressed in
   storage units (STACK_BOUNDARY / BITS_PER_UNIT).  */
#define PRU_STACK_ALIGN(LOC) ROUND_UP ((LOC), STACK_BOUNDARY / BITS_PER_UNIT)
114
115 /* Implement TARGET_COMPUTE_FRAME_LAYOUT. */
116 static void
117 pru_compute_frame_layout (void)
118 {
119 int regno;
120 HARD_REG_SET *save_mask;
121 int total_size;
122 int var_size;
123 int out_args_size;
124 int save_reg_size;
125
126 gcc_assert (!cfun->machine->initialized);
127
128 save_mask = &cfun->machine->save_mask;
129 CLEAR_HARD_REG_SET (*save_mask);
130
131 var_size = PRU_STACK_ALIGN ((HOST_WIDE_INT) get_frame_size ());
132 out_args_size = PRU_STACK_ALIGN ((HOST_WIDE_INT) crtl->outgoing_args_size);
133 total_size = var_size + out_args_size;
134
135 /* Calculate space needed for gp registers. */
136 save_reg_size = 0;
137 for (regno = 0; regno <= LAST_GP_REGNUM; regno++)
138 if (prologue_saved_reg_p (regno))
139 {
140 SET_HARD_REG_BIT (*save_mask, regno);
141 save_reg_size += 1;
142 }
143
144 save_reg_size = PRU_STACK_ALIGN (save_reg_size);
145 total_size += save_reg_size;
146 total_size += PRU_STACK_ALIGN (crtl->args.pretend_args_size);
147
148 /* Save other computed information. */
149 cfun->machine->total_size = total_size;
150 cfun->machine->var_size = var_size;
151 cfun->machine->out_args_size = out_args_size;
152 cfun->machine->save_reg_size = save_reg_size;
153 cfun->machine->initialized = reload_completed;
154 cfun->machine->save_regs_offset = out_args_size + var_size;
155 }
156
157 /* Emit efficient RTL equivalent of ADD3 with the given const_int for
158 frame-related registers.
159 op0 - Destination register.
160 op1 - First addendum operand (a register).
161 addendum - Second addendum operand (a constant).
162 kind - Note kind. REG_NOTE_MAX if no note must be added.
163 */
164 static rtx
165 pru_add3_frame_adjust (rtx op0, rtx op1, int addendum,
166 const enum reg_note kind)
167 {
168 rtx insn;
169
170 rtx op0_adjust = gen_rtx_SET (op0, plus_constant (Pmode, op1, addendum));
171
172 if (UBYTE_INT (addendum) || UBYTE_INT (-addendum))
173 insn = emit_insn (op0_adjust);
174 else
175 {
176 /* Help the compiler to cope with an arbitrary integer constant.
177 Reload has finished so we can't expect the compiler to
178 auto-allocate a temporary register. But we know that call-saved
179 registers are not live yet, so we utilize them. */
180 rtx tmpreg = gen_rtx_REG (Pmode, PROLOGUE_TEMP_REGNUM);
181 if (addendum < 0)
182 {
183 emit_insn (gen_rtx_SET (tmpreg, gen_int_mode (-addendum, Pmode)));
184 insn = emit_insn (gen_sub3_insn (op0, op1, tmpreg));
185 }
186 else
187 {
188 emit_insn (gen_rtx_SET (tmpreg, gen_int_mode (addendum, Pmode)));
189 insn = emit_insn (gen_add3_insn (op0, op1, tmpreg));
190 }
191 }
192
193 /* Attach a note indicating what happened. */
194 if (kind != REG_NOTE_MAX)
195 add_reg_note (insn, kind, copy_rtx (op0_adjust));
196
197 RTX_FRAME_RELATED_P (insn) = 1;
198
199 return insn;
200 }
201
202 /* Add a const_int to the stack pointer register. */
203 static rtx
204 pru_add_to_sp (int addendum, const enum reg_note kind)
205 {
206 return pru_add3_frame_adjust (stack_pointer_rtx, stack_pointer_rtx,
207 addendum, kind);
208 }
209
210 /* Helper function used during prologue/epilogue. Emits a single LBBO/SBBO
211 instruction for load/store of the next group of consecutive registers. */
212 static int
213 xbbo_next_reg_cluster (int regno_start, int *sp_offset, bool do_store)
214 {
215 int regno, nregs, i;
216 rtx addr;
217 rtx_insn *insn;
218
219 nregs = 0;
220
221 /* Skip the empty slots. */
222 for (; regno_start <= LAST_GP_REGNUM;)
223 if (TEST_HARD_REG_BIT (cfun->machine->save_mask, regno_start))
224 break;
225 else
226 regno_start++;
227
228 /* Find the largest consecutive group of registers to save. */
229 for (regno = regno_start; regno <= LAST_GP_REGNUM;)
230 if (TEST_HARD_REG_BIT (cfun->machine->save_mask, regno))
231 {
232 regno++;
233 nregs++;
234 }
235 else
236 break;
237
238 if (!nregs)
239 return -1;
240
241 gcc_assert (UBYTE_INT (*sp_offset));
242
243 /* Ok, save this bunch. */
244 addr = plus_constant (Pmode, stack_pointer_rtx, *sp_offset);
245
246 if (do_store)
247 insn = targetm.gen_store_multiple (gen_frame_mem (BLKmode, addr),
248 gen_rtx_REG (QImode, regno_start),
249 GEN_INT (nregs));
250 else
251 insn = targetm.gen_load_multiple (gen_rtx_REG (QImode, regno_start),
252 gen_frame_mem (BLKmode, addr),
253 GEN_INT (nregs));
254
255 gcc_assert (reload_completed);
256 gcc_assert (insn);
257 emit_insn (insn);
258
259 /* Tag as frame-related. */
260 RTX_FRAME_RELATED_P (insn) = 1;
261
262 if (!do_store)
263 {
264 /* Tag epilogue unwind notes. */
265 for (i = regno_start; i < (regno_start + nregs); i++)
266 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (QImode, i));
267 }
268
269 /* Increment and save offset in anticipation of the next register group. */
270 *sp_offset += nregs * UNITS_PER_WORD;
271
272 return regno_start + nregs;
273 }
274
275 /* Emit function prologue. */
276 void
277 pru_expand_prologue (void)
278 {
279 int regno_start;
280 int total_frame_size;
281 int sp_offset; /* Offset from base_reg to final stack value. */
282 int save_regs_base; /* Offset from base_reg to register save area. */
283 int save_offset; /* Temporary offset to currently saved register group. */
284
285 total_frame_size = cfun->machine->total_size;
286
287 if (flag_stack_usage_info)
288 current_function_static_stack_size = total_frame_size;
289
290 /* Decrement the stack pointer. */
291 if (!UBYTE_INT (total_frame_size))
292 {
293 /* We need an intermediary point, this will point at the spill block. */
294 pru_add_to_sp (cfun->machine->save_regs_offset - total_frame_size,
295 REG_NOTE_MAX);
296 save_regs_base = 0;
297 sp_offset = -cfun->machine->save_regs_offset;
298 }
299 else if (total_frame_size)
300 {
301 pru_add_to_sp (- total_frame_size, REG_NOTE_MAX);
302 save_regs_base = cfun->machine->save_regs_offset;
303 sp_offset = 0;
304 }
305 else
306 save_regs_base = sp_offset = 0;
307
308 regno_start = 0;
309 save_offset = save_regs_base;
310 do
311 regno_start = xbbo_next_reg_cluster (regno_start, &save_offset, true);
312 while (regno_start >= 0);
313
314 /* Set FP before adjusting SP. This way fp_offset has
315 better chance to fit in UBYTE. */
316 if (frame_pointer_needed)
317 {
318 int fp_offset = total_frame_size
319 - crtl->args.pretend_args_size
320 + sp_offset;
321
322 pru_add3_frame_adjust (hard_frame_pointer_rtx, stack_pointer_rtx,
323 fp_offset, REG_NOTE_MAX);
324 }
325
326 if (sp_offset)
327 pru_add_to_sp (sp_offset, REG_FRAME_RELATED_EXPR);
328
329 /* If we are profiling, make sure no instructions are scheduled before
330 the call to mcount. */
331 if (crtl->profile)
332 emit_insn (gen_blockage ());
333 }
334
335 /* Emit function epilogue. */
336 void
337 pru_expand_epilogue (bool sibcall_p)
338 {
339 int total_frame_size;
340 int sp_adjust, save_offset;
341 int regno_start;
342
343 if (!sibcall_p && pru_can_use_return_insn ())
344 {
345 emit_jump_insn (gen_return ());
346 return;
347 }
348
349 emit_insn (gen_blockage ());
350
351 total_frame_size = cfun->machine->total_size;
352
353 if (frame_pointer_needed)
354 {
355 /* Recover the stack pointer. */
356 pru_add3_frame_adjust (stack_pointer_rtx, hard_frame_pointer_rtx,
357 - cfun->machine->save_reg_size,
358 REG_CFA_ADJUST_CFA);
359
360 save_offset = 0;
361 sp_adjust = total_frame_size - cfun->machine->save_regs_offset;
362 }
363 else if (!UBYTE_INT (total_frame_size))
364 {
365 pru_add_to_sp (cfun->machine->save_regs_offset, REG_CFA_ADJUST_CFA);
366 save_offset = 0;
367 sp_adjust = total_frame_size - cfun->machine->save_regs_offset;
368 }
369 else
370 {
371 save_offset = cfun->machine->save_regs_offset;
372 sp_adjust = total_frame_size;
373 }
374
375 regno_start = 0;
376 do
377 regno_start = xbbo_next_reg_cluster (regno_start, &save_offset, false);
378 while (regno_start >= 0);
379
380 /* Emit a blockage insn here to keep these insns from being moved to
381 an earlier spot in the epilogue.
382
383 This is necessary as we must not cut the stack back before all the
384 restores are finished. */
385 emit_insn (gen_blockage ());
386
387 if (sp_adjust)
388 pru_add_to_sp (sp_adjust, REG_CFA_ADJUST_CFA);
389
390 if (!sibcall_p)
391 emit_jump_insn (gen_simple_return ());
392 }
393
394 /* Implement RETURN_ADDR_RTX. Note, we do not support moving
395 back to a previous frame. */
396 rtx
397 pru_get_return_address (int count)
398 {
399 if (count != 0)
400 return NULL_RTX;
401
402 /* Return r3.w2. */
403 return get_hard_reg_initial_val (HImode, RA_REGNUM);
404 }
405
406 /* Implement FUNCTION_PROFILER macro. */
407 void
408 pru_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
409 {
410 fprintf (file, "\tmov\tr1, ra\n");
411 fprintf (file, "\tcall\t_mcount\n");
412 fprintf (file, "\tmov\tra, r1\n");
413 }
414
415 /* Dump stack layout. */
416 static void
417 pru_dump_frame_layout (FILE *file)
418 {
419 fprintf (file, "\t%s Current Frame Info\n", ASM_COMMENT_START);
420 fprintf (file, "\t%s total_size = %d\n", ASM_COMMENT_START,
421 cfun->machine->total_size);
422 fprintf (file, "\t%s var_size = %d\n", ASM_COMMENT_START,
423 cfun->machine->var_size);
424 fprintf (file, "\t%s out_args_size = %d\n", ASM_COMMENT_START,
425 cfun->machine->out_args_size);
426 fprintf (file, "\t%s save_reg_size = %d\n", ASM_COMMENT_START,
427 cfun->machine->save_reg_size);
428 fprintf (file, "\t%s initialized = %d\n", ASM_COMMENT_START,
429 cfun->machine->initialized);
430 fprintf (file, "\t%s save_regs_offset = %d\n", ASM_COMMENT_START,
431 cfun->machine->save_regs_offset);
432 fprintf (file, "\t%s is_leaf = %d\n", ASM_COMMENT_START,
433 crtl->is_leaf);
434 fprintf (file, "\t%s frame_pointer_needed = %d\n", ASM_COMMENT_START,
435 frame_pointer_needed);
436 fprintf (file, "\t%s pretend_args_size = %d\n", ASM_COMMENT_START,
437 crtl->args.pretend_args_size);
438 }
439
440 /* Return true if REGNO should be saved in the prologue. */
441 static bool
442 prologue_saved_reg_p (int regno)
443 {
444 gcc_assert (GP_REG_P (regno));
445
446 if (df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
447 return true;
448
449 /* 32-bit FP. */
450 if (frame_pointer_needed
451 && regno >= HARD_FRAME_POINTER_REGNUM
452 && regno < HARD_FRAME_POINTER_REGNUM + GET_MODE_SIZE (Pmode))
453 return true;
454
455 /* 16-bit RA. */
456 if (regno == RA_REGNUM && df_regs_ever_live_p (RA_REGNUM))
457 return true;
458 if (regno == RA_REGNUM + 1 && df_regs_ever_live_p (RA_REGNUM + 1))
459 return true;
460
461 return false;
462 }
463
464 /* Implement TARGET_CAN_ELIMINATE. */
465 static bool
466 pru_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
467 {
468 if (to == STACK_POINTER_REGNUM)
469 return !frame_pointer_needed;
470 return true;
471 }
472
473 /* Implement INITIAL_ELIMINATION_OFFSET macro. */
474 int
475 pru_initial_elimination_offset (int from, int to)
476 {
477 int offset;
478
479 /* Set OFFSET to the offset from the stack pointer. */
480 switch (from)
481 {
482 case FRAME_POINTER_REGNUM:
483 offset = cfun->machine->out_args_size;
484 break;
485
486 case ARG_POINTER_REGNUM:
487 offset = cfun->machine->total_size;
488 offset -= crtl->args.pretend_args_size;
489 break;
490
491 default:
492 gcc_unreachable ();
493 }
494
495 /* If we are asked for the frame pointer offset, then adjust OFFSET
496 by the offset from the frame pointer to the stack pointer. */
497 if (to == HARD_FRAME_POINTER_REGNUM)
498 offset -= cfun->machine->total_size - crtl->args.pretend_args_size;
499
500
501 return offset;
502 }
503
504 /* Return nonzero if this function is known to have a null epilogue.
505 This allows the optimizer to omit jumps to jumps if no stack
506 was created. */
507 int
508 pru_can_use_return_insn (void)
509 {
510 if (!reload_completed || crtl->profile)
511 return 0;
512
513 return cfun->machine->total_size == 0;
514 }
515 \f
516 /* Implement TARGET_HARD_REGNO_MODE_OK. */
517
518 static bool
519 pru_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
520 {
521 switch (GET_MODE_SIZE (mode))
522 {
523 case 1: return true;
524 case 2: return (regno % 4) <= 2;
525 case 4: return (regno % 4) == 0;
526 case 8: return (regno % 4) == 0;
527 case 16: return (regno % 4) == 0; /* Not sure why TImode is used. */
528 case 32: return (regno % 4) == 0; /* Not sure why CTImode is used. */
529 default:
530 /* TODO: Find out why VOIDmode and BLKmode are passed. */
531 gcc_assert (mode == BLKmode || mode == VOIDmode);
532 return (regno % 4) == 0;
533 }
534 }
535
536 /* Implement `TARGET_HARD_REGNO_SCRATCH_OK'.
537 Returns true if REGNO is safe to be allocated as a scratch
538 register (for a define_peephole2) in the current function. */
539
540 static bool
541 pru_hard_regno_scratch_ok (unsigned int regno)
542 {
543 /* Don't allow hard registers that might be part of the frame pointer.
544 Some places in the compiler just test for [HARD_]FRAME_POINTER_REGNUM
545 and don't handle a frame pointer that spans more than one register.
546 TODO: Fix those faulty places. */
547
548 if ((!reload_completed || frame_pointer_needed)
549 && (IN_RANGE (regno, HARD_FRAME_POINTER_REGNUM,
550 HARD_FRAME_POINTER_REGNUM + 3)
551 || IN_RANGE (regno, FRAME_POINTER_REGNUM,
552 FRAME_POINTER_REGNUM + 3)))
553 return false;
554
555 return true;
556 }
557
558
559 /* Worker function for `HARD_REGNO_RENAME_OK'.
560 Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
561
562 int
563 pru_hard_regno_rename_ok (unsigned int old_reg,
564 unsigned int new_reg)
565 {
566 /* Don't allow hard registers that might be part of the frame pointer.
567 Some places in the compiler just test for [HARD_]FRAME_POINTER_REGNUM
568 and don't care for a frame pointer that spans more than one register.
569 TODO: Fix those faulty places. */
570 if ((!reload_completed || frame_pointer_needed)
571 && (IN_RANGE (old_reg, HARD_FRAME_POINTER_REGNUM,
572 HARD_FRAME_POINTER_REGNUM + 3)
573 || IN_RANGE (old_reg, FRAME_POINTER_REGNUM,
574 FRAME_POINTER_REGNUM + 3)
575 || IN_RANGE (new_reg, HARD_FRAME_POINTER_REGNUM,
576 HARD_FRAME_POINTER_REGNUM + 3)
577 || IN_RANGE (new_reg, FRAME_POINTER_REGNUM,
578 FRAME_POINTER_REGNUM + 3)))
579 return 0;
580
581 return 1;
582 }
583 \f
584 /* Allocate a chunk of memory for per-function machine-dependent data. */
585 static struct machine_function *
586 pru_init_machine_status (void)
587 {
588 return ggc_cleared_alloc<machine_function> ();
589 }
590
591 /* Implement TARGET_OPTION_OVERRIDE. */
592 static void
593 pru_option_override (void)
594 {
595 #ifdef SUBTARGET_OVERRIDE_OPTIONS
596 SUBTARGET_OVERRIDE_OPTIONS;
597 #endif
598
599 /* Check for unsupported options. */
600 if (flag_pic == 1)
601 warning (OPT_fpic, "%<-fpic%> is not supported");
602 if (flag_pic == 2)
603 warning (OPT_fPIC, "%<-fPIC%> is not supported");
604 if (flag_pie == 1)
605 warning (OPT_fpie, "%<-fpie%> is not supported");
606 if (flag_pie == 2)
607 warning (OPT_fPIE, "%<-fPIE%> is not supported");
608
609 /* QBxx conditional branching cannot cope with block reordering. */
610 if (flag_reorder_blocks_and_partition)
611 {
612 inform (input_location, "%<-freorder-blocks-and-partition%> "
613 "not supported on this architecture");
614 flag_reorder_blocks_and_partition = 0;
615 flag_reorder_blocks = 1;
616 }
617
618 /* Function to allocate machine-dependent function status. */
619 init_machine_status = &pru_init_machine_status;
620
621 /* Save the initial options in case the user does function specific
622 options. */
623 target_option_default_node = target_option_current_node
624 = build_target_option_node (&global_options, &global_options_set);
625
626 /* Due to difficulties in implementing the TI ABI with GCC,
627 at least check and error-out if GCC cannot compile a
628 compliant output. */
629 pru_register_abicheck_pass ();
630 }
631 \f
632 /* Compute a (partial) cost for rtx X. Return true if the complete
633 cost has been computed, and false if subexpressions should be
634 scanned. In either case, *TOTAL contains the cost result. */
635 static bool
636 pru_rtx_costs (rtx x, machine_mode mode,
637 int outer_code, int opno ATTRIBUTE_UNUSED,
638 int *total, bool speed ATTRIBUTE_UNUSED)
639 {
640 const int code = GET_CODE (x);
641
642 switch (code)
643 {
644 case CONST_INT:
645 if ((mode == VOIDmode && UBYTE_INT (INTVAL (x)))
646 || (mode != VOIDmode && const_ubyte_operand (x, mode)))
647 {
648 *total = COSTS_N_INSNS (0);
649 return true;
650 }
651 else if ((mode == VOIDmode && UHWORD_INT (INTVAL (x)))
652 || (mode != VOIDmode && const_uhword_operand (x, mode)))
653 {
654 *total = COSTS_N_INSNS (1);
655 return true;
656 }
657 else if (outer_code == MEM && ctable_addr_operand (x, VOIDmode))
658 {
659 *total = COSTS_N_INSNS (0);
660 return true;
661 }
662 else
663 {
664 *total = COSTS_N_INSNS (2);
665 return true;
666 }
667
668 case LABEL_REF:
669 case SYMBOL_REF:
670 case CONST:
671 {
672 *total = COSTS_N_INSNS (1);
673 return true;
674 }
675 case CONST_DOUBLE:
676 {
677 *total = COSTS_N_INSNS (2);
678 return true;
679 }
680 case CONST_WIDE_INT:
681 {
682 /* PRU declares no vector or very large integer types. */
683 gcc_unreachable ();
684 return true;
685 }
686 case SET:
687 {
688 int factor;
689
690 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
691 the mode for the factor. */
692 mode = GET_MODE (SET_DEST (x));
693
694 /* SI move has the same cost as a QI move. Moves larger than
695 64 bits are costly. */
696 factor = CEIL (GET_MODE_SIZE (mode), GET_MODE_SIZE (SImode));
697 *total = factor * COSTS_N_INSNS (1);
698
699 return false;
700 }
701
702 case MULT:
703 {
704 /* Factor in that "mul" requires fixed registers, which
705 would likely require register moves. */
706 *total = COSTS_N_INSNS (7);
707 return false;
708 }
709 case PLUS:
710 {
711 rtx op0 = XEXP (x, 0);
712 rtx op1 = XEXP (x, 1);
713 machine_mode op1_mode = GET_MODE (op1);
714
715 /* Generic RTL address expressions do not enforce mode for
716 offsets, yet our UBYTE constraint requires it. Fix it here. */
717 if (op1_mode == VOIDmode && CONST_INT_P (op1) && outer_code == MEM)
718 op1_mode = Pmode;
719 if (outer_code == MEM
720 && ((REG_P (op0) && reg_or_ubyte_operand (op1, op1_mode))
721 || ctable_addr_operand (op0, VOIDmode)
722 || ctable_addr_operand (op1, VOIDmode)
723 || (ctable_base_operand (op0, VOIDmode) && REG_P (op1))
724 || (ctable_base_operand (op1, VOIDmode) && REG_P (op0))))
725 {
726 /* CTABLE or REG base addressing - PLUS comes for free. */
727 *total = COSTS_N_INSNS (0);
728 return true;
729 }
730 else
731 {
732 *total = COSTS_N_INSNS (1);
733 return false;
734 }
735 }
736 case SIGN_EXTEND:
737 {
738 *total = COSTS_N_INSNS (3);
739 return false;
740 }
741 case ASHIFTRT:
742 {
743 rtx op1 = XEXP (x, 1);
744 if (const_1_operand (op1, VOIDmode))
745 *total = COSTS_N_INSNS (3);
746 else
747 *total = COSTS_N_INSNS (7);
748 return false;
749 }
750 case ZERO_EXTRACT:
751 {
752 rtx op2 = XEXP (x, 2);
753 if ((outer_code == EQ || outer_code == NE)
754 && CONST_INT_P (op2)
755 && INTVAL (op2) == 1)
756 {
757 /* Branch if bit is set/clear is a single instruction. */
758 *total = COSTS_N_INSNS (0);
759 return true;
760 }
761 else
762 {
763 *total = COSTS_N_INSNS (2);
764 return false;
765 }
766 }
767 case ZERO_EXTEND:
768 {
769 /* 64-bit zero extensions actually have a cost because they
770 require setting a register to zero.
771 32-bit and smaller are free. */
772 int factor = (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (SImode)) ? 0 : 1;
773 *total = factor * COSTS_N_INSNS (1);
774 return false;
775 }
776
777 default:
778 {
779 /* PRU ALU is 32 bit, despite GCC's UNITS_PER_WORD=1. */
780 int factor = CEIL (GET_MODE_SIZE (mode), GET_MODE_SIZE (SImode));
781 *total = factor * COSTS_N_INSNS (1);
782 return false;
783 }
784 }
785 }
786
787 /* Insn costs on PRU are straightforward because:
788 - Insns emit 0, 1 or more instructions.
789 - All instructions are 32-bit length.
790 - All instructions execute in 1 cycle (sans memory access delays).
791 The "length" attribute maps nicely to the insn cost. */
792
793 static int
794 pru_insn_cost (rtx_insn *insn, bool speed)
795 {
796 /* Use generic cost calculation for unrecognized insns. */
797 if (recog_memoized (insn) < 0)
798 return pattern_cost (insn, speed);
799
800 unsigned int len = get_attr_length (insn);
801
802 gcc_assert ((len % 4) == 0);
803
804 int cost = COSTS_N_INSNS (len / 4);
805 /* Some insns have zero length (e.g. blockage, pruloop_end).
806 In such cases give the minimum cost, because a return of
807 0 would incorrectly indicate that the insn cost is unknown. */
808 if (cost == 0)
809 cost = 1;
810
811 /* Writes are usually posted, so they take 1 cycle. Reads
812 from DMEM usually take 3 cycles.
813 See TI document SPRACE8A, Device-Specific PRU Read Latency Values. */
814 if (speed && get_attr_type (insn) == TYPE_LD)
815 cost += COSTS_N_INSNS (2);
816
817 return cost;
818 }
819 \f
/* Library functions for floating-point comparisons.  They are
   initialized in pru_init_libfuncs and looked up by
   pru_expand_fp_compare.  The first six are for DFmode operands, the
   last six for SFmode operands.  */
static GTY(()) rtx eqdf_libfunc;
static GTY(()) rtx nedf_libfunc;
static GTY(()) rtx ledf_libfunc;
static GTY(()) rtx ltdf_libfunc;
static GTY(()) rtx gedf_libfunc;
static GTY(()) rtx gtdf_libfunc;
static GTY(()) rtx eqsf_libfunc;
static GTY(()) rtx nesf_libfunc;
static GTY(()) rtx lesf_libfunc;
static GTY(()) rtx ltsf_libfunc;
static GTY(()) rtx gesf_libfunc;
static GTY(()) rtx gtsf_libfunc;

/* Implement the TARGET_INIT_LIBFUNCS macro.  We use this to rename library
   functions to match the PRU ABI.  All registered names carry the
   "__pruabi_" prefix.  */

static void
pru_init_libfuncs (void)
{
  /* Double-precision floating-point arithmetic.  */
  set_optab_libfunc (add_optab, DFmode, "__pruabi_addd");
  set_optab_libfunc (sdiv_optab, DFmode, "__pruabi_divd");
  set_optab_libfunc (smul_optab, DFmode, "__pruabi_mpyd");
  set_optab_libfunc (neg_optab, DFmode, "__pruabi_negd");
  set_optab_libfunc (sub_optab, DFmode, "__pruabi_subd");

  /* Single-precision floating-point arithmetic.  */
  set_optab_libfunc (add_optab, SFmode, "__pruabi_addf");
  set_optab_libfunc (sdiv_optab, SFmode, "__pruabi_divf");
  set_optab_libfunc (smul_optab, SFmode, "__pruabi_mpyf");
  set_optab_libfunc (neg_optab, SFmode, "__pruabi_negf");
  set_optab_libfunc (sub_optab, SFmode, "__pruabi_subf");

  /* Floating-point comparisons.  These are kept in file-local variables
     rather than in the optab tables, which are cleared below.  */
  eqsf_libfunc = init_one_libfunc ("__pruabi_eqf");
  nesf_libfunc = init_one_libfunc ("__pruabi_neqf");
  lesf_libfunc = init_one_libfunc ("__pruabi_lef");
  ltsf_libfunc = init_one_libfunc ("__pruabi_ltf");
  gesf_libfunc = init_one_libfunc ("__pruabi_gef");
  gtsf_libfunc = init_one_libfunc ("__pruabi_gtf");
  eqdf_libfunc = init_one_libfunc ("__pruabi_eqd");
  nedf_libfunc = init_one_libfunc ("__pruabi_neqd");
  ledf_libfunc = init_one_libfunc ("__pruabi_led");
  ltdf_libfunc = init_one_libfunc ("__pruabi_ltd");
  gedf_libfunc = init_one_libfunc ("__pruabi_ged");
  gtdf_libfunc = init_one_libfunc ("__pruabi_gtd");

  /* In PRU ABI, much like other TI processors, floating point
     comparisons return non-standard values.  This quirk is handled
     by disabling the optab library functions, and handling the
     comparison during RTL expansion.  */
  set_optab_libfunc (eq_optab, SFmode, NULL);
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (gt_optab, SFmode, NULL);
  set_optab_libfunc (ge_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, NULL);
  set_optab_libfunc (le_optab, SFmode, NULL);
  set_optab_libfunc (eq_optab, DFmode, NULL);
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (gt_optab, DFmode, NULL);
  set_optab_libfunc (ge_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, NULL);
  set_optab_libfunc (le_optab, DFmode, NULL);

  /* The isunordered function appears to be supported only by GCC.  */
  set_optab_libfunc (unord_optab, SFmode, "__pruabi_unordf");
  set_optab_libfunc (unord_optab, DFmode, "__pruabi_unordd");

  /* Floating-point to integer conversions.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__pruabi_fixdi");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__pruabi_fixdu");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__pruabi_fixdlli");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__pruabi_fixdull");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__pruabi_fixfi");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__pruabi_fixfu");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__pruabi_fixflli");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__pruabi_fixfull");

  /* Conversions between floating types.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__pruabi_cvtdf");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__pruabi_cvtfd");

  /* Integer to floating-point conversions.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__pruabi_fltid");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__pruabi_fltud");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__pruabi_fltllid");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__pruabi_fltulld");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__pruabi_fltif");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__pruabi_fltuf");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__pruabi_fltllif");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__pruabi_fltullf");

  /* Long long shifts and multiplication.  */
  set_optab_libfunc (ashr_optab, DImode, "__pruabi_asrll");
  set_optab_libfunc (smul_optab, DImode, "__pruabi_mpyll");
  set_optab_libfunc (ashl_optab, DImode, "__pruabi_lslll");
  set_optab_libfunc (lshr_optab, DImode, "__pruabi_lsrll");

  /* Integer division and remainder, for SImode and then DImode.  Note
     that no sdivmod_optab entry is registered for DImode here.  */
  set_optab_libfunc (sdiv_optab, SImode, "__pruabi_divi");
  set_optab_libfunc (udiv_optab, SImode, "__pruabi_divu");
  set_optab_libfunc (smod_optab, SImode, "__pruabi_remi");
  set_optab_libfunc (umod_optab, SImode, "__pruabi_remu");
  set_optab_libfunc (sdivmod_optab, SImode, "__pruabi_divremi");
  set_optab_libfunc (udivmod_optab, SImode, "__pruabi_divremu");
  set_optab_libfunc (sdiv_optab, DImode, "__pruabi_divlli");
  set_optab_libfunc (udiv_optab, DImode, "__pruabi_divull");
  set_optab_libfunc (smod_optab, DImode, "__pruabi_remlli");
  set_optab_libfunc (umod_optab, DImode, "__pruabi_remull");
  set_optab_libfunc (udivmod_optab, DImode, "__pruabi_divremull");
}
930
931 /* Given a comparison CODE, return a similar comparison but without
932 the "equals" condition. In other words, it strips GE/GEU/LE/LEU
933 and instead returns GT/GTU/LT/LTU. */
934
935 enum rtx_code
936 pru_noteq_condition (enum rtx_code code)
937 {
938 switch (code)
939 {
940 case GT: return GT;
941 case GTU: return GTU;
942 case GE: return GT;
943 case GEU: return GTU;
944 case LT: return LT;
945 case LTU: return LTU;
946 case LE: return LT;
947 case LEU: return LTU;
948 default:
949 gcc_unreachable ();
950 }
951 }
952
953 /* Emit comparison instruction if necessary, returning the expression
954 that holds the compare result in the proper mode. Return the comparison
955 that should be used in the jump insn. */
956
957 rtx
958 pru_expand_fp_compare (rtx comparison, machine_mode mode)
959 {
960 enum rtx_code code = GET_CODE (comparison);
961 rtx op0 = XEXP (comparison, 0);
962 rtx op1 = XEXP (comparison, 1);
963 rtx cmp;
964 enum rtx_code jump_code = code;
965 machine_mode op_mode = GET_MODE (op0);
966 rtx_insn *insns;
967 rtx libfunc;
968
969 gcc_assert (op_mode == DFmode || op_mode == SFmode);
970
971 /* FP exceptions are not raised by PRU's softfp implementation. So the
972 following transformations are safe. */
973 if (code == UNGE)
974 {
975 code = LT;
976 jump_code = EQ;
977 }
978 else if (code == UNLE)
979 {
980 code = GT;
981 jump_code = EQ;
982 }
983 else
984 jump_code = NE;
985
986 switch (code)
987 {
988 case EQ:
989 libfunc = op_mode == DFmode ? eqdf_libfunc : eqsf_libfunc;
990 break;
991 case NE:
992 libfunc = op_mode == DFmode ? nedf_libfunc : nesf_libfunc;
993 break;
994 case GT:
995 libfunc = op_mode == DFmode ? gtdf_libfunc : gtsf_libfunc;
996 break;
997 case GE:
998 libfunc = op_mode == DFmode ? gedf_libfunc : gesf_libfunc;
999 break;
1000 case LT:
1001 libfunc = op_mode == DFmode ? ltdf_libfunc : ltsf_libfunc;
1002 break;
1003 case LE:
1004 libfunc = op_mode == DFmode ? ledf_libfunc : lesf_libfunc;
1005 break;
1006 default:
1007 gcc_unreachable ();
1008 }
1009 start_sequence ();
1010
1011 cmp = emit_library_call_value (libfunc, 0, LCT_CONST, SImode,
1012 op0, op_mode, op1, op_mode);
1013 insns = get_insns ();
1014 end_sequence ();
1015
1016 emit_libcall_block (insns, cmp, cmp,
1017 gen_rtx_fmt_ee (code, SImode, op0, op1));
1018
1019 return gen_rtx_fmt_ee (jump_code, mode, cmp, const0_rtx);
1020 }
1021 \f
1022 /* Return the sign bit position for given OP's mode. */
1023 static int
1024 sign_bit_position (const rtx op)
1025 {
1026 const int sz = GET_MODE_SIZE (GET_MODE (op));
1027
1028 return sz * 8 - 1;
1029 }
1030
1031 /* Parse the given CVAL integer value, and extract the "filling" byte
1032 range of consecutive 0xff byte values. Rest of bytes must be 0x00.
1033 There must be only one range in the given value. This range would
1034 typically be used to calculate the parameters of
1035 PRU instructions ZERO and FILL.
1036
1037 The parameter MODE determines the maximum byte range to consider
1038 in the given input constant.
1039
1040 Example input:
1041 cval = 0xffffffffffffff00 = -256
1042 mode = SImode
1043 Return value:
1044 start = 1
1045 nbytes = 3
1046
1047 On error, return a range with -1 for START and NBYTES. */
1048 pru_byterange
1049 pru_calc_byterange (HOST_WIDE_INT cval, machine_mode mode)
1050 {
1051 const pru_byterange invalid_range = { -1, -1 };
1052 pru_byterange r = invalid_range;
1053 enum { ST_FFS, ST_INRANGE, ST_TRAILING_ZEROS } st = ST_FFS;
1054 int i;
1055
1056 for (i = 0; i < GET_MODE_SIZE (mode); i++)
1057 {
1058 const int b = cval & ((1U << BITS_PER_UNIT) - 1);
1059 cval >>= BITS_PER_UNIT;
1060
1061 if (b == 0x00 && (st == ST_FFS || st == ST_TRAILING_ZEROS))
1062 /* No action. */;
1063 else if (b == 0x00 && st == ST_INRANGE)
1064 st = ST_TRAILING_ZEROS;
1065 else if (b == 0xff && st == ST_FFS)
1066 {
1067 st = ST_INRANGE;
1068 r.start = i;
1069 r.nbytes = 1;
1070 }
1071 else if (b == 0xff && st == ST_INRANGE)
1072 r.nbytes++;
1073 else
1074 return invalid_range;
1075 }
1076
1077 if (st != ST_TRAILING_ZEROS && st != ST_INRANGE)
1078 return invalid_range;
1079 return r;
1080 }
1081 \f
1082 /* Branches and compares. */
1083
1084 /* PRU's ALU does not support signed comparison operations. That's why we
1085 emulate them. By first checking the sign bit and handling every possible
1086 operand sign combination, we can simulate signed comparisons in just
1087 5 instructions. See table below.
1088
1089 .-------------------.---------------------------------------------------.
1090 | Operand sign bit | Mapping the signed comparison to an unsigned one |
1091 |---------+---------+------------+------------+------------+------------|
1092 | OP1.b31 | OP2.b31 | OP1 < OP2 | OP1 <= OP2 | OP1 > OP2 | OP1 >= OP2 |
1093 |---------+---------+------------+------------+------------+------------|
1094 | 0 | 0 | OP1 < OP2 | OP1 <= OP2 | OP1 > OP2 | OP1 >= OP2 |
1095 |---------+---------+------------+------------+------------+------------|
1096 | 0 | 1 | false | false | true | true |
1097 |---------+---------+------------+------------+------------+------------|
1098 | 1 | 0 | true | true | false | false |
1099 |---------+---------+------------+------------+------------+------------|
1100 | 1 | 1 | OP1 < OP2 | OP1 <= OP2 | OP1 > OP2 | OP1 >= OP2 |
1101 `---------'---------'------------'------------'------------+------------'
1102
1103
1104 Given the table above, here is an example for a concrete op:
1105 LT:
1106 qbbc OP1_POS, OP1, 31
1107 OP1_NEG: qbbc BRANCH_TAKEN_LABEL, OP2, 31
1108 OP1_NEG_OP2_NEG: qblt BRANCH_TAKEN_LABEL, OP2, OP1
1109 ; jmp OUT -> can be eliminated because we'll take the
1110 ; following branch. OP2.b31 is guaranteed to be 1
1111 ; by the time we get here.
1112 OP1_POS: qbbs OUT, OP2, 31
1113 OP1_POS_OP2_POS: qblt BRANCH_TAKEN_LABEL, OP2, OP1
1114 #if FAR_JUMP
1115 jmp OUT
1116 BRANCH_TAKEN_LABEL: jmp REAL_BRANCH_TAKEN_LABEL
1117 #endif
1118 OUT:
1119
1120 */
1121
1122 /* Output asm code for a signed-compare LT/LE conditional branch. */
1123 static const char *
1124 pru_output_ltle_signed_cbranch (rtx *operands, bool is_near)
1125 {
1126 static char buf[1024];
1127 enum rtx_code code = GET_CODE (operands[0]);
1128 rtx op1;
1129 rtx op2;
1130 const char *cmp_opstr;
1131 int bufi = 0;
1132
1133 op1 = operands[1];
1134 op2 = operands[2];
1135
1136 gcc_assert (GET_CODE (op1) == REG && GET_CODE (op2) == REG);
1137
1138 /* Determine the comparison operators for positive and negative operands. */
1139 if (code == LT)
1140 cmp_opstr = "qblt";
1141 else if (code == LE)
1142 cmp_opstr = "qble";
1143 else
1144 gcc_unreachable ();
1145
1146 if (is_near)
1147 bufi = snprintf (buf, sizeof (buf),
1148 "qbbc\t.+12, %%1, %d\n\t"
1149 "qbbc\t%%l3, %%2, %d\n\t" /* OP1_NEG. */
1150 "%s\t%%l3, %%2, %%1\n\t" /* OP1_NEG_OP2_NEG. */
1151 "qbbs\t.+8, %%2, %d\n\t" /* OP1_POS. */
1152 "%s\t%%l3, %%2, %%1", /* OP1_POS_OP2_POS. */
1153 sign_bit_position (op1),
1154 sign_bit_position (op2),
1155 cmp_opstr,
1156 sign_bit_position (op2),
1157 cmp_opstr);
1158 else
1159 bufi = snprintf (buf, sizeof (buf),
1160 "qbbc\t.+12, %%1, %d\n\t"
1161 "qbbc\t.+20, %%2, %d\n\t" /* OP1_NEG. */
1162 "%s\t.+16, %%2, %%1\n\t" /* OP1_NEG_OP2_NEG. */
1163 "qbbs\t.+16, %%2, %d\n\t" /* OP1_POS. */
1164 "%s\t.+8, %%2, %%1\n\t" /* OP1_POS_OP2_POS. */
1165 "jmp\t.+8\n\t" /* jmp OUT. */
1166 "jmp\t%%%%label(%%l3)", /* BRANCH_TAKEN_LABEL. */
1167 sign_bit_position (op1),
1168 sign_bit_position (op2),
1169 cmp_opstr,
1170 sign_bit_position (op2),
1171 cmp_opstr);
1172
1173 gcc_assert (bufi > 0);
1174 gcc_assert ((unsigned int) bufi < sizeof (buf));
1175
1176 return buf;
1177 }
1178
1179 /* Output asm code for a signed-compare GT/GE conditional branch. */
1180 static const char *
1181 pru_output_gtge_signed_cbranch (rtx *operands, bool is_near)
1182 {
1183 static char buf[1024];
1184 enum rtx_code code = GET_CODE (operands[0]);
1185 rtx op1;
1186 rtx op2;
1187 const char *cmp_opstr;
1188 int bufi = 0;
1189
1190 op1 = operands[1];
1191 op2 = operands[2];
1192
1193 gcc_assert (GET_CODE (op1) == REG && GET_CODE (op2) == REG);
1194
1195 /* Determine the comparison operators for positive and negative operands. */
1196 if (code == GT)
1197 cmp_opstr = "qbgt";
1198 else if (code == GE)
1199 cmp_opstr = "qbge";
1200 else
1201 gcc_unreachable ();
1202
1203 if (is_near)
1204 bufi = snprintf (buf, sizeof (buf),
1205 "qbbs\t.+12, %%1, %d\n\t"
1206 "qbbs\t%%l3, %%2, %d\n\t" /* OP1_POS. */
1207 "%s\t%%l3, %%2, %%1\n\t" /* OP1_POS_OP2_POS. */
1208 "qbbc\t.+8, %%2, %d\n\t" /* OP1_NEG. */
1209 "%s\t%%l3, %%2, %%1", /* OP1_NEG_OP2_NEG. */
1210 sign_bit_position (op1),
1211 sign_bit_position (op2),
1212 cmp_opstr,
1213 sign_bit_position (op2),
1214 cmp_opstr);
1215 else
1216 bufi = snprintf (buf, sizeof (buf),
1217 "qbbs\t.+12, %%1, %d\n\t"
1218 "qbbs\t.+20, %%2, %d\n\t" /* OP1_POS. */
1219 "%s\t.+16, %%2, %%1\n\t" /* OP1_POS_OP2_POS. */
1220 "qbbc\t.+16, %%2, %d\n\t" /* OP1_NEG. */
1221 "%s\t.+8, %%2, %%1\n\t" /* OP1_NEG_OP2_NEG. */
1222 "jmp\t.+8\n\t" /* jmp OUT. */
1223 "jmp\t%%%%label(%%l3)", /* BRANCH_TAKEN_LABEL. */
1224 sign_bit_position (op1),
1225 sign_bit_position (op2),
1226 cmp_opstr,
1227 sign_bit_position (op2),
1228 cmp_opstr);
1229
1230 gcc_assert (bufi > 0);
1231 gcc_assert ((unsigned int) bufi < sizeof (buf));
1232
1233 return buf;
1234 }
1235
1236 /* Output asm code for a signed-compare conditional branch.
1237
1238 If IS_NEAR is true, then QBBx instructions may be used for reaching
1239 the destination label. Otherwise JMP is used, at the expense of
1240 increased code size. */
1241 const char *
1242 pru_output_signed_cbranch (rtx *operands, bool is_near)
1243 {
1244 enum rtx_code code = GET_CODE (operands[0]);
1245
1246 if (code == LT || code == LE)
1247 return pru_output_ltle_signed_cbranch (operands, is_near);
1248 else if (code == GT || code == GE)
1249 return pru_output_gtge_signed_cbranch (operands, is_near);
1250 else
1251 gcc_unreachable ();
1252 }
1253
1254 /* Optimized version of pru_output_signed_cbranch for constant second
1255 operand. */
1256
1257 const char *
1258 pru_output_signed_cbranch_ubyteop2 (rtx *operands, bool is_near)
1259 {
1260 static char buf[1024];
1261 enum rtx_code code = GET_CODE (operands[0]);
1262 int regop_sign_bit_pos = sign_bit_position (operands[1]);
1263 const char *cmp_opstr;
1264 const char *rcmp_opstr;
1265
1266 /* We must swap operands due to PRU's demand OP1 to be the immediate. */
1267 code = swap_condition (code);
1268
1269 /* Determine normal and reversed comparison operators for both positive
1270 operands. This enables us to go completely unsigned.
1271
1272 NOTE: We cannot use the R print modifier because we convert signed
1273 comparison operators to unsigned ones. */
1274 switch (code)
1275 {
1276 case LT: cmp_opstr = "qblt"; rcmp_opstr = "qbge"; break;
1277 case LE: cmp_opstr = "qble"; rcmp_opstr = "qbgt"; break;
1278 case GT: cmp_opstr = "qbgt"; rcmp_opstr = "qble"; break;
1279 case GE: cmp_opstr = "qbge"; rcmp_opstr = "qblt"; break;
1280 default: gcc_unreachable ();
1281 }
1282
1283 /* OP2 is a constant unsigned byte - utilize this info to generate
1284 optimized code. We can "remove half" of the op table above because
1285 we know that OP2.b31 = 0 (remember that 0 <= OP2 <= 255). */
1286 if (code == LT || code == LE)
1287 {
1288 if (is_near)
1289 snprintf (buf, sizeof (buf),
1290 "qbbs\t.+8, %%1, %d\n\t"
1291 "%s\t%%l3, %%1, %%u2",
1292 regop_sign_bit_pos,
1293 cmp_opstr);
1294 else
1295 snprintf (buf, sizeof (buf),
1296 "qbbs\t.+12, %%1, %d\n\t"
1297 "%s\t.+8, %%1, %%u2\n\t"
1298 "jmp\t%%%%label(%%l3)",
1299 regop_sign_bit_pos,
1300 rcmp_opstr);
1301 }
1302 else if (code == GT || code == GE)
1303 {
1304 if (is_near)
1305 snprintf (buf, sizeof (buf),
1306 "qbbs\t%%l3, %%1, %d\n\t"
1307 "%s\t%%l3, %%1, %%u2",
1308 regop_sign_bit_pos,
1309 cmp_opstr);
1310 else
1311 snprintf (buf, sizeof (buf),
1312 "qbbs\t.+8, %%1, %d\n\t"
1313 "%s\t.+8, %%1, %%u2\n\t"
1314 "jmp\t%%%%label(%%l3)",
1315 regop_sign_bit_pos,
1316 rcmp_opstr);
1317 }
1318 else
1319 gcc_unreachable ();
1320
1321 return buf;
1322 }
1323
1324 /* Optimized version of pru_output_signed_cbranch_ubyteop2 for constant
1325 zero second operand. */
1326
1327 const char *
1328 pru_output_signed_cbranch_zeroop2 (rtx *operands, bool is_near)
1329 {
1330 static char buf[1024];
1331 enum rtx_code code = GET_CODE (operands[0]);
1332 int regop_sign_bit_pos = sign_bit_position (operands[1]);
1333
1334 /* OP2 is a constant zero - utilize this info to simply check the
1335 OP1 sign bit when comparing for LT or GE. */
1336 if (code == LT)
1337 {
1338 if (is_near)
1339 snprintf (buf, sizeof (buf),
1340 "qbbs\t%%l3, %%1, %d\n\t",
1341 regop_sign_bit_pos);
1342 else
1343 snprintf (buf, sizeof (buf),
1344 "qbbc\t.+8, %%1, %d\n\t"
1345 "jmp\t%%%%label(%%l3)",
1346 regop_sign_bit_pos);
1347 }
1348 else if (code == GE)
1349 {
1350 if (is_near)
1351 snprintf (buf, sizeof (buf),
1352 "qbbc\t%%l3, %%1, %d\n\t",
1353 regop_sign_bit_pos);
1354 else
1355 snprintf (buf, sizeof (buf),
1356 "qbbs\t.+8, %%1, %d\n\t"
1357 "jmp\t%%%%label(%%l3)",
1358 regop_sign_bit_pos);
1359 }
1360 else
1361 gcc_unreachable ();
1362
1363 return buf;
1364 }
1365
1366 /* Addressing Modes. */
1367
1368 /* Return true if register REGNO is a valid base register.
1369 STRICT_P is true if REG_OK_STRICT is in effect. */
1370
1371 bool
1372 pru_regno_ok_for_base_p (int regno, bool strict_p)
1373 {
1374 if (!HARD_REGISTER_NUM_P (regno) && !strict_p)
1375 return true;
1376
1377 /* The fake registers will be eliminated to either the stack or
1378 hard frame pointer, both of which are usually valid base registers.
1379 Reload deals with the cases where the eliminated form isn't valid. */
1380 return (GP_REG_P (regno)
1381 || regno == FRAME_POINTER_REGNUM
1382 || regno == ARG_POINTER_REGNUM);
1383 }
1384
1385 /* Return true if given xbbo constant OFFSET is valid. */
1386 static bool
1387 pru_valid_const_ubyte_offset (machine_mode mode, HOST_WIDE_INT offset)
1388 {
1389 bool valid = UBYTE_INT (offset);
1390
1391 /* Reload can split multi word accesses, so make sure we can address
1392 the second word in a DI. */
1393 if (valid && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode))
1394 valid = UBYTE_INT (offset + GET_MODE_SIZE (mode) - 1);
1395
1396 return valid;
1397 }
1398
1399 /* Recognize a CTABLE base address. Return CTABLE entry index, or -1 if
1400 base was not found in the pragma-filled pru_ctable. */
1401 int
1402 pru_get_ctable_exact_base_index (unsigned HOST_WIDE_INT caddr)
1403 {
1404 unsigned int i;
1405
1406 for (i = 0; i < ARRAY_SIZE (pru_ctable); i++)
1407 {
1408 if (pru_ctable[i].valid && pru_ctable[i].base == caddr)
1409 return i;
1410 }
1411 return -1;
1412 }
1413
1414
1415 /* Check if the given address can be addressed via CTABLE_BASE + UBYTE_OFFS,
1416 and return the base CTABLE index if possible. */
1417 int
1418 pru_get_ctable_base_index (unsigned HOST_WIDE_INT caddr)
1419 {
1420 unsigned int i;
1421
1422 for (i = 0; i < ARRAY_SIZE (pru_ctable); i++)
1423 {
1424 if (pru_ctable[i].valid && IN_RANGE (caddr,
1425 pru_ctable[i].base,
1426 pru_ctable[i].base + 0xff))
1427 return i;
1428 }
1429 return -1;
1430 }
1431
1432
1433 /* Return the offset from some CTABLE base for this address. */
1434 int
1435 pru_get_ctable_base_offset (unsigned HOST_WIDE_INT caddr)
1436 {
1437 int i;
1438
1439 i = pru_get_ctable_base_index (caddr);
1440 gcc_assert (i >= 0);
1441
1442 return caddr - pru_ctable[i].base;
1443 }
1444
1445 /* Return true if the address expression formed by BASE + OFFSET is
1446 valid.
1447
1448 Note that the following address is not handled here:
1449 base CTABLE constant base + UBYTE constant offset
1450 The constants will be folded. The ctable_addr_operand predicate will take
1451 care of the validation. The CTABLE base+offset split will happen during
1452 operand printing. */
1453 static bool
1454 pru_valid_addr_expr_p (machine_mode mode, rtx base, rtx offset, bool strict_p)
1455 {
1456 if (!strict_p && GET_CODE (base) == SUBREG)
1457 base = SUBREG_REG (base);
1458 if (!strict_p && GET_CODE (offset) == SUBREG)
1459 offset = SUBREG_REG (offset);
1460
1461 if (REG_P (base)
1462 && pru_regno_ok_for_base_p (REGNO (base), strict_p)
1463 && ((CONST_INT_P (offset)
1464 && pru_valid_const_ubyte_offset (mode, INTVAL (offset)))
1465 || (REG_P (offset)
1466 && pru_regno_ok_for_index_p (REGNO (offset), strict_p))))
1467 /* base register + register offset
1468 * OR base register + UBYTE constant offset. */
1469 return true;
1470 else if (REG_P (base)
1471 && pru_regno_ok_for_index_p (REGNO (base), strict_p)
1472 && ctable_base_operand (offset, VOIDmode))
1473 /* base CTABLE constant base + register offset
1474 * Note: GCC always puts the register as a first operand of PLUS. */
1475 return true;
1476 else
1477 return false;
1478 }
1479
1480 /* Return register number (either for r30 or r31) which maps to the
1481 corresponding symbol OP's name in the __regio_symbol address namespace.
1482
1483 If no mapping can be established (i.e. symbol name is invalid), then
1484 return -1. */
1485 int pru_symref2ioregno (rtx op)
1486 {
1487 if (!SYMBOL_REF_P (op))
1488 return -1;
1489
1490 const char *name = XSTR (op, 0);
1491 if (!strcmp (name, "__R30"))
1492 return R30_REGNUM;
1493 else if (!strcmp (name, "__R31"))
1494 return R31_REGNUM;
1495 else
1496 return -1;
1497 }
1498
1499 /* Implement TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P. */
1500 static bool
1501 pru_addr_space_legitimate_address_p (machine_mode mode, rtx operand,
1502 bool strict_p, addr_space_t as,
1503 code_helper = ERROR_MARK)
1504 {
1505 if (as == ADDR_SPACE_REGIO)
1506 {
1507 /* Address space constraints for __regio_symbol have been checked in
1508 TARGET_INSERT_ATTRIBUTES, and some more checks will be done
1509 during RTL expansion of "mov<mode>". */
1510 return true;
1511 }
1512 else if (as != ADDR_SPACE_GENERIC)
1513 {
1514 gcc_unreachable ();
1515 }
1516
1517 switch (GET_CODE (operand))
1518 {
1519 /* Direct. */
1520 case SYMBOL_REF:
1521 case LABEL_REF:
1522 case CONST:
1523 case CONST_WIDE_INT:
1524 return false;
1525
1526 case CONST_INT:
1527 return ctable_addr_operand (operand, VOIDmode);
1528
1529 /* Register indirect. */
1530 case REG:
1531 return pru_regno_ok_for_base_p (REGNO (operand), strict_p);
1532
1533 /* Register indirect with displacement. */
1534 case PLUS:
1535 {
1536 rtx op0 = XEXP (operand, 0);
1537 rtx op1 = XEXP (operand, 1);
1538
1539 return pru_valid_addr_expr_p (mode, op0, op1, strict_p);
1540 }
1541
1542 default:
1543 break;
1544 }
1545 return false;
1546 }
1547 \f
1548 /* Output assembly language related definitions. */
1549
1550 /* Implement TARGET_ASM_CONSTRUCTOR. */
1551 static void
1552 pru_elf_asm_constructor (rtx symbol, int priority)
1553 {
1554 char buf[23];
1555 section *s;
1556
1557 if (priority == DEFAULT_INIT_PRIORITY)
1558 snprintf (buf, sizeof (buf), ".init_array");
1559 else
1560 {
1561 /* While priority is known to be in range [0, 65535], so 18 bytes
1562 would be enough, the compiler might not know that. To avoid
1563 -Wformat-truncation false positive, use a larger size. */
1564 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
1565 }
1566 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
1567 switch_to_section (s);
1568 assemble_aligned_integer (INIT_ARRAY_ENTRY_BYTES, symbol);
1569 }
1570
1571 /* Implement TARGET_ASM_DESTRUCTOR. */
1572 static void
1573 pru_elf_asm_destructor (rtx symbol, int priority)
1574 {
1575 char buf[23];
1576 section *s;
1577
1578 if (priority == DEFAULT_INIT_PRIORITY)
1579 snprintf (buf, sizeof (buf), ".fini_array");
1580 else
1581 {
1582 /* While priority is known to be in range [0, 65535], so 18 bytes
1583 would be enough, the compiler might not know that. To avoid
1584 -Wformat-truncation false positive, use a larger size. */
1585 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
1586 }
1587 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
1588 switch_to_section (s);
1589 assemble_aligned_integer (INIT_ARRAY_ENTRY_BYTES, symbol);
1590 }
1591
1592 /* Map rtx_code to unsigned PRU branch op suffix. Callers must
1593 handle sign comparison themselves for signed operations. */
1594 static const char *
1595 pru_comparison_str (enum rtx_code cond)
1596 {
1597 switch (cond)
1598 {
1599 case NE: return "ne";
1600 case EQ: return "eq";
1601 case GEU: return "ge";
1602 case GTU: return "gt";
1603 case LEU: return "le";
1604 case LTU: return "lt";
1605 default: gcc_unreachable ();
1606 }
1607 }
1608
1609 /* Access some RTX as INT_MODE. If X is a CONST_FIXED we can get
1610 the bit representation of X by "casting" it to CONST_INT. */
1611
1612 static rtx
1613 pru_to_int_mode (rtx x)
1614 {
1615 machine_mode mode = GET_MODE (x);
1616
1617 return VOIDmode == mode
1618 ? x
1619 : simplify_gen_subreg (int_mode_for_mode (mode).require (), x, mode, 0);
1620 }
1621
1622 /* Translate between the MachineDescription notion
1623 of 8-bit consecutive registers, to the PRU
1624 assembler syntax of REGWORD[.SUBREG]. */
1625 static const char *
1626 pru_asm_regname (rtx op)
1627 {
1628 static char canon_reg_names[3][LAST_GP_REGNUM][8];
1629 int speci, regi;
1630
1631 gcc_assert (REG_P (op));
1632
1633 if (!canon_reg_names[0][0][0])
1634 {
1635 for (regi = 0; regi < LAST_GP_REGNUM; regi++)
1636 for (speci = 0; speci < 3; speci++)
1637 {
1638 const int sz = (speci == 0) ? 1 : ((speci == 1) ? 2 : 4);
1639 if ((regi + sz) > (32 * 4))
1640 continue; /* Invalid entry. */
1641
1642 /* Construct the lookup table. */
1643 const char *suffix = "";
1644
1645 switch ((sz << 8) | (regi % 4))
1646 {
1647 case (1 << 8) | 0: suffix = ".b0"; break;
1648 case (1 << 8) | 1: suffix = ".b1"; break;
1649 case (1 << 8) | 2: suffix = ".b2"; break;
1650 case (1 << 8) | 3: suffix = ".b3"; break;
1651 case (2 << 8) | 0: suffix = ".w0"; break;
1652 case (2 << 8) | 1: suffix = ".w1"; break;
1653 case (2 << 8) | 2: suffix = ".w2"; break;
1654 case (4 << 8) | 0: suffix = ""; break;
1655 default:
1656 /* Invalid entry. */
1657 continue;
1658 }
1659 sprintf (&canon_reg_names[speci][regi][0],
1660 "r%d%s", regi / 4, suffix);
1661 }
1662 }
1663
1664 switch (GET_MODE_SIZE (GET_MODE (op)))
1665 {
1666 case 1: speci = 0; break;
1667 case 2: speci = 1; break;
1668 case 4: speci = 2; break;
1669 case 8: speci = 2; break; /* Existing GCC test cases are not using %F. */
1670 default: gcc_unreachable ();
1671 }
1672 regi = REGNO (op);
1673 gcc_assert (regi < LAST_GP_REGNUM);
1674 gcc_assert (canon_reg_names[speci][regi][0]);
1675
1676 return &canon_reg_names[speci][regi][0];
1677 }
1678
1679 /* Print the operand OP to file stream FILE modified by LETTER.
1680 LETTER can be one of:
1681
1682 b: prints the register byte start (used by LBBO/SBBO).
1683 B: prints 'c' or 'b' for CTABLE or REG base in a memory address.
1684 F: Full 32-bit register.
1685 H: Higher 16-bits of a const_int operand.
1686 L: Lower 16-bits of a const_int operand.
1687 N: prints next 32-bit register (upper 32bits of a 64bit REG couple).
1688 P: prints swapped condition.
1689 Q: prints swapped and reversed condition.
1690 R: prints reversed condition.
1691 S: print operand mode size (but do not print the operand itself).
1692 T: print exact_log2 () for const_int operands.
1693 u: print QI constant integer as unsigned. No transformation for regs.
1694 V: print exact_log2 () of negated const_int operands.
1695 w: Lower 32-bits of a const_int operand.
1696 W: Upper 32-bits of a const_int operand.
1697 */
1698 static void
1699 pru_print_operand (FILE *file, rtx op, int letter)
1700 {
1701 switch (letter)
1702 {
1703 case 'S':
1704 fprintf (file, "%d", GET_MODE_SIZE (GET_MODE (op)));
1705 return;
1706
1707 default:
1708 break;
1709 }
1710
1711 if (comparison_operator (op, VOIDmode))
1712 {
1713 enum rtx_code cond = GET_CODE (op);
1714 gcc_assert (!pru_signed_cmp_operator (op, VOIDmode));
1715
1716 switch (letter)
1717 {
1718 case 0:
1719 fprintf (file, "%s", pru_comparison_str (cond));
1720 return;
1721 case 'P':
1722 fprintf (file, "%s", pru_comparison_str (swap_condition (cond)));
1723 return;
1724 case 'Q':
1725 cond = swap_condition (cond);
1726 /* Fall through. */
1727 case 'R':
1728 fprintf (file, "%s", pru_comparison_str (reverse_condition (cond)));
1729 return;
1730 }
1731 }
1732
1733 switch (GET_CODE (op))
1734 {
1735 case REG:
1736 if (letter == 0 || letter == 'u')
1737 {
1738 fprintf (file, "%s", pru_asm_regname (op));
1739 return;
1740 }
1741 else if (letter == 'b')
1742 {
1743 if (REGNO (op) > LAST_NONIO_GP_REGNUM)
1744 {
1745 output_operand_lossage ("I/O register operand for '%%%c'",
1746 letter);
1747 return;
1748 }
1749 fprintf (file, "r%d.b%d", REGNO (op) / 4, REGNO (op) % 4);
1750 return;
1751 }
1752 else if (letter == 'F' || letter == 'N')
1753 {
1754 if (REGNO (op) > LAST_NONIO_GP_REGNUM - 1)
1755 {
1756 output_operand_lossage ("I/O register operand for '%%%c'",
1757 letter);
1758 return;
1759 }
1760 if (REGNO (op) % 4 != 0)
1761 {
1762 output_operand_lossage ("non 32 bit register operand for '%%%c'",
1763 letter);
1764 return;
1765 }
1766 fprintf (file, "r%d", REGNO (op) / 4 + (letter == 'N' ? 1 : 0));
1767 return;
1768 }
1769 break;
1770
1771 case CONST_INT:
1772 if (letter == 'H')
1773 {
1774 HOST_WIDE_INT val = INTVAL (op);
1775 val = (val >> 16) & 0xFFFF;
1776 output_addr_const (file, gen_int_mode (val, SImode));
1777 return;
1778 }
1779 else if (letter == 'L')
1780 {
1781 HOST_WIDE_INT val = INTVAL (op);
1782 val &= 0xFFFF;
1783 output_addr_const (file, gen_int_mode (val, SImode));
1784 return;
1785 }
1786 else if (letter == 'T')
1787 {
1788 /* The predicate should have already validated the 1-high-bit
1789 requirement. Use CTZ here to deal with constant's sign
1790 extension. */
1791 HOST_WIDE_INT val = wi::ctz (INTVAL (op));
1792 if (val < 0 || val > 31)
1793 {
1794 output_operand_lossage ("invalid operand for '%%%c'", letter);
1795 return;
1796 }
1797 output_addr_const (file, gen_int_mode (val, SImode));
1798 return;
1799 }
1800 else if (letter == 'V')
1801 {
1802 HOST_WIDE_INT val = wi::ctz (~INTVAL (op));
1803 if (val < 0 || val > 31)
1804 {
1805 output_operand_lossage ("invalid operand for '%%%c'", letter);
1806 return;
1807 }
1808 output_addr_const (file, gen_int_mode (val, SImode));
1809 return;
1810 }
1811 else if (letter == 'w')
1812 {
1813 HOST_WIDE_INT val = INTVAL (op) & 0xffffffff;
1814 output_addr_const (file, gen_int_mode (val, SImode));
1815 return;
1816 }
1817 else if (letter == 'W')
1818 {
1819 HOST_WIDE_INT val = (INTVAL (op) >> 32) & 0xffffffff;
1820 output_addr_const (file, gen_int_mode (val, SImode));
1821 return;
1822 }
1823 else if (letter == 'u')
1824 {
1825 /* Workaround GCC's representation of QI constants in sign-extended
1826 form, and PRU's assembler insistence on unsigned constant
1827 integers. See the notes about O constraint. */
1828 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op) & 0xff);
1829 return;
1830 }
1831 /* Else, fall through. */
1832
1833 case CONST:
1834 case LABEL_REF:
1835 case SYMBOL_REF:
1836 if (letter == 0)
1837 {
1838 output_addr_const (file, op);
1839 return;
1840 }
1841 break;
1842
1843 case CONST_FIXED:
1844 {
1845 HOST_WIDE_INT ival = INTVAL (pru_to_int_mode (op));
1846 if (letter != 0)
1847 output_operand_lossage ("unsupported code '%c' for fixed-point:",
1848 letter);
1849 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
1850 return;
1851 }
1852 break;
1853
1854 case CONST_DOUBLE:
1855 if (letter == 0)
1856 {
1857 long val;
1858
1859 if (GET_MODE (op) != SFmode)
1860 {
1861 output_operand_lossage ("double constants not supported");
1862 return;
1863 }
1864 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), val);
1865 fprintf (file, "0x%lx", val);
1866 return;
1867 }
1868 else if (letter == 'w' || letter == 'W')
1869 {
1870 long t[2];
1871 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), t);
1872 fprintf (file, "0x%lx", t[letter == 'w' ? 0 : 1]);
1873 return;
1874 }
1875 else
1876 {
1877 output_operand_lossage ("invalid operand for '%%%c'", letter);
1878 return;
1879 }
1880 break;
1881
1882 case SUBREG:
1883 /* Subregs should not appear at so late stage. */
1884 gcc_unreachable ();
1885 break;
1886
1887 case MEM:
1888 if (letter == 0)
1889 {
1890 output_address (VOIDmode, op);
1891 return;
1892 }
1893 else if (letter == 'B')
1894 {
1895 rtx base = XEXP (op, 0);
1896 if (GET_CODE (base) == PLUS)
1897 {
1898 rtx op0 = XEXP (base, 0);
1899 rtx op1 = XEXP (base, 1);
1900
1901 /* PLUS cannot have two constant operands, so first one
1902 of them must be a REG, hence we must check for an
1903 exact base address. */
1904 if (ctable_base_operand (op1, VOIDmode))
1905 {
1906 fprintf (file, "c");
1907 return;
1908 }
1909 else if (REG_P (op0))
1910 {
1911 fprintf (file, "b");
1912 return;
1913 }
1914 else
1915 gcc_unreachable ();
1916 }
1917 else if (REG_P (base))
1918 {
1919 fprintf (file, "b");
1920 return;
1921 }
1922 else if (ctable_addr_operand (base, VOIDmode))
1923 {
1924 fprintf (file, "c");
1925 return;
1926 }
1927 else
1928 gcc_unreachable ();
1929 }
1930 break;
1931
1932 case CODE_LABEL:
1933 if (letter == 0)
1934 {
1935 output_addr_const (file, op);
1936 return;
1937 }
1938 break;
1939
1940 default:
1941 break;
1942 }
1943
1944 output_operand_lossage ("unsupported operand %s for code '%c'",
1945 GET_RTX_NAME (GET_CODE (op)), letter);
1946 }
1947
1948 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
1949 static void
1950 pru_print_operand_address (FILE *file, machine_mode mode, rtx op)
1951 {
1952 if (CONSTANT_ADDRESS_P (op) && text_segment_operand (op, VOIDmode))
1953 {
1954 output_operand_lossage ("unexpected text address:");
1955 return;
1956 }
1957
1958 switch (GET_CODE (op))
1959 {
1960 case CONST:
1961 case LABEL_REF:
1962 case CONST_WIDE_INT:
1963 case SYMBOL_REF:
1964 break;
1965
1966 case CONST_INT:
1967 {
1968 unsigned HOST_WIDE_INT caddr = INTVAL (op);
1969 int base = pru_get_ctable_base_index (caddr);
1970 int offs = pru_get_ctable_base_offset (caddr);
1971 if (base < 0)
1972 {
1973 output_operand_lossage ("unsupported constant address:");
1974 return;
1975 }
1976 fprintf (file, "%d, %d", base, offs);
1977 return;
1978 }
1979 break;
1980
1981 case PLUS:
1982 {
1983 int base;
1984 rtx op0 = XEXP (op, 0);
1985 rtx op1 = XEXP (op, 1);
1986
1987 if (REG_P (op0) && CONST_INT_P (op1)
1988 && pru_get_ctable_exact_base_index (INTVAL (op1)) >= 0)
1989 {
1990 base = pru_get_ctable_exact_base_index (INTVAL (op1));
1991 fprintf (file, "%d, %s", base, pru_asm_regname (op0));
1992 return;
1993 }
1994 else if (REG_P (op1) && CONST_INT_P (op0)
1995 && pru_get_ctable_exact_base_index (INTVAL (op0)) >= 0)
1996 {
1997 /* Not a valid RTL. */
1998 gcc_unreachable ();
1999 }
2000 else if (REG_P (op0) && CONSTANT_P (op1))
2001 {
2002 fprintf (file, "%s, ", pru_asm_regname (op0));
2003 output_addr_const (file, op1);
2004 return;
2005 }
2006 else if (REG_P (op1) && CONSTANT_P (op0))
2007 {
2008 /* Not a valid RTL. */
2009 gcc_unreachable ();
2010 }
2011 else if (REG_P (op1) && REG_P (op0))
2012 {
2013 fprintf (file, "%s, %s", pru_asm_regname (op0),
2014 pru_asm_regname (op1));
2015 return;
2016 }
2017 }
2018 break;
2019
2020 case REG:
2021 fprintf (file, "%s, 0", pru_asm_regname (op));
2022 return;
2023
2024 case MEM:
2025 {
2026 rtx base = XEXP (op, 0);
2027 pru_print_operand_address (file, mode, base);
2028 return;
2029 }
2030 default:
2031 break;
2032 }
2033
2034 output_operand_lossage ("unsupported memory expression:");
2035 }
2036
2037 /* Implement TARGET_ASM_FUNCTION_PROLOGUE. */
2038 static void
2039 pru_asm_function_prologue (FILE *file)
2040 {
2041 if (flag_verbose_asm || flag_debug_asm)
2042 pru_dump_frame_layout (file);
2043 }
2044
2045 /* Implement `TARGET_ASM_INTEGER'.
2046 Target hook for assembling integer objects. PRU version needs
2047 special handling for references to pmem. Code copied from AVR. */
2048
2049 static bool
2050 pru_assemble_integer (rtx x, unsigned int size, int aligned_p)
2051 {
2052 if (size == POINTER_SIZE / BITS_PER_UNIT
2053 && aligned_p
2054 && text_segment_operand (x, VOIDmode))
2055 {
2056 fputs ("\t.4byte\t%pmem(", asm_out_file);
2057 output_addr_const (asm_out_file, x);
2058 fputs (")\n", asm_out_file);
2059
2060 return true;
2061 }
2062 else if (size == INIT_ARRAY_ENTRY_BYTES
2063 && aligned_p
2064 && text_segment_operand (x, VOIDmode))
2065 {
2066 fputs ("\t.2byte\t%pmem(", asm_out_file);
2067 output_addr_const (asm_out_file, x);
2068 fputs (")\n", asm_out_file);
2069
2070 return true;
2071 }
2072 else
2073 {
2074 return default_assemble_integer (x, size, aligned_p);
2075 }
2076 }
2077
2078 /* Implement TARGET_SECTION_TYPE_FLAGS. */
2079
2080 static unsigned int
2081 pru_section_type_flags (tree decl, const char *name, int reloc)
2082 {
2083 unsigned int flags = default_section_type_flags (decl, name, reloc);
2084
2085 /* The .pru_irq_map section is not meant to be loaded into the target
2086 memory. Instead its contents are read by the host remoteproc loader.
2087 To prevent being marked as a loadable (allocated) section, the
2088 .pru_irq_map section is intercepted and marked as a debug section. */
2089 if (!strcmp (name, ".pru_irq_map"))
2090 flags = SECTION_DEBUG | SECTION_RETAIN;
2091
2092 return flags;
2093 }
2094
2095 /* Implement TARGET_ASM_FILE_START. */
2096
2097 static void
2098 pru_file_start (void)
2099 {
2100 default_file_start ();
2101
2102 /* Compiler will take care of placing %label, so there is no
2103 need to confuse users with this warning. */
2104 fprintf (asm_out_file, "\t.set no_warn_regname_label\n");
2105 }
2106
2107 /* Scan type TYP for pointer references to address space other than
2108 ADDR_SPACE_GENERIC. Return true if such reference is found.
2109 Much of this code was taken from the avr port. */
2110
2111 static bool
2112 pru_nongeneric_pointer_addrspace (tree typ)
2113 {
2114 while (ARRAY_TYPE == TREE_CODE (typ))
2115 typ = TREE_TYPE (typ);
2116
2117 if (POINTER_TYPE_P (typ))
2118 {
2119 addr_space_t as;
2120 tree target = TREE_TYPE (typ);
2121
2122 /* Pointer to function: Test the function's return type. */
2123 if (FUNCTION_TYPE == TREE_CODE (target))
2124 return pru_nongeneric_pointer_addrspace (TREE_TYPE (target));
2125
2126 /* "Ordinary" pointers... */
2127
2128 while (TREE_CODE (target) == ARRAY_TYPE)
2129 target = TREE_TYPE (target);
2130
2131 as = TYPE_ADDR_SPACE (target);
2132
2133 if (!ADDR_SPACE_GENERIC_P (as))
2134 return true;
2135
2136 /* Scan pointer's target type. */
2137 return pru_nongeneric_pointer_addrspace (target);
2138 }
2139
2140 return false;
2141 }
2142
2143 /* Implement `TARGET_INSERT_ATTRIBUTES'. For PRU it's used as a hook to
2144 provide better diagnostics for some invalid usages of the __regio_symbol
2145 address space.
2146
2147 Any escapes of the following checks are supposed to be caught
2148 during the "mov<mode>" pattern expansion. */
2149
2150 static void
2151 pru_insert_attributes (tree node, tree *attributes ATTRIBUTE_UNUSED)
2152 {
2153
2154 /* Validate __regio_symbol variable declarations. */
2155 if (VAR_P (node))
2156 {
2157 const char *name = DECL_NAME (node)
2158 ? IDENTIFIER_POINTER (DECL_NAME (node))
2159 : "<unknown>";
2160 tree typ = TREE_TYPE (node);
2161 addr_space_t as = TYPE_ADDR_SPACE (typ);
2162
2163 if (as == ADDR_SPACE_GENERIC)
2164 return;
2165
2166 if (AGGREGATE_TYPE_P (typ))
2167 {
2168 error ("aggregate types are prohibited in "
2169 "%<__regio_symbol%> address space");
2170 /* Don't bother anymore. Below checks would pile
2171 meaningless errors, which would confuse user. */
2172 return;
2173 }
2174 if (DECL_INITIAL (node) != NULL_TREE)
2175 error ("variables in %<__regio_symbol%> address space "
2176 "cannot have initial value");
2177 if (DECL_REGISTER (node))
2178 error ("variables in %<__regio_symbol%> address space "
2179 "cannot be declared %<register%>");
2180 if (!TYPE_VOLATILE (typ))
2181 error ("variables in %<__regio_symbol%> address space "
2182 "must be declared %<volatile%>");
2183 if (!DECL_EXTERNAL (node))
2184 error ("variables in %<__regio_symbol%> address space "
2185 "must be declared %<extern%>");
2186 if (TYPE_MODE (typ) != SImode)
2187 error ("only 32-bit access is supported "
2188 "for %<__regio_symbol%> address space");
2189 if (strcmp (name, "__R30") != 0 && strcmp (name, "__R31") != 0)
2190 error ("register name %<%s%> not recognized "
2191 "in %<__regio_symbol%> address space", name);
2192 }
2193
2194 tree typ = NULL_TREE;
2195
2196 switch (TREE_CODE (node))
2197 {
2198 case FUNCTION_DECL:
2199 typ = TREE_TYPE (TREE_TYPE (node));
2200 break;
2201 case TYPE_DECL:
2202 case RESULT_DECL:
2203 case VAR_DECL:
2204 case FIELD_DECL:
2205 case PARM_DECL:
2206 typ = TREE_TYPE (node);
2207 break;
2208 case POINTER_TYPE:
2209 typ = node;
2210 break;
2211 default:
2212 break;
2213 }
2214 if (typ != NULL_TREE && pru_nongeneric_pointer_addrspace (typ))
2215 error ("pointers to %<__regio_symbol%> address space are prohibited");
2216 }
2217 \f
2218 /* Function argument related. */
2219
2220 /* Return the number of bytes needed for storing an argument with
2221 the given MODE and TYPE. */
2222 static int
2223 pru_function_arg_size (machine_mode mode, const_tree type)
2224 {
2225 HOST_WIDE_INT param_size;
2226
2227 if (mode == BLKmode)
2228 param_size = int_size_in_bytes (type);
2229 else
2230 param_size = GET_MODE_SIZE (mode);
2231
2232 /* Convert to words (round up). */
2233 param_size = (UNITS_PER_WORD - 1 + param_size) / UNITS_PER_WORD;
2234 gcc_assert (param_size >= 0);
2235
2236 return param_size;
2237 }
2238
/* Check if argument with the given size SZ (in bytes) must be
   passed/returned in a register.

   Reference:
   https://e2e.ti.com/support/development_tools/compiler/f/343/p/650176/2393029

   Arguments other than 8/16/24/32/64bits are passed on stack.  */
static bool
pru_arg_in_reg_bysize (size_t sz)
{
  switch (sz)
    {
    case 1:
    case 2:
    case 3:
    case 4:
    case 8:
      return true;
    default:
      return false;
    }
}
2251
2252 /* Helper function to get the starting storage HW register for an argument,
2253 or -1 if it must be passed on stack. The cum_v state is not changed. */
2254 static int
2255 pru_function_arg_regi (cumulative_args_t cum_v,
2256 machine_mode mode, const_tree type,
2257 bool named)
2258 {
2259 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2260 size_t argsize = pru_function_arg_size (mode, type);
2261 size_t i, bi;
2262 int regi = -1;
2263
2264 if (!pru_arg_in_reg_bysize (argsize))
2265 return -1;
2266
2267 if (!named)
2268 return -1;
2269
2270 /* Find the first available slot that fits. Yes, that's the PRU ABI. */
2271 for (i = 0; regi < 0 && i < ARRAY_SIZE (cum->regs_used); i++)
2272 {
2273 /* VLAs and vector types are not defined in the PRU ABI. Let's
2274 handle them the same as their same-sized counterparts. This way
2275 we do not need to treat BLKmode differently, and need only to check
2276 the size. */
2277 gcc_assert (argsize == 1 || argsize == 2 || argsize == 3
2278 || argsize == 4 || argsize == 8);
2279
2280 /* Ensure SI and DI arguments are stored in full registers only. */
2281 if ((argsize >= 4) && (i % 4) != 0)
2282 continue;
2283
2284 /* Structures with size 24 bits are passed starting at a full
2285 register boundary. */
2286 if (argsize == 3 && (i % 4) != 0)
2287 continue;
2288
2289 /* rX.w0/w1/w2 are OK. But avoid spreading the second byte
2290 into a different full register. */
2291 if (argsize == 2 && (i % 4) == 3)
2292 continue;
2293
2294 for (bi = 0;
2295 bi < argsize && (bi + i) < ARRAY_SIZE (cum->regs_used);
2296 bi++)
2297 {
2298 if (cum->regs_used[bi + i])
2299 break;
2300 }
2301 if (bi == argsize)
2302 regi = FIRST_ARG_REGNUM + i;
2303 }
2304
2305 return regi;
2306 }
2307
2308 /* Mark CUM_V that a function argument will occupy HW register slot starting
2309 at REGI. The number of consecutive 8-bit HW registers marked as occupied
2310 depends on the MODE and TYPE of the argument. */
2311 static void
2312 pru_function_arg_regi_mark_slot (int regi,
2313 cumulative_args_t cum_v,
2314 machine_mode mode, const_tree type,
2315 bool named)
2316 {
2317 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2318 HOST_WIDE_INT param_size = pru_function_arg_size (mode, type);
2319
2320 gcc_assert (named);
2321
2322 /* Mark all byte sub-registers occupied by argument as used. */
2323 while (param_size--)
2324 {
2325 gcc_assert (regi >= FIRST_ARG_REGNUM && regi <= LAST_ARG_REGNUM);
2326 gcc_assert (!cum->regs_used[regi - FIRST_ARG_REGNUM]);
2327 cum->regs_used[regi - FIRST_ARG_REGNUM] = true;
2328 regi++;
2329 }
2330 }
2331
2332 /* Define where to put the arguments to a function. Value is zero to
2333 push the argument on the stack, or a hard register in which to
2334 store the argument.
2335
2336 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2337 the preceding args and about the function being called.
2338 ARG is a description of the argument. */
2339
2340 static rtx
2341 pru_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
2342 {
2343 rtx return_rtx = NULL_RTX;
2344 int regi = pru_function_arg_regi (cum_v, arg.mode, arg.type, arg.named);
2345
2346 if (regi >= 0)
2347 return_rtx = gen_rtx_REG (arg.mode, regi);
2348
2349 return return_rtx;
2350 }
2351
2352 /* Implement TARGET_ARG_PARTIAL_BYTES. PRU never splits any arguments
2353 between registers and memory, so we can return 0. */
2354
2355 static int
2356 pru_arg_partial_bytes (cumulative_args_t, const function_arg_info &)
2357 {
2358 return 0;
2359 }
2360
2361 /* Update the data in CUM to advance over argument ARG. */
2362
2363 static void
2364 pru_function_arg_advance (cumulative_args_t cum_v,
2365 const function_arg_info &arg)
2366 {
2367 int regi = pru_function_arg_regi (cum_v, arg.mode, arg.type, arg.named);
2368
2369 if (regi >= 0)
2370 pru_function_arg_regi_mark_slot (regi, cum_v, arg.mode,
2371 arg.type, arg.named);
2372 }
2373
2374 /* Implement TARGET_FUNCTION_VALUE. */
2375 static rtx
2376 pru_function_value (const_tree ret_type, const_tree fn ATTRIBUTE_UNUSED,
2377 bool outgoing ATTRIBUTE_UNUSED)
2378 {
2379 return gen_rtx_REG (TYPE_MODE (ret_type), FIRST_RETVAL_REGNUM);
2380 }
2381
2382 /* Implement TARGET_LIBCALL_VALUE. */
2383 static rtx
2384 pru_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
2385 {
2386 return gen_rtx_REG (mode, FIRST_RETVAL_REGNUM);
2387 }
2388
2389 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
2390 static bool
2391 pru_function_value_regno_p (const unsigned int regno)
2392 {
2393 return regno == FIRST_RETVAL_REGNUM;
2394 }
2395
2396 /* Implement TARGET_RETURN_IN_MEMORY. */
2397 bool
2398 pru_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
2399 {
2400 bool in_memory = (!pru_arg_in_reg_bysize (int_size_in_bytes (type))
2401 || int_size_in_bytes (type) == -1);
2402
2403 return in_memory;
2404 }
2405 \f
2406 /* Implement TARGET_CAN_USE_DOLOOP_P. */
2407
2408 static bool
2409 pru_can_use_doloop_p (const widest_int &, const widest_int &iterations_max,
2410 unsigned int loop_depth, bool)
2411 {
2412 /* Considering limitations in the hardware, only use doloop
2413 for innermost loops which must be entered from the top. */
2414 if (loop_depth > 1)
2415 return false;
2416 /* PRU internal loop counter is 16bits wide. Remember that iterations_max
2417 holds the maximum number of loop latch executions, while PRU loop
2418 instruction needs the count of loop body executions. */
2419 if (iterations_max == 0 || wi::geu_p (iterations_max, 0xffff))
2420 return false;
2421
2422 return true;
2423 }
2424
2425 /* NULL if INSN insn is valid within a low-overhead loop.
2426 Otherwise return why doloop cannot be applied. */
2427
2428 static const char *
2429 pru_invalid_within_doloop (const rtx_insn *insn)
2430 {
2431 if (CALL_P (insn))
2432 return "Function call in the loop.";
2433
2434 if (JUMP_P (insn) && INSN_CODE (insn) == CODE_FOR_return)
2435 return "Return from a call instruction in the loop.";
2436
2437 if (NONDEBUG_INSN_P (insn)
2438 && INSN_CODE (insn) < 0
2439 && (GET_CODE (PATTERN (insn)) == ASM_INPUT
2440 || asm_noperands (PATTERN (insn)) >= 0))
2441 return "Loop contains asm statement.";
2442
2443 return NULL;
2444 }
2445
2446
2447 /* Figure out where to put LABEL, which is the label for a repeat loop.
2448 The loop ends just before LAST_INSN. If SHARED, insns other than the
2449 "repeat" might use LABEL to jump to the loop's continuation point.
2450
   Unless SHARED, the insns preceding LAST_INSN are scanned backwards and
   up to two recognized insns are counted.  Nops are then emitted before
   LAST_INSN to make up for any shortfall below two, and LABEL is emitted
   immediately before LAST_INSN.  When SHARED, no scan is done, so two
   nops are always added.

2451 Return the last instruction in the adjusted loop. As written, this is
   always LAST_INSN itself. */
2452
2453 static rtx_insn *
2454 pru_insert_loop_label_last (rtx_insn *last_insn, rtx_code_label *label,
2455 bool shared)
2456 {
2457 rtx_insn *next, *prev;
/* COUNT is the number of recognized insns found at the end of the loop. */
2458 int count = 0, code, icode;
2459
2460 if (dump_file)
2461 fprintf (dump_file, "considering end of repeat loop at insn %d\n",
2462 INSN_UID (last_insn));
2463
2464 /* Set PREV to the last insn in the loop. */
2465 prev = PREV_INSN (last_insn);
2466
2467 /* Set NEXT to the next insn after the loop label. */
/* NOTE(review): NEXT is only ever read when dumping below. */
2468 next = last_insn;
2469 if (!shared)
2470 while (prev != 0)
2471 {
2472 code = GET_CODE (prev);
/* Stop scanning at a call, a label or a barrier. */
2473 if (code == CALL_INSN || code == CODE_LABEL || code == BARRIER)
2474 break;
2475
2476 if (INSN_P (prev))
2477 {
/* For a SEQUENCE, continue with element 1 of its vector. */
2478 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
2479 prev = as_a <rtx_insn *> (XVECEXP (PATTERN (prev), 0, 1));
2480
2481 /* Other insns that should not be in the last two opcodes. */
2482 icode = recog_memoized (prev);
2483 if (icode < 0
2484 || icode == CODE_FOR_pruloophi
2485 || icode == CODE_FOR_pruloopsi)
2486 break;
2487
2488 count++;
2489 next = prev;
2490 if (dump_file)
2491 print_rtl_single (dump_file, next);
/* Two insns found, so no nops will be needed. */
2492 if (count == 2)
2493 break;
2494 }
2495 prev = PREV_INSN (prev);
2496 }
2497
2498 /* Insert the nops. */
2499 if (dump_file && count < 2)
2500 fprintf (dump_file, "Adding %d nop%s inside loop\n\n",
2501 2 - count, count == 1 ? "" : "s");
2502
/* Emit 2 - COUNT nops before LAST_INSN. */
2503 for (; count < 2; count++)
2504 emit_insn_before (gen_nop (), last_insn);
2505
2506 /* Insert the label. */
2507 emit_label_before (label, last_insn);
2508
2509 return last_insn;
2510 }
2511
2512 /* If IS_END is false, expand a canonical doloop_begin RTL into the
2513 PRU-specific doloop_begin_internal. Otherwise expand doloop_end to
2514 doloop_end_internal. */
2515 void
2516 pru_emit_doloop (rtx *operands, int is_end)
2517 {
2518 rtx tag;
2519
2520 if (cfun->machine->doloop_tags == 0
2521 || cfun->machine->doloop_tag_from_end == is_end)
2522 {
2523 cfun->machine->doloop_tags++;
2524 cfun->machine->doloop_tag_from_end = is_end;
2525 }
2526
2527 tag = GEN_INT (cfun->machine->doloop_tags - 1);
2528 machine_mode opmode = GET_MODE (operands[0]);
2529 gcc_assert (opmode == HImode || opmode == SImode);
2530
2531 if (is_end)
2532 emit_jump_insn (gen_doloop_end_internal (opmode, operands[0],
2533 operands[1], tag));
2534 else
2535 emit_insn (gen_doloop_begin_internal (opmode, operands[0],
2536 operands[0], tag));
2537 }
2538
2539
2540 /* Code for converting doloop_begins and doloop_ends into valid
2541 PRU instructions. Idea and code snippets borrowed from mep port.
2542
2543 A doloop_begin is just a placeholder:
2544
2545 $count = unspec ($count)
2546
2547 where $count is initially the number of iterations.
2548 doloop_end has the form:
2549
2550 if (--$count != 0) goto label
2551
2552 The counter variable is private to the doloop insns, nothing else
2553 relies on its value.
2554
2555 There are three cases, in decreasing order of preference:
2556
2557 1. A loop has exactly one doloop_begin and one doloop_end.
2558 The doloop_end branches to the first instruction after
2559 the doloop_begin.
2560
2561 In this case we can replace the doloop_begin with a LOOP
2562 instruction and remove the doloop_end. I.e.:
2563
2564 $count1 = unspec ($count1)
2565 label:
2566 ...
2567 if (--$count2 != 0) goto label
2568
2569 becomes:
2570
2571 LOOP end_label,$count1
2572 label:
2573 ...
2574 end_label:
2575 # end loop
2576
2577 2. As for (1), except there are several doloop_ends. One of them
2578 (call it X) falls through to a label L. All the others fall
2579 through to branches to L.
2580
2581 In this case, we remove X and replace the other doloop_ends
2582 with branches to the LOOP label. For example:
2583
2584 $count1 = unspec ($count1)
2585 label:
2586 ...
2587 if (--$count1 != 0) goto label
2588 end_label:
2589 ...
2590 if (--$count2 != 0) goto label
2591 goto end_label
2592
2593 becomes:
2594
2595 LOOP end_label,$count1
2596 label:
2597 ...
2598 end_label:
2599 # end repeat
2600 ...
2601 goto end_label
2602
2603 3. The fallback case. Replace doloop_begins with:
2604
2605 $count = $count
2606
2607 Replace doloop_ends with the equivalent of:
2608
2609 $count = $count - 1
2610 if ($count != 0) goto loop_label
2611
2612 */
2613
2614 /* A structure describing one doloop_begin. */
2615 struct pru_doloop_begin {
2616 /* The next doloop_begin with the same tag. */
2617 struct pru_doloop_begin *next;
2618
2619 /* The instruction itself. */
2620 rtx_insn *insn;
2621
2622 /* The initial counter value. */
2623 rtx loop_count;
2624
2625 /* The counter register. */
2626 rtx counter;
2627 };
2628
2629 /* A structure describing a doloop_end. */
2630 struct pru_doloop_end {
2631 /* The next doloop_end with the same loop tag. */
2632 struct pru_doloop_end *next;
2633
2634 /* The instruction itself. */
2635 rtx_insn *insn;
2636
2637 /* The first instruction after INSN when the branch isn't taken. */
2638 rtx_insn *fallthrough;
2639
2640 /* The location of the counter value. Since doloop_end_internal is a
2641 jump instruction, it has to allow the counter to be stored anywhere
2642 (any non-fixed register). */
2643 rtx counter;
2644
2645 /* The target label (the place where the insn branches when the counter
2646 isn't zero). */
2647 rtx label;
2648
2649 /* A scratch register. Only available when COUNTER isn't stored
2650 in a general register. */
2651 rtx scratch;
2652 };
2653
2654
/* Everything known about one do-while loop.  */
struct pru_doloop {
  /* List of all the doloop_begins of this loop (in no particular order).  */
  struct pru_doloop_begin *begin;

  /* List of all the doloop_ends.  When there is more than one, the list
     is arranged so that the first one is the most likely to be X in
     case (2) above.  */
  struct pru_doloop_end *end;
};
2664
2665
2666 /* Return true if LOOP can be converted into LOOP form
2667 (that is, if it matches cases (1) or (2) above). */
2668
2669 static bool
2670 pru_repeat_loop_p (struct pru_doloop *loop)
2671 {
2672 struct pru_doloop_end *end;
2673 rtx_insn *fallthrough;
2674
2675 /* There must be exactly one doloop_begin and at least one doloop_end. */
2676 if (loop->begin == 0 || loop->end == 0 || loop->begin->next != 0)
2677 return false;
2678
2679 /* The first doloop_end (X) must branch back to the insn after
2680 the doloop_begin. */
2681 if (prev_real_insn (as_a<rtx_insn *> (loop->end->label)) != loop->begin->insn)
2682 return false;
2683
2684 /* Check that the first doloop_end (X) can actually reach
2685 doloop_begin () with U8_PCREL relocation for LOOP instruction. */
2686 if (get_attr_length (loop->end->insn) != 4)
2687 return false;
2688
2689 /* All the other doloop_ends must branch to the same place as X.
2690 When the branch isn't taken, they must jump to the instruction
2691 after X. */
2692 fallthrough = loop->end->fallthrough;
2693 for (end = loop->end->next; end != 0; end = end->next)
2694 if (end->label != loop->end->label
2695 || !simplejump_p (end->fallthrough)
2696 || fallthrough
2697 != next_real_insn (JUMP_LABEL_AS_INSN (end->fallthrough)))
2698 return false;
2699
2700 return true;
2701 }
2702
2703
2704 /* The main repeat reorg function. See comment above for details.

   INSNS is the first insn of the chain to scan.  The function first
   collects all doloop_begin_internal and doloop_end_internal insns,
   grouped by their loop tag (operand 2).  Each loop is then converted
   either to a pruloop/pruloop_end pair (cases (1) and (2)), or to an
   explicit counter move plus decrement-and-branch sequence (case (3)). */
2705
2706 static void
2707 pru_reorg_loop (rtx_insn *insns)
2708 {
2709 rtx_insn *insn;
2710 struct pru_doloop *loops, *loop;
2711 struct pru_doloop_begin *begin;
2712 struct pru_doloop_end *end;
2713 size_t tmpsz;
2714
2715 /* Quick exit if we haven't created any loops. */
2716 if (cfun->machine->doloop_tags == 0)
2717 return;
2718
2719 /* Create an array of pru_doloop structures. */
/* One zero-initialized entry per loop tag, allocated with alloca. */
2720 tmpsz = sizeof (loops[0]) * cfun->machine->doloop_tags;
2721 loops = (struct pru_doloop *) alloca (tmpsz);
2722 memset (loops, 0, sizeof (loops[0]) * cfun->machine->doloop_tags);
2723
2724 /* Search the function for do-while insns and group them by loop tag. */
2725 for (insn = insns; insn; insn = NEXT_INSN (insn))
2726 if (INSN_P (insn))
2727 switch (recog_memoized (insn))
2728 {
2729 case CODE_FOR_doloop_begin_internalhi:
2730 case CODE_FOR_doloop_begin_internalsi:
2731 insn_extract (insn);
/* Operand 2 is the loop tag, used as index into LOOPS. */
2732 loop = &loops[INTVAL (recog_data.operand[2])];
2733
2734 tmpsz = sizeof (struct pru_doloop_begin);
2735 begin = (struct pru_doloop_begin *) alloca (tmpsz);
/* Push the new entry at the head of the loop's begin list. */
2736 begin->next = loop->begin;
2737 begin->insn = insn;
2738 begin->loop_count = recog_data.operand[1];
2739 begin->counter = recog_data.operand[0];
2740
2741 loop->begin = begin;
2742 break;
2743
2744 case CODE_FOR_doloop_end_internalhi:
2745 case CODE_FOR_doloop_end_internalsi:
2746 insn_extract (insn);
2747 loop = &loops[INTVAL (recog_data.operand[2])];
2748
2749 tmpsz = sizeof (struct pru_doloop_end);
2750 end = (struct pru_doloop_end *) alloca (tmpsz);
2751 end->insn = insn;
2752 end->fallthrough = next_real_insn (insn);
2753 end->counter = recog_data.operand[0];
2754 end->label = recog_data.operand[1];
2755 end->scratch = recog_data.operand[3];
2756
2757 /* If this insn falls through to an unconditional jump,
2758 give it a lower priority than the others. */
/* Lower priority means being linked in after the current list head;
   otherwise the new entry becomes the head itself. */
2759 if (loop->end != 0 && simplejump_p (end->fallthrough))
2760 {
2761 end->next = loop->end->next;
2762 loop->end->next = end;
2763 }
2764 else
2765 {
2766 end->next = loop->end;
2767 loop->end = end;
2768 }
2769 break;
2770 }
2771
2772 /* Convert the insns for each loop in turn. */
2773 for (loop = loops; loop < loops + cfun->machine->doloop_tags; loop++)
2774 if (pru_repeat_loop_p (loop))
2775 {
2776 /* Case (1) or (2). */
2777 rtx_code_label *repeat_label;
2778 rtx label_ref;
2779 rtx loop_rtx;
2780
2781 /* Create a new label for the repeat insn. */
2782 repeat_label = gen_label_rtx ();
2783
2784 /* Replace the doloop_begin with a repeat. We get rid
2785 of the iteration register because LOOP instruction
2786 will utilize a LOOP register internal to the PRU core. */
2787 label_ref = gen_rtx_LABEL_REF (VOIDmode, repeat_label);
2788 machine_mode loop_mode = GET_MODE (loop->begin->loop_count);
/* A VOIDmode count must be a CONST_INT accepted by UBYTE_INT; it is
   then handled as SImode. */
2789 if (loop_mode == VOIDmode)
2790 {
2791 gcc_assert (CONST_INT_P (loop->begin->loop_count));
2792 gcc_assert (UBYTE_INT ( INTVAL (loop->begin->loop_count)));
2793 loop_mode = SImode;
2794 }
2795 gcc_assert (loop_mode == HImode || loop_mode == SImode);
2796 loop_rtx = gen_pruloop (loop_mode, loop->begin->loop_count, label_ref);
2797 emit_insn_before (loop_rtx, loop->begin->insn);
2798
2799 delete_insn (loop->begin->insn);
2800
2801 /* Insert the repeat label before the first doloop_end.
2802 Fill the gap with nops if LOOP insn is less than 2
2803 instructions away from loop->end. */
2804 pru_insert_loop_label_last (loop->end->insn, repeat_label,
2805 loop->end->next != 0);
2806
2807 /* Emit a pruloop_end (to improve the readability of the output). */
2808 emit_insn_before (gen_pruloop_end (), loop->end->insn);
2809
2810 /* HACK: TODO: This is usually not needed, but is required for
2811 a few rare cases where a JUMP that breaks the loop
2812 references the LOOP_END address. In other words, since
2813 we're missing a real "loop_end" instruction, a loop "break"
2814 may accidentally reference the loop end itself, and thus
2815 continuing the cycle. */
/* One guard is emitted for every used label found between the first
   doloop_end and the next real insn. */
2816 for (insn = NEXT_INSN (loop->end->insn);
2817 insn != next_real_insn (loop->end->insn);
2818 insn = NEXT_INSN (insn))
2819 {
2820 if (LABEL_P (insn) && LABEL_NUSES (insn) > 0)
2821 emit_insn_before (gen_nop_loop_guard (), loop->end->insn);
2822 }
2823
2824 /* Delete the first doloop_end. */
2825 delete_insn (loop->end->insn);
2826
2827 /* Replace the others with branches to REPEAT_LABEL. */
/* The unconditional jump each of them falls through to is deleted
   as well. */
2828 for (end = loop->end->next; end != 0; end = end->next)
2829 {
2830 rtx_insn *newjmp;
2831 newjmp = emit_jump_insn_before (gen_jump (repeat_label), end->insn);
2832 JUMP_LABEL (newjmp) = repeat_label;
2833 delete_insn (end->insn);
2834 delete_insn (end->fallthrough);
2835 }
2836 }
2837 else
2838 {
2839 /* Case (3). First replace all the doloop_begins with setting
2840 the HW register used for loop counter. */
2841 for (begin = loop->begin; begin != 0; begin = begin->next)
2842 {
2843 insn = gen_move_insn (copy_rtx (begin->counter),
2844 copy_rtx (begin->loop_count));
2845 emit_insn_before (insn, begin->insn);
2846 delete_insn (begin->insn);
2847 }
2848
2849 /* Replace all the doloop_ends with decrement-and-branch sequences. */
2850 for (end = loop->end; end != 0; end = end->next)
2851 {
2852 rtx reg;
2853
/* The replacement insns are collected in a separate sequence. */
2854 start_sequence ();
2855
2856 /* Load the counter value into a general register. */
/* The scratch register is used when the counter is not a register,
   or is a register numbered above LAST_NONIO_GP_REGNUM. */
2857 reg = end->counter;
2858 if (!REG_P (reg) || REGNO (reg) > LAST_NONIO_GP_REGNUM)
2859 {
2860 reg = end->scratch;
2861 emit_move_insn (copy_rtx (reg), copy_rtx (end->counter));
2862 }
2863
2864 /* Decrement the counter. */
2865 emit_insn (gen_add3_insn (copy_rtx (reg), copy_rtx (reg),
2866 constm1_rtx));
2867
2868 /* Copy it back to its original location. */
2869 if (reg != end->counter)
2870 emit_move_insn (copy_rtx (end->counter), copy_rtx (reg));
2871
2872 /* Jump back to the start label. */
2873 insn = emit_jump_insn (gen_cbranchsi4 (gen_rtx_NE (VOIDmode, reg,
2874 const0_rtx),
2875 reg,
2876 const0_rtx,
2877 end->label));
2878
/* Register the new jump as one more user of the label. */
2879 JUMP_LABEL (insn) = end->label;
2880 LABEL_NUSES (end->label)++;
2881
2882 /* Emit the whole sequence before the doloop_end. */
2883 insn = get_insns ();
2884 end_sequence ();
2885 emit_insn_before (insn, end->insn);
2886
2887 /* Delete the doloop_end. */
2888 delete_insn (end->insn);
2889 }
2890 }
2891 }
2892
2893 /* Implement TARGET_MACHINE_DEPENDENT_REORG. */
2894 static void
2895 pru_reorg (void)
2896 {
2897 rtx_insn *insns = get_insns ();
2898
2899 compute_bb_for_insn ();
2900 df_analyze ();
2901
2902 /* Need correct insn lengths for allowing LOOP instruction
2903 emitting due to U8_PCREL limitations. */
2904 shorten_branches (get_insns ());
2905
2906 /* The generic reorg_loops () is not suitable for PRU because
2907 it doesn't handle doloop_begin/end tying. And we need our
2908 doloop_begin emitted before reload. It is difficult to coalesce
2909 UBYTE constant initial loop values into the LOOP insn during
2910 machine reorg phase. */
2911 pru_reorg_loop (insns);
2912
2913 df_finish_pass (false);
2914 }
2915 \f
2916 /* Enumerate all PRU-specific builtins. */
2917 enum pru_builtin
2918 {
2919 PRU_BUILTIN_DELAY_CYCLES,
2920 PRU_BUILTIN_HALT,
2921 PRU_BUILTIN_LMBD,
2922 PRU_BUILTIN_max
2923 };
2924
2925 static GTY(()) tree pru_builtins [(int) PRU_BUILTIN_max];
2926
2927 /* Implement TARGET_INIT_BUILTINS. */
2928
2929 static void
2930 pru_init_builtins (void)
2931 {
2932 tree void_ftype_longlong
2933 = build_function_type_list (void_type_node,
2934 long_long_integer_type_node,
2935 NULL);
2936 tree uint_ftype_uint_uint
2937 = build_function_type_list (unsigned_type_node,
2938 unsigned_type_node,
2939 unsigned_type_node,
2940 NULL);
2941
2942 tree void_ftype_void
2943 = build_function_type_list (void_type_node,
2944 void_type_node,
2945 NULL);
2946
2947 pru_builtins[PRU_BUILTIN_DELAY_CYCLES]
2948 = add_builtin_function ("__delay_cycles", void_ftype_longlong,
2949 PRU_BUILTIN_DELAY_CYCLES, BUILT_IN_MD, NULL,
2950 NULL_TREE);
2951
2952 pru_builtins[PRU_BUILTIN_HALT]
2953 = add_builtin_function ("__halt", void_ftype_void,
2954 PRU_BUILTIN_HALT, BUILT_IN_MD, NULL,
2955 NULL_TREE);
2956
2957 pru_builtins[PRU_BUILTIN_LMBD]
2958 = add_builtin_function ("__lmbd", uint_ftype_uint_uint,
2959 PRU_BUILTIN_LMBD, BUILT_IN_MD, NULL,
2960 NULL_TREE);
2961 }
2962
2963 /* Implement TARGET_BUILTIN_DECL. */
2964
2965 static tree
2966 pru_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
2967 {
2968 switch (code)
2969 {
2970 case PRU_BUILTIN_DELAY_CYCLES:
2971 case PRU_BUILTIN_HALT:
2972 case PRU_BUILTIN_LMBD:
2973 return pru_builtins[code];
2974 default:
2975 return error_mark_node;
2976 }
2977 }
2978 \f
2979 /* Emit a sequence of one or more delay_cycles_X insns, in order to generate
2980 code that delays exactly ARG cycles. */
2981
2982 static rtx
2983 pru_expand_delay_cycles (rtx arg)
2984 {
2985 HOST_WIDE_INT c, n;
2986
2987 if (GET_CODE (arg) != CONST_INT)
2988 {
2989 error ("%<__delay_cycles%> only takes constant arguments");
2990 return NULL_RTX;
2991 }
2992
2993 c = INTVAL (arg);
2994
2995 gcc_assert (HOST_BITS_PER_WIDE_INT > 32);
2996 if (c < 0)
2997 {
2998 error ("%<__delay_cycles%> only takes non-negative cycle counts");
2999 return NULL_RTX;
3000 }
3001
3002 emit_insn (gen_delay_cycles_start (arg));
3003
3004 /* For 32-bit loops, there's 2 + 2x cycles. */
3005 if (c > 2 * 0xffff + 1)
3006 {
3007 n = (c - 2) / 2;
3008 c -= (n * 2) + 2;
3009 if ((unsigned long long) n > 0xffffffffULL)
3010 {
3011 error ("%<__delay_cycles%> is limited to 32-bit loop counts");
3012 return NULL_RTX;
3013 }
3014 emit_insn (gen_delay_cycles_2x_plus2_si (GEN_INT (n)));
3015 }
3016
3017 /* For 16-bit loops, there's 1 + 2x cycles. */
3018 if (c > 2)
3019 {
3020 n = (c - 1) / 2;
3021 c -= (n * 2) + 1;
3022
3023 emit_insn (gen_delay_cycles_2x_plus1_hi (GEN_INT (n)));
3024 }
3025
3026 while (c > 0)
3027 {
3028 emit_insn (gen_delay_cycles_1 ());
3029 c -= 1;
3030 }
3031
3032 emit_insn (gen_delay_cycles_end (arg));
3033
3034 return NULL_RTX;
3035 }
3036
3037
3038 /* Implement TARGET_EXPAND_BUILTIN. Expand an expression EXP that calls
3039 a built-in function, with result going to TARGET if that's convenient
3040 (and in mode MODE if that's convenient).
3041 SUBTARGET may be used as the target for computing one of EXP's operands.
3042 IGNORE is nonzero if the value is to be ignored. */
3043
3044 static rtx
3045 pru_expand_builtin (tree exp, rtx target,
3046 rtx subtarget ATTRIBUTE_UNUSED,
3047 machine_mode mode,
3048 int ignore ATTRIBUTE_UNUSED)
3049 {
3050 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3051 unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
3052
3053 switch (fcode)
3054 {
3055 case PRU_BUILTIN_DELAY_CYCLES:
3056 {
3057 rtx arg1 = expand_normal (CALL_EXPR_ARG (exp, 0));
3058 return pru_expand_delay_cycles (arg1);
3059 }
3060 break;
3061 case PRU_BUILTIN_HALT:
3062 {
3063 emit_insn (gen_pru_halt ());
3064 return NULL_RTX;
3065 }
3066 break;
3067 case PRU_BUILTIN_LMBD:
3068 {
3069 rtx arg1 = expand_normal (CALL_EXPR_ARG (exp, 0));
3070 rtx arg2 = expand_normal (CALL_EXPR_ARG (exp, 1));
3071
3072 if (target == NULL_RTX || GET_MODE (target) != mode)
3073 {
3074 target = gen_reg_rtx (mode);
3075 }
3076
3077 emit_insn (gen_pru_lmbd (mode, target, arg1, arg2));
3078 return target;
3079 }
3080 break;
3081 default:
3082 internal_error ("bad builtin code");
3083 }
3084
3085 return NULL_RTX;
3086 }
3087 \f
3088 /* Remember the last target of pru_set_current_function. */
3089 static GTY(()) tree pru_previous_fndecl;
3090
3091 /* Establish appropriate back-end context for processing the function
3092 FNDECL. The argument might be NULL to indicate processing at top
3093 level, outside of any function scope. */
3094 static void
3095 pru_set_current_function (tree fndecl)
3096 {
3097 tree old_tree = (pru_previous_fndecl
3098 ? DECL_FUNCTION_SPECIFIC_TARGET (pru_previous_fndecl)
3099 : NULL_TREE);
3100
3101 tree new_tree = (fndecl
3102 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3103 : NULL_TREE);
3104
3105 if (fndecl && fndecl != pru_previous_fndecl)
3106 {
3107 pru_previous_fndecl = fndecl;
3108 if (old_tree == new_tree)
3109 ;
3110
3111 else if (new_tree)
3112 {
3113 cl_target_option_restore (&global_options, &global_options_set,
3114 TREE_TARGET_OPTION (new_tree));
3115 target_reinit ();
3116 }
3117
3118 else if (old_tree)
3119 {
3120 struct cl_target_option *def
3121 = TREE_TARGET_OPTION (target_option_current_node);
3122
3123 cl_target_option_restore (&global_options, &global_options_set, def);
3124 target_reinit ();
3125 }
3126 }
3127 }
3128 \f
3129 /* Implement TARGET_UNWIND_WORD_MODE.
3130
3131 Since PRU is really a 32-bit CPU, the default word_mode is not suitable. */
3132 static scalar_int_mode
3133 pru_unwind_word_mode (void)
3134 {
3135 return SImode;
3136 }
3137 \f
3138
3139 /* Initialize the GCC target structure. */
/* Each hook below is first undefined and then redefined to the PRU
   implementation (or to a generic helper).  The definitions are picked
   up by TARGET_INITIALIZER in the definition of targetm at the end. */

/* Assembly output. */
3140 #undef TARGET_ASM_FUNCTION_PROLOGUE
3141 #define TARGET_ASM_FUNCTION_PROLOGUE pru_asm_function_prologue
3142 #undef TARGET_ASM_INTEGER
3143 #define TARGET_ASM_INTEGER pru_assemble_integer
3144 #undef TARGET_SECTION_TYPE_FLAGS
3145 #define TARGET_SECTION_TYPE_FLAGS pru_section_type_flags
3146
3147 #undef TARGET_ASM_FILE_START
3148 #define TARGET_ASM_FILE_START pru_file_start
3149
/* Diagnostics for __regio_symbol address space usage. */
3150 #undef TARGET_INSERT_ATTRIBUTES
3151 #define TARGET_INSERT_ATTRIBUTES pru_insert_attributes
3152
/* Builtins. */
3153 #undef TARGET_INIT_BUILTINS
3154 #define TARGET_INIT_BUILTINS pru_init_builtins
3155 #undef TARGET_EXPAND_BUILTIN
3156 #define TARGET_EXPAND_BUILTIN pru_expand_builtin
3157 #undef TARGET_BUILTIN_DECL
3158 #define TARGET_BUILTIN_DECL pru_builtin_decl
3159
/* Frame layout, calls and register usage. */
3160 #undef TARGET_COMPUTE_FRAME_LAYOUT
3161 #define TARGET_COMPUTE_FRAME_LAYOUT pru_compute_frame_layout
3162
3163 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
3164 #define TARGET_FUNCTION_OK_FOR_SIBCALL hook_bool_tree_tree_true
3165
3166 #undef TARGET_CAN_ELIMINATE
3167 #define TARGET_CAN_ELIMINATE pru_can_eliminate
3168
3169 #undef TARGET_HARD_REGNO_MODE_OK
3170 #define TARGET_HARD_REGNO_MODE_OK pru_hard_regno_mode_ok
3171
3172 #undef TARGET_HARD_REGNO_SCRATCH_OK
3173 #define TARGET_HARD_REGNO_SCRATCH_OK pru_hard_regno_scratch_ok
3174
/* Argument passing and value return. */
3175 #undef TARGET_FUNCTION_ARG
3176 #define TARGET_FUNCTION_ARG pru_function_arg
3177
3178 #undef TARGET_FUNCTION_ARG_ADVANCE
3179 #define TARGET_FUNCTION_ARG_ADVANCE pru_function_arg_advance
3180
3181 #undef TARGET_ARG_PARTIAL_BYTES
3182 #define TARGET_ARG_PARTIAL_BYTES pru_arg_partial_bytes
3183
3184 #undef TARGET_FUNCTION_VALUE
3185 #define TARGET_FUNCTION_VALUE pru_function_value
3186
3187 #undef TARGET_LIBCALL_VALUE
3188 #define TARGET_LIBCALL_VALUE pru_libcall_value
3189
3190 #undef TARGET_FUNCTION_VALUE_REGNO_P
3191 #define TARGET_FUNCTION_VALUE_REGNO_P pru_function_value_regno_p
3192
3193 #undef TARGET_RETURN_IN_MEMORY
3194 #define TARGET_RETURN_IN_MEMORY pru_return_in_memory
3195
3196 #undef TARGET_MUST_PASS_IN_STACK
3197 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
3198
/* Addressing. */
3199 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
3200 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
3201 pru_addr_space_legitimate_address_p
3202
/* Library functions. */
3203 #undef TARGET_INIT_LIBFUNCS
3204 #define TARGET_INIT_LIBFUNCS pru_init_libfuncs
3205 #undef TARGET_LIBFUNC_GNU_PREFIX
3206 #define TARGET_LIBFUNC_GNU_PREFIX true
3207
/* Costs. */
3208 #undef TARGET_RTX_COSTS
3209 #define TARGET_RTX_COSTS pru_rtx_costs
3210
3211 #undef TARGET_INSN_COST
3212 #define TARGET_INSN_COST pru_insn_cost
3213
/* Operand printing. */
3214 #undef TARGET_PRINT_OPERAND
3215 #define TARGET_PRINT_OPERAND pru_print_operand
3216
3217 #undef TARGET_PRINT_OPERAND_ADDRESS
3218 #define TARGET_PRINT_OPERAND_ADDRESS pru_print_operand_address
3219
/* Option handling and per-function context. */
3220 #undef TARGET_OPTION_OVERRIDE
3221 #define TARGET_OPTION_OVERRIDE pru_option_override
3222
3223 #undef TARGET_SET_CURRENT_FUNCTION
3224 #define TARGET_SET_CURRENT_FUNCTION pru_set_current_function
3225
/* Machine dependent reorg and hardware loops. */
3226 #undef TARGET_MACHINE_DEPENDENT_REORG
3227 #define TARGET_MACHINE_DEPENDENT_REORG pru_reorg
3228
3229 #undef TARGET_CAN_USE_DOLOOP_P
3230 #define TARGET_CAN_USE_DOLOOP_P pru_can_use_doloop_p
3231
3232 #undef TARGET_INVALID_WITHIN_DOLOOP
3233 #define TARGET_INVALID_WITHIN_DOLOOP pru_invalid_within_doloop
3234
/* Miscellaneous. */
3235 #undef TARGET_UNWIND_WORD_MODE
3236 #define TARGET_UNWIND_WORD_MODE pru_unwind_word_mode
3237
3238 #undef TARGET_HAVE_SPECULATION_SAFE_VALUE
3239 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
3240
/* The target hook vector, built from the definitions above. */
3241 struct gcc_target targetm = TARGET_INITIALIZER;
3242
3243 #include "gt-pru.h"