1 /* Target machine subroutines for TI PRU.
2 Copyright (C) 2014-2021 Free Software Foundation, Inc.
3 Dimitar Dimitrov <dimitar@dinux.eu>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #define IN_TARGET_CODE 1
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "attribs.h"
32 #include "df.h"
33 #include "memmodel.h"
34 #include "tm_p.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "diagnostic-core.h"
40 #include "output.h"
41 #include "insn-attr.h"
42 #include "flags.h"
43 #include "explow.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "expr.h"
47 #include "toplev.h"
48 #include "langhooks.h"
49 #include "cfgrtl.h"
50 #include "stor-layout.h"
51 #include "dumpfile.h"
52 #include "builtins.h"
53 #include "pru-protos.h"
54
55 /* This file should be included last. */
56 #include "target-def.h"
57
58 #define INIT_ARRAY_ENTRY_BYTES 2
59
60 /* Global PRU CTABLE entries, filled in by pragmas, and used for fast
61 addressing via LBCO/SBCO instructions. */
62 struct pru_ctable_entry pru_ctable[32];
63
64 /* Forward function declarations. */
65 static bool prologue_saved_reg_p (int);
66 static void pru_reorg_loop (rtx_insn *);
67
68 struct GTY (()) machine_function
69 {
70 /* Current frame information, to be filled in by pru_compute_frame_layout
71 with register save masks, and offsets for the current function. */
72
73 /* Mask of registers to save. */
74 HARD_REG_SET save_mask;
75 /* Number of bytes that the entire frame takes up. */
76 int total_size;
77 /* Number of bytes that variables take up. */
78 int var_size;
79 /* Number of bytes that outgoing arguments take up. */
80 int out_args_size;
81 /* Number of bytes needed to store registers in frame. */
82 int save_reg_size;
83 /* Offset from new stack pointer to store registers. */
84 int save_regs_offset;
85 /* True if final frame layout is already calculated. */
86 bool initialized;
87 /* Number of doloop tags used so far. */
88 int doloop_tags;
89 /* True if the last tag was allocated to a doloop_end. */
90 bool doloop_tag_from_end;
91 };
92 \f
93 /* Stack layout and calling conventions.
94
95 The PRU ABI defines r4 as Argument Pointer. GCC implements the same
96 semantics, but represents it with HARD_FRAME_POINTER_REGNUM and
97 names it FP. The stack layout is shown below:
98
99 ---------------------- high address
100 | incoming args
101 ------call-boundary---
102 | pretend_args ^
103 FP ---------------- | total
104 | save_regs | frame
105 --------------- | size
106 | local vars |
107 --------------- |
108 | outgoing args V
109 SP ---------------------- low address
110
111 */
112
113 #define PRU_STACK_ALIGN(LOC) ROUND_UP ((LOC), STACK_BOUNDARY / BITS_PER_UNIT)
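/* Illustrative only (the actual STACK_BOUNDARY value is defined elsewhere in
   the port headers): if STACK_BOUNDARY were 32 bits, this macro would pad a
   5-byte frame component to ROUND_UP (5, 4) == 8 bytes, and leave sizes that
   are already multiples of 4 unchanged.  */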
114
115 /* Implement TARGET_COMPUTE_FRAME_LAYOUT. */
116 static void
117 pru_compute_frame_layout (void)
118 {
119 int regno;
120 HARD_REG_SET *save_mask;
121 int total_size;
122 int var_size;
123 int out_args_size;
124 int save_reg_size;
125
126 gcc_assert (!cfun->machine->initialized);
127
128 save_mask = &cfun->machine->save_mask;
129 CLEAR_HARD_REG_SET (*save_mask);
130
131 var_size = PRU_STACK_ALIGN ((HOST_WIDE_INT) get_frame_size ());
132 out_args_size = PRU_STACK_ALIGN ((HOST_WIDE_INT) crtl->outgoing_args_size);
133 total_size = var_size + out_args_size;
134
135 /* Calculate space needed for gp registers. */
136 save_reg_size = 0;
137 for (regno = 0; regno <= LAST_GP_REGNUM; regno++)
138 if (prologue_saved_reg_p (regno))
139 {
140 SET_HARD_REG_BIT (*save_mask, regno);
141 save_reg_size += 1;
142 }
143
144 save_reg_size = PRU_STACK_ALIGN (save_reg_size);
145 total_size += save_reg_size;
146 total_size += PRU_STACK_ALIGN (crtl->args.pretend_args_size);
147
148 /* Save other computed information. */
149 cfun->machine->total_size = total_size;
150 cfun->machine->var_size = var_size;
151 cfun->machine->out_args_size = out_args_size;
152 cfun->machine->save_reg_size = save_reg_size;
153 cfun->machine->initialized = reload_completed;
154 cfun->machine->save_regs_offset = out_args_size + var_size;
155 }
156
157 /* Emit efficient RTL equivalent of ADD3 with the given const_int for
158 frame-related registers.
159 op0 - Destination register.
160 op1 - First addendum operand (a register).
161 addendum - Second addendum operand (a constant).
162 kind - Note kind. REG_NOTE_MAX if no note must be added.
163 */
164 static rtx
165 pru_add3_frame_adjust (rtx op0, rtx op1, int addendum,
166 const enum reg_note kind)
167 {
168 rtx insn;
169
170 rtx op0_adjust = gen_rtx_SET (op0, plus_constant (Pmode, op1, addendum));
171
172 if (UBYTE_INT (addendum) || UBYTE_INT (-addendum))
173 insn = emit_insn (op0_adjust);
174 else
175 {
176 /* Help the compiler to cope with an arbitrary integer constant.
177 Reload has finished so we can't expect the compiler to
178 auto-allocate a temporary register. But we know that call-saved
179 registers are not live yet, so we utilize them. */
180 rtx tmpreg = gen_rtx_REG (Pmode, PROLOGUE_TEMP_REGNUM);
181 if (addendum < 0)
182 {
183 emit_insn (gen_rtx_SET (tmpreg, gen_int_mode (-addendum, Pmode)));
184 insn = emit_insn (gen_sub3_insn (op0, op1, tmpreg));
185 }
186 else
187 {
188 emit_insn (gen_rtx_SET (tmpreg, gen_int_mode (addendum, Pmode)));
189 insn = emit_insn (gen_add3_insn (op0, op1, tmpreg));
190 }
191 }
192
193 /* Attach a note indicating what happened. */
194 if (kind != REG_NOTE_MAX)
195 add_reg_note (insn, kind, copy_rtx (op0_adjust));
196
197 RTX_FRAME_RELATED_P (insn) = 1;
198
199 return insn;
200 }
201
202 /* Add a const_int to the stack pointer register. */
203 static rtx
204 pru_add_to_sp (int addendum, const enum reg_note kind)
205 {
206 return pru_add3_frame_adjust (stack_pointer_rtx, stack_pointer_rtx,
207 addendum, kind);
208 }
209
210 /* Helper function used during prologue/epilogue. Emits a single LBBO/SBBO
211 instruction for load/store of the next group of consecutive registers. */
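/* Worked example (illustrative register numbers): if the save mask holds
   hard registers 12-15 and 20-21, the first call with regno_start == 0 skips
   the leading empty slots, emits one store-multiple for the four consecutive
   registers starting at 12, advances *sp_offset by 4 * UNITS_PER_WORD, and
   returns 16.  The next call emits a second store-multiple for registers
   20-21 and returns 22; a further call finds no remaining set bits and
   returns -1, which ends the prologue/epilogue loop.  */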
212 static int
213 xbbo_next_reg_cluster (int regno_start, int *sp_offset, bool do_store)
214 {
215 int regno, nregs, i;
216 rtx addr;
217 rtx_insn *insn;
218
219 nregs = 0;
220
221 /* Skip the empty slots. */
222 for (; regno_start <= LAST_GP_REGNUM;)
223 if (TEST_HARD_REG_BIT (cfun->machine->save_mask, regno_start))
224 break;
225 else
226 regno_start++;
227
228 /* Find the largest consecutive group of registers to save. */
229 for (regno = regno_start; regno <= LAST_GP_REGNUM;)
230 if (TEST_HARD_REG_BIT (cfun->machine->save_mask, regno))
231 {
232 regno++;
233 nregs++;
234 }
235 else
236 break;
237
238 if (!nregs)
239 return -1;
240
241 gcc_assert (UBYTE_INT (*sp_offset));
242
243 /* Ok, save this bunch. */
244 addr = plus_constant (Pmode, stack_pointer_rtx, *sp_offset);
245
246 if (do_store)
247 insn = targetm.gen_store_multiple (gen_frame_mem (BLKmode, addr),
248 gen_rtx_REG (QImode, regno_start),
249 GEN_INT (nregs));
250 else
251 insn = targetm.gen_load_multiple (gen_rtx_REG (QImode, regno_start),
252 gen_frame_mem (BLKmode, addr),
253 GEN_INT (nregs));
254
255 gcc_assert (reload_completed);
256 gcc_assert (insn);
257 emit_insn (insn);
258
259 /* Tag as frame-related. */
260 RTX_FRAME_RELATED_P (insn) = 1;
261
262 if (!do_store)
263 {
264 /* Tag epilogue unwind notes. */
265 for (i = regno_start; i < (regno_start + nregs); i++)
266 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (QImode, i));
267 }
268
269 /* Increment and save offset in anticipation of the next register group. */
270 *sp_offset += nregs * UNITS_PER_WORD;
271
272 return regno_start + nregs;
273 }
274
275 /* Emit function prologue. */
276 void
277 pru_expand_prologue (void)
278 {
279 int regno_start;
280 int total_frame_size;
281 int sp_offset; /* Offset from base_reg to final stack value. */
282 int save_regs_base; /* Offset from base_reg to register save area. */
283 int save_offset; /* Temporary offset to currently saved register group. */
284
285 total_frame_size = cfun->machine->total_size;
286
287 if (flag_stack_usage_info)
288 current_function_static_stack_size = total_frame_size;
289
290 /* Decrement the stack pointer. */
291 if (!UBYTE_INT (total_frame_size))
292 {
 293       /* We need an intermediate point; this will point at the spill block.  */
294 pru_add_to_sp (cfun->machine->save_regs_offset - total_frame_size,
295 REG_NOTE_MAX);
296 save_regs_base = 0;
297 sp_offset = -cfun->machine->save_regs_offset;
298 }
299 else if (total_frame_size)
300 {
301 pru_add_to_sp (- total_frame_size, REG_NOTE_MAX);
302 save_regs_base = cfun->machine->save_regs_offset;
303 sp_offset = 0;
304 }
305 else
306 save_regs_base = sp_offset = 0;
307
308 regno_start = 0;
309 save_offset = save_regs_base;
310 do
311 regno_start = xbbo_next_reg_cluster (regno_start, &save_offset, true);
312 while (regno_start >= 0);
313
314 /* Set FP before adjusting SP. This way fp_offset has
 315      a better chance to fit in UBYTE.  */
316 if (frame_pointer_needed)
317 {
318 int fp_offset = total_frame_size
319 - crtl->args.pretend_args_size
320 + sp_offset;
321
322 pru_add3_frame_adjust (hard_frame_pointer_rtx, stack_pointer_rtx,
323 fp_offset, REG_NOTE_MAX);
324 }
325
326 if (sp_offset)
327 pru_add_to_sp (sp_offset, REG_FRAME_RELATED_EXPR);
328
329 /* If we are profiling, make sure no instructions are scheduled before
330 the call to mcount. */
331 if (crtl->profile)
332 emit_insn (gen_blockage ());
333 }
334
335 /* Emit function epilogue. */
336 void
337 pru_expand_epilogue (bool sibcall_p)
338 {
339 int total_frame_size;
340 int sp_adjust, save_offset;
341 int regno_start;
342
343 if (!sibcall_p && pru_can_use_return_insn ())
344 {
345 emit_jump_insn (gen_return ());
346 return;
347 }
348
349 emit_insn (gen_blockage ());
350
351 total_frame_size = cfun->machine->total_size;
352
353 if (frame_pointer_needed)
354 {
355 /* Recover the stack pointer. */
356 pru_add3_frame_adjust (stack_pointer_rtx, hard_frame_pointer_rtx,
357 - cfun->machine->save_reg_size,
358 REG_CFA_ADJUST_CFA);
359
360 save_offset = 0;
361 sp_adjust = total_frame_size - cfun->machine->save_regs_offset;
362 }
363 else if (!UBYTE_INT (total_frame_size))
364 {
365 pru_add_to_sp (cfun->machine->save_regs_offset, REG_CFA_ADJUST_CFA);
366 save_offset = 0;
367 sp_adjust = total_frame_size - cfun->machine->save_regs_offset;
368 }
369 else
370 {
371 save_offset = cfun->machine->save_regs_offset;
372 sp_adjust = total_frame_size;
373 }
374
375 regno_start = 0;
376 do
377 regno_start = xbbo_next_reg_cluster (regno_start, &save_offset, false);
378 while (regno_start >= 0);
379
380 /* Emit a blockage insn here to keep these insns from being moved to
381 an earlier spot in the epilogue.
382
383 This is necessary as we must not cut the stack back before all the
384 restores are finished. */
385 emit_insn (gen_blockage ());
386
387 if (sp_adjust)
388 pru_add_to_sp (sp_adjust, REG_CFA_ADJUST_CFA);
389
390 if (!sibcall_p)
391 emit_jump_insn (gen_simple_return ());
392 }
393
394 /* Implement RETURN_ADDR_RTX. Note, we do not support moving
395 back to a previous frame. */
396 rtx
397 pru_get_return_address (int count)
398 {
399 if (count != 0)
400 return NULL_RTX;
401
402 /* Return r3.w2. */
403 return get_hard_reg_initial_val (HImode, RA_REGNUM);
404 }
405
406 /* Implement FUNCTION_PROFILER macro. */
407 void
408 pru_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
409 {
410 fprintf (file, "\tmov\tr1, ra\n");
411 fprintf (file, "\tcall\t_mcount\n");
412 fprintf (file, "\tmov\tra, r1\n");
413 }
414
415 /* Dump stack layout. */
416 static void
417 pru_dump_frame_layout (FILE *file)
418 {
419 fprintf (file, "\t%s Current Frame Info\n", ASM_COMMENT_START);
420 fprintf (file, "\t%s total_size = %d\n", ASM_COMMENT_START,
421 cfun->machine->total_size);
422 fprintf (file, "\t%s var_size = %d\n", ASM_COMMENT_START,
423 cfun->machine->var_size);
424 fprintf (file, "\t%s out_args_size = %d\n", ASM_COMMENT_START,
425 cfun->machine->out_args_size);
426 fprintf (file, "\t%s save_reg_size = %d\n", ASM_COMMENT_START,
427 cfun->machine->save_reg_size);
428 fprintf (file, "\t%s initialized = %d\n", ASM_COMMENT_START,
429 cfun->machine->initialized);
430 fprintf (file, "\t%s save_regs_offset = %d\n", ASM_COMMENT_START,
431 cfun->machine->save_regs_offset);
432 fprintf (file, "\t%s is_leaf = %d\n", ASM_COMMENT_START,
433 crtl->is_leaf);
434 fprintf (file, "\t%s frame_pointer_needed = %d\n", ASM_COMMENT_START,
435 frame_pointer_needed);
436 fprintf (file, "\t%s pretend_args_size = %d\n", ASM_COMMENT_START,
437 crtl->args.pretend_args_size);
438 }
439
440 /* Return true if REGNO should be saved in the prologue. */
441 static bool
442 prologue_saved_reg_p (int regno)
443 {
444 gcc_assert (GP_REG_P (regno));
445
446 if (df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
447 return true;
448
449 /* 32-bit FP. */
450 if (frame_pointer_needed
451 && regno >= HARD_FRAME_POINTER_REGNUM
452 && regno < HARD_FRAME_POINTER_REGNUM + GET_MODE_SIZE (Pmode))
453 return true;
454
455 /* 16-bit RA. */
456 if (regno == RA_REGNUM && df_regs_ever_live_p (RA_REGNUM))
457 return true;
458 if (regno == RA_REGNUM + 1 && df_regs_ever_live_p (RA_REGNUM + 1))
459 return true;
460
461 return false;
462 }
463
464 /* Implement TARGET_CAN_ELIMINATE. */
465 static bool
466 pru_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
467 {
468 if (to == STACK_POINTER_REGNUM)
469 return !frame_pointer_needed;
470 return true;
471 }
472
473 /* Implement INITIAL_ELIMINATION_OFFSET macro. */
474 int
475 pru_initial_elimination_offset (int from, int to)
476 {
477 int offset;
478
479 /* Set OFFSET to the offset from the stack pointer. */
480 switch (from)
481 {
482 case FRAME_POINTER_REGNUM:
483 offset = cfun->machine->out_args_size;
484 break;
485
486 case ARG_POINTER_REGNUM:
487 offset = cfun->machine->total_size;
488 offset -= crtl->args.pretend_args_size;
489 break;
490
491 default:
492 gcc_unreachable ();
493 }
494
495 /* If we are asked for the frame pointer offset, then adjust OFFSET
496 by the offset from the frame pointer to the stack pointer. */
497 if (to == HARD_FRAME_POINTER_REGNUM)
498 offset -= cfun->machine->total_size - crtl->args.pretend_args_size;
499
500
501 return offset;
502 }
503
504 /* Return nonzero if this function is known to have a null epilogue.
505 This allows the optimizer to omit jumps to jumps if no stack
506 was created. */
507 int
508 pru_can_use_return_insn (void)
509 {
510 if (!reload_completed || crtl->profile)
511 return 0;
512
513 return cfun->machine->total_size == 0;
514 }
515 \f
516 /* Implement TARGET_HARD_REGNO_MODE_OK. */
517
518 static bool
519 pru_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
520 {
521 switch (GET_MODE_SIZE (mode))
522 {
523 case 1: return true;
524 case 2: return (regno % 4) <= 2;
525 case 4: return (regno % 4) == 0;
526 case 8: return (regno % 4) == 0;
527 case 16: return (regno % 4) == 0; /* Not sure why TImode is used. */
528 case 32: return (regno % 4) == 0; /* Not sure why CTImode is used. */
529 default:
530 /* TODO: Find out why VOIDmode and BLKmode are passed. */
531 gcc_assert (mode == BLKmode || mode == VOIDmode);
532 return (regno % 4) == 0;
533 }
534 }
535
536 /* Implement `TARGET_HARD_REGNO_SCRATCH_OK'.
537 Returns true if REGNO is safe to be allocated as a scratch
538 register (for a define_peephole2) in the current function. */
539
540 static bool
541 pru_hard_regno_scratch_ok (unsigned int regno)
542 {
543 /* Don't allow hard registers that might be part of the frame pointer.
544 Some places in the compiler just test for [HARD_]FRAME_POINTER_REGNUM
545 and don't handle a frame pointer that spans more than one register.
546 TODO: Fix those faulty places. */
547
548 if ((!reload_completed || frame_pointer_needed)
549 && (IN_RANGE (regno, HARD_FRAME_POINTER_REGNUM,
550 HARD_FRAME_POINTER_REGNUM + 3)
551 || IN_RANGE (regno, FRAME_POINTER_REGNUM,
552 FRAME_POINTER_REGNUM + 3)))
553 return false;
554
555 return true;
556 }
557
558
559 /* Worker function for `HARD_REGNO_RENAME_OK'.
560 Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
561
562 int
563 pru_hard_regno_rename_ok (unsigned int old_reg,
564 unsigned int new_reg)
565 {
566 /* Don't allow hard registers that might be part of the frame pointer.
567 Some places in the compiler just test for [HARD_]FRAME_POINTER_REGNUM
 568      and don't handle a frame pointer that spans more than one register.
569 TODO: Fix those faulty places. */
570 if ((!reload_completed || frame_pointer_needed)
571 && (IN_RANGE (old_reg, HARD_FRAME_POINTER_REGNUM,
572 HARD_FRAME_POINTER_REGNUM + 3)
573 || IN_RANGE (old_reg, FRAME_POINTER_REGNUM,
574 FRAME_POINTER_REGNUM + 3)
575 || IN_RANGE (new_reg, HARD_FRAME_POINTER_REGNUM,
576 HARD_FRAME_POINTER_REGNUM + 3)
577 || IN_RANGE (new_reg, FRAME_POINTER_REGNUM,
578 FRAME_POINTER_REGNUM + 3)))
579 return 0;
580
581 return 1;
582 }
583 \f
584 /* Allocate a chunk of memory for per-function machine-dependent data. */
585 static struct machine_function *
586 pru_init_machine_status (void)
587 {
588 return ggc_cleared_alloc<machine_function> ();
589 }
590
591 /* Implement TARGET_OPTION_OVERRIDE. */
592 static void
593 pru_option_override (void)
594 {
595 #ifdef SUBTARGET_OVERRIDE_OPTIONS
596 SUBTARGET_OVERRIDE_OPTIONS;
597 #endif
598
599 /* Check for unsupported options. */
600 if (flag_pic == 1)
601 warning (OPT_fpic, "%<-fpic%> is not supported");
602 if (flag_pic == 2)
603 warning (OPT_fPIC, "%<-fPIC%> is not supported");
604 if (flag_pie == 1)
605 warning (OPT_fpie, "%<-fpie%> is not supported");
606 if (flag_pie == 2)
607 warning (OPT_fPIE, "%<-fPIE%> is not supported");
608
609 /* QBxx conditional branching cannot cope with block reordering. */
610 if (flag_reorder_blocks_and_partition)
611 {
612 inform (input_location, "%<-freorder-blocks-and-partition%> "
613 "not supported on this architecture");
614 flag_reorder_blocks_and_partition = 0;
615 flag_reorder_blocks = 1;
616 }
617
618 /* Function to allocate machine-dependent function status. */
619 init_machine_status = &pru_init_machine_status;
620
621 /* Save the initial options in case the user does function specific
622 options. */
623 target_option_default_node = target_option_current_node
624 = build_target_option_node (&global_options, &global_options_set);
625
626 /* Due to difficulties in implementing the TI ABI with GCC,
627 at least check and error-out if GCC cannot compile a
628 compliant output. */
629 pru_register_abicheck_pass ();
630 }
631 \f
632 /* Compute a (partial) cost for rtx X. Return true if the complete
633 cost has been computed, and false if subexpressions should be
634 scanned. In either case, *TOTAL contains the cost result. */
635 static bool
636 pru_rtx_costs (rtx x, machine_mode mode,
637 int outer_code, int opno ATTRIBUTE_UNUSED,
638 int *total, bool speed ATTRIBUTE_UNUSED)
639 {
640 const int code = GET_CODE (x);
641
642 switch (code)
643 {
644 case CONST_INT:
645 if ((mode == VOIDmode && UBYTE_INT (INTVAL (x)))
646 || (mode != VOIDmode && const_ubyte_operand (x, mode)))
647 {
648 *total = COSTS_N_INSNS (0);
649 return true;
650 }
651 else if ((mode == VOIDmode && UHWORD_INT (INTVAL (x)))
652 || (mode != VOIDmode && const_uhword_operand (x, mode)))
653 {
654 *total = COSTS_N_INSNS (1);
655 return true;
656 }
657 else if (outer_code == MEM && ctable_addr_operand (x, VOIDmode))
658 {
659 *total = COSTS_N_INSNS (0);
660 return true;
661 }
662 else
663 {
664 *total = COSTS_N_INSNS (2);
665 return true;
666 }
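      /* Illustrative costs from the checks above, assuming UBYTE_INT and
	 UHWORD_INT accept the unsigned 8-bit and 16-bit ranges: the constant
	 200 is free, 0x1234 costs one instruction, and 0x12345 takes two
	 instructions to materialize.  */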
667
668 case LABEL_REF:
669 case SYMBOL_REF:
670 case CONST:
671 {
672 *total = COSTS_N_INSNS (1);
673 return true;
674 }
675 case CONST_DOUBLE:
676 {
677 *total = COSTS_N_INSNS (2);
678 return true;
679 }
680 case CONST_WIDE_INT:
681 {
682 /* PRU declares no vector or very large integer types. */
683 gcc_unreachable ();
684 return true;
685 }
686 case SET:
687 {
688 int factor;
689
690 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
691 the mode for the factor. */
692 mode = GET_MODE (SET_DEST (x));
693
694 /* SI move has the same cost as a QI move. Moves larger than
695 64 bits are costly. */
696 factor = CEIL (GET_MODE_SIZE (mode), GET_MODE_SIZE (SImode));
697 *total = factor * COSTS_N_INSNS (1);
698
699 return false;
700 }
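    /* Example of the factor above (illustrative): a DImode SET gets
       factor == CEIL (8, 4) == 2 and thus twice the cost of an SImode or
       QImode SET, while a TImode SET costs four times as much.  */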
701
702 case MULT:
703 {
704 /* Factor in that "mul" requires fixed registers, which
705 would likely require register moves. */
706 *total = COSTS_N_INSNS (7);
707 return false;
708 }
709 case PLUS:
710 {
711 rtx op0 = XEXP (x, 0);
712 rtx op1 = XEXP (x, 1);
713 machine_mode op1_mode = GET_MODE (op1);
714
715 /* Generic RTL address expressions do not enforce mode for
716 offsets, yet our UBYTE constraint requires it. Fix it here. */
717 if (op1_mode == VOIDmode && CONST_INT_P (op1) && outer_code == MEM)
718 op1_mode = Pmode;
719 if (outer_code == MEM
720 && ((REG_P (op0) && reg_or_ubyte_operand (op1, op1_mode))
721 || ctable_addr_operand (op0, VOIDmode)
722 || ctable_addr_operand (op1, VOIDmode)
723 || (ctable_base_operand (op0, VOIDmode) && REG_P (op1))
724 || (ctable_base_operand (op1, VOIDmode) && REG_P (op0))))
725 {
726 /* CTABLE or REG base addressing - PLUS comes for free. */
727 *total = COSTS_N_INSNS (0);
728 return true;
729 }
730 else
731 {
732 *total = COSTS_N_INSNS (1);
733 return false;
734 }
735 }
736 case SIGN_EXTEND:
737 {
738 *total = COSTS_N_INSNS (3);
739 return false;
740 }
741 case ASHIFTRT:
742 {
743 rtx op1 = XEXP (x, 1);
744 if (const_1_operand (op1, VOIDmode))
745 *total = COSTS_N_INSNS (3);
746 else
747 *total = COSTS_N_INSNS (7);
748 return false;
749 }
750 case ZERO_EXTRACT:
751 {
752 rtx op2 = XEXP (x, 2);
753 if ((outer_code == EQ || outer_code == NE)
754 && CONST_INT_P (op2)
755 && INTVAL (op2) == 1)
756 {
 757           /* Branching on a bit being set or clear is a single instruction.  */
758 *total = COSTS_N_INSNS (0);
759 return true;
760 }
761 else
762 {
763 *total = COSTS_N_INSNS (2);
764 return false;
765 }
766 }
767 case ZERO_EXTEND:
768 {
769 *total = COSTS_N_INSNS (0);
770 return false;
771 }
772
773 default:
774 {
775 /* PRU ALU is 32 bit, despite GCC's UNITS_PER_WORD=1. */
776 int factor = CEIL (GET_MODE_SIZE (mode), GET_MODE_SIZE (SImode));
777 *total = factor * COSTS_N_INSNS (1);
778 return false;
779 }
780 }
781 }
782 \f
783 static GTY(()) rtx eqdf_libfunc;
784 static GTY(()) rtx nedf_libfunc;
785 static GTY(()) rtx ledf_libfunc;
786 static GTY(()) rtx ltdf_libfunc;
787 static GTY(()) rtx gedf_libfunc;
788 static GTY(()) rtx gtdf_libfunc;
789 static GTY(()) rtx eqsf_libfunc;
790 static GTY(()) rtx nesf_libfunc;
791 static GTY(()) rtx lesf_libfunc;
792 static GTY(()) rtx ltsf_libfunc;
793 static GTY(()) rtx gesf_libfunc;
794 static GTY(()) rtx gtsf_libfunc;
795
796 /* Implement the TARGET_INIT_LIBFUNCS macro. We use this to rename library
797 functions to match the PRU ABI. */
798
799 static void
800 pru_init_libfuncs (void)
801 {
802 /* Double-precision floating-point arithmetic. */
803 set_optab_libfunc (add_optab, DFmode, "__pruabi_addd");
804 set_optab_libfunc (sdiv_optab, DFmode, "__pruabi_divd");
805 set_optab_libfunc (smul_optab, DFmode, "__pruabi_mpyd");
806 set_optab_libfunc (neg_optab, DFmode, "__pruabi_negd");
807 set_optab_libfunc (sub_optab, DFmode, "__pruabi_subd");
808
809 /* Single-precision floating-point arithmetic. */
810 set_optab_libfunc (add_optab, SFmode, "__pruabi_addf");
811 set_optab_libfunc (sdiv_optab, SFmode, "__pruabi_divf");
812 set_optab_libfunc (smul_optab, SFmode, "__pruabi_mpyf");
813 set_optab_libfunc (neg_optab, SFmode, "__pruabi_negf");
814 set_optab_libfunc (sub_optab, SFmode, "__pruabi_subf");
815
816 /* Floating-point comparisons. */
817 eqsf_libfunc = init_one_libfunc ("__pruabi_eqf");
818 nesf_libfunc = init_one_libfunc ("__pruabi_neqf");
819 lesf_libfunc = init_one_libfunc ("__pruabi_lef");
820 ltsf_libfunc = init_one_libfunc ("__pruabi_ltf");
821 gesf_libfunc = init_one_libfunc ("__pruabi_gef");
822 gtsf_libfunc = init_one_libfunc ("__pruabi_gtf");
823 eqdf_libfunc = init_one_libfunc ("__pruabi_eqd");
824 nedf_libfunc = init_one_libfunc ("__pruabi_neqd");
825 ledf_libfunc = init_one_libfunc ("__pruabi_led");
826 ltdf_libfunc = init_one_libfunc ("__pruabi_ltd");
827 gedf_libfunc = init_one_libfunc ("__pruabi_ged");
828 gtdf_libfunc = init_one_libfunc ("__pruabi_gtd");
829
 830   /* In the PRU ABI, much like on other TI processors, floating-point
831 comparisons return non-standard values. This quirk is handled
832 by disabling the optab library functions, and handling the
833 comparison during RTL expansion. */
834 set_optab_libfunc (eq_optab, SFmode, NULL);
835 set_optab_libfunc (ne_optab, SFmode, NULL);
836 set_optab_libfunc (gt_optab, SFmode, NULL);
837 set_optab_libfunc (ge_optab, SFmode, NULL);
838 set_optab_libfunc (lt_optab, SFmode, NULL);
839 set_optab_libfunc (le_optab, SFmode, NULL);
840 set_optab_libfunc (eq_optab, DFmode, NULL);
841 set_optab_libfunc (ne_optab, DFmode, NULL);
842 set_optab_libfunc (gt_optab, DFmode, NULL);
843 set_optab_libfunc (ge_optab, DFmode, NULL);
844 set_optab_libfunc (lt_optab, DFmode, NULL);
845 set_optab_libfunc (le_optab, DFmode, NULL);
846
847 /* The isunordered function appears to be supported only by GCC. */
848 set_optab_libfunc (unord_optab, SFmode, "__pruabi_unordf");
849 set_optab_libfunc (unord_optab, DFmode, "__pruabi_unordd");
850
851 /* Floating-point to integer conversions. */
852 set_conv_libfunc (sfix_optab, SImode, DFmode, "__pruabi_fixdi");
853 set_conv_libfunc (ufix_optab, SImode, DFmode, "__pruabi_fixdu");
854 set_conv_libfunc (sfix_optab, DImode, DFmode, "__pruabi_fixdlli");
855 set_conv_libfunc (ufix_optab, DImode, DFmode, "__pruabi_fixdull");
856 set_conv_libfunc (sfix_optab, SImode, SFmode, "__pruabi_fixfi");
857 set_conv_libfunc (ufix_optab, SImode, SFmode, "__pruabi_fixfu");
858 set_conv_libfunc (sfix_optab, DImode, SFmode, "__pruabi_fixflli");
859 set_conv_libfunc (ufix_optab, DImode, SFmode, "__pruabi_fixfull");
860
861 /* Conversions between floating types. */
862 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__pruabi_cvtdf");
863 set_conv_libfunc (sext_optab, DFmode, SFmode, "__pruabi_cvtfd");
864
865 /* Integer to floating-point conversions. */
866 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__pruabi_fltid");
867 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__pruabi_fltud");
868 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__pruabi_fltllid");
869 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__pruabi_fltulld");
870 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__pruabi_fltif");
871 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__pruabi_fltuf");
872 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__pruabi_fltllif");
873 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__pruabi_fltullf");
874
875 /* Long long. */
876 set_optab_libfunc (ashr_optab, DImode, "__pruabi_asrll");
877 set_optab_libfunc (smul_optab, DImode, "__pruabi_mpyll");
878 set_optab_libfunc (ashl_optab, DImode, "__pruabi_lslll");
879 set_optab_libfunc (lshr_optab, DImode, "__pruabi_lsrll");
880
881 set_optab_libfunc (sdiv_optab, SImode, "__pruabi_divi");
882 set_optab_libfunc (udiv_optab, SImode, "__pruabi_divu");
883 set_optab_libfunc (smod_optab, SImode, "__pruabi_remi");
884 set_optab_libfunc (umod_optab, SImode, "__pruabi_remu");
885 set_optab_libfunc (sdivmod_optab, SImode, "__pruabi_divremi");
886 set_optab_libfunc (udivmod_optab, SImode, "__pruabi_divremu");
887 set_optab_libfunc (sdiv_optab, DImode, "__pruabi_divlli");
888 set_optab_libfunc (udiv_optab, DImode, "__pruabi_divull");
889 set_optab_libfunc (smod_optab, DImode, "__pruabi_remlli");
890 set_optab_libfunc (umod_optab, DImode, "__pruabi_remull");
891 set_optab_libfunc (udivmod_optab, DImode, "__pruabi_divremull");
892 }
893
894
895 /* Emit comparison instruction if necessary, returning the expression
896 that holds the compare result in the proper mode. Return the comparison
897 that should be used in the jump insn. */
898
899 rtx
900 pru_expand_fp_compare (rtx comparison, machine_mode mode)
901 {
902 enum rtx_code code = GET_CODE (comparison);
903 rtx op0 = XEXP (comparison, 0);
904 rtx op1 = XEXP (comparison, 1);
905 rtx cmp;
906 enum rtx_code jump_code = code;
907 machine_mode op_mode = GET_MODE (op0);
908 rtx_insn *insns;
909 rtx libfunc;
910
911 gcc_assert (op_mode == DFmode || op_mode == SFmode);
912
913 /* FP exceptions are not raised by PRU's softfp implementation. So the
914 following transformations are safe. */
915 if (code == UNGE)
916 {
917 code = LT;
918 jump_code = EQ;
919 }
920 else if (code == UNLE)
921 {
922 code = GT;
923 jump_code = EQ;
924 }
925 else
926 jump_code = NE;
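  /* For instance (illustrative), UNGE is lowered to a call to the LT helper
     followed by a branch taken when its result is zero: for ordered operands
     that is exactly !(op0 < op1), and for a NaN the LT helper is expected to
     return "false" as well, so the branch is still taken, matching UNGE
     semantics.  */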
927
928 switch (code)
929 {
930 case EQ:
931 libfunc = op_mode == DFmode ? eqdf_libfunc : eqsf_libfunc;
932 break;
933 case NE:
934 libfunc = op_mode == DFmode ? nedf_libfunc : nesf_libfunc;
935 break;
936 case GT:
937 libfunc = op_mode == DFmode ? gtdf_libfunc : gtsf_libfunc;
938 break;
939 case GE:
940 libfunc = op_mode == DFmode ? gedf_libfunc : gesf_libfunc;
941 break;
942 case LT:
943 libfunc = op_mode == DFmode ? ltdf_libfunc : ltsf_libfunc;
944 break;
945 case LE:
946 libfunc = op_mode == DFmode ? ledf_libfunc : lesf_libfunc;
947 break;
948 default:
949 gcc_unreachable ();
950 }
951 start_sequence ();
952
953 cmp = emit_library_call_value (libfunc, 0, LCT_CONST, SImode,
954 op0, op_mode, op1, op_mode);
955 insns = get_insns ();
956 end_sequence ();
957
958 emit_libcall_block (insns, cmp, cmp,
959 gen_rtx_fmt_ee (code, SImode, op0, op1));
960
961 return gen_rtx_fmt_ee (jump_code, mode, cmp, const0_rtx);
962 }
963 \f
964 /* Return the sign bit position for given OP's mode. */
965 static int
966 sign_bit_position (const rtx op)
967 {
968 const int sz = GET_MODE_SIZE (GET_MODE (op));
969
970 return sz * 8 - 1;
971 }
972
973 /* Output asm code for sign_extend operation. */
974 const char *
975 pru_output_sign_extend (rtx *operands)
976 {
977 static char buf[512];
978 int bufi;
979 const int dst_sz = GET_MODE_SIZE (GET_MODE (operands[0]));
980 const int src_sz = GET_MODE_SIZE (GET_MODE (operands[1]));
981 char ext_start;
982
983 switch (src_sz)
984 {
985 case 1: ext_start = 'y'; break;
986 case 2: ext_start = 'z'; break;
987 default: gcc_unreachable ();
988 }
989
990 gcc_assert (dst_sz > src_sz);
991
992 /* Note that src and dst can be different parts of the same
993 register, e.g. "r7, r7.w1". */
994 bufi = snprintf (buf, sizeof (buf),
995 "mov\t%%0, %%1\n\t" /* Copy AND make positive. */
996 "qbbc\t.+8, %%0, %d\n\t" /* Check sign bit. */
997 "fill\t%%%c0, %d", /* Make negative. */
998 sign_bit_position (operands[1]),
999 ext_start,
1000 dst_sz - src_sz);
1001
1002 gcc_assert (bufi > 0);
1003 gcc_assert ((unsigned int) bufi < sizeof (buf));
1004
1005 return buf;
1006 }
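/* Worked example, derived from the template above rather than adding new
   behavior: sign-extending a QImode value held in r7.b0 into the full SImode
   r7 would emit

       mov	r7, r7.b0
       qbbc	.+8, r7, 7
       fill	r7.b1, 3

   i.e. copy the byte, skip the fill when bit 7 (the QI sign bit) is clear,
   and otherwise set the upper three bytes to 0xff.  */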
1007 \f
1008 /* Branches and compares. */
1009
1010 /* PRU's ALU does not support signed comparison operations. That's why we
1011 emulate them. By first checking the sign bit and handling every possible
1012 operand sign combination, we can simulate signed comparisons in just
1013 5 instructions. See table below.
1014
1015 .-------------------.---------------------------------------------------.
1016 | Operand sign bit | Mapping the signed comparison to an unsigned one |
1017 |---------+---------+------------+------------+------------+------------|
1018 | OP1.b31 | OP2.b31 | OP1 < OP2 | OP1 <= OP2 | OP1 > OP2 | OP1 >= OP2 |
1019 |---------+---------+------------+------------+------------+------------|
1020 | 0 | 0 | OP1 < OP2 | OP1 <= OP2 | OP1 > OP2 | OP1 >= OP2 |
1021 |---------+---------+------------+------------+------------+------------|
1022 | 0 | 1 | false | false | true | true |
1023 |---------+---------+------------+------------+------------+------------|
1024 | 1 | 0 | true | true | false | false |
1025 |---------+---------+------------+------------+------------+------------|
1026 | 1 | 1 | OP1 < OP2 | OP1 <= OP2 | OP1 > OP2 | OP1 >= OP2 |
1027 `---------'---------'------------'------------'------------+------------'
1028
1029
1030 Given the table above, here is an example for a concrete op:
1031 LT:
1032 qbbc OP1_POS, OP1, 31
1033 OP1_NEG: qbbc BRANCH_TAKEN_LABEL, OP2, 31
1034 OP1_NEG_OP2_NEG: qblt BRANCH_TAKEN_LABEL, OP2, OP1
1035 ; jmp OUT -> can be eliminated because we'll take the
1036 ; following branch. OP2.b31 is guaranteed to be 1
1037 ; by the time we get here.
1038 OP1_POS: qbbs OUT, OP2, 31
1039 OP1_POS_OP2_POS: qblt BRANCH_TAKEN_LABEL, OP2, OP1
1040 #if FAR_JUMP
1041 jmp OUT
1042 BRANCH_TAKEN_LABEL: jmp REAL_BRANCH_TAKEN_LABEL
1043 #endif
1044 OUT:
1045
1046 */
1047
1048 /* Output asm code for a signed-compare LT/LE conditional branch. */
1049 static const char *
1050 pru_output_ltle_signed_cbranch (rtx *operands, bool is_near)
1051 {
1052 static char buf[1024];
1053 enum rtx_code code = GET_CODE (operands[0]);
1054 rtx op1;
1055 rtx op2;
1056 const char *cmp_opstr;
1057 int bufi = 0;
1058
1059 op1 = operands[1];
1060 op2 = operands[2];
1061
1062 gcc_assert (GET_CODE (op1) == REG && GET_CODE (op2) == REG);
1063
1064 /* Determine the comparison operators for positive and negative operands. */
1065 if (code == LT)
1066 cmp_opstr = "qblt";
1067 else if (code == LE)
1068 cmp_opstr = "qble";
1069 else
1070 gcc_unreachable ();
1071
1072 if (is_near)
1073 bufi = snprintf (buf, sizeof (buf),
1074 "qbbc\t.+12, %%1, %d\n\t"
1075 "qbbc\t%%l3, %%2, %d\n\t" /* OP1_NEG. */
1076 "%s\t%%l3, %%2, %%1\n\t" /* OP1_NEG_OP2_NEG. */
1077 "qbbs\t.+8, %%2, %d\n\t" /* OP1_POS. */
1078 "%s\t%%l3, %%2, %%1", /* OP1_POS_OP2_POS. */
1079 sign_bit_position (op1),
1080 sign_bit_position (op2),
1081 cmp_opstr,
1082 sign_bit_position (op2),
1083 cmp_opstr);
1084 else
1085 bufi = snprintf (buf, sizeof (buf),
1086 "qbbc\t.+12, %%1, %d\n\t"
1087 "qbbc\t.+20, %%2, %d\n\t" /* OP1_NEG. */
1088 "%s\t.+16, %%2, %%1\n\t" /* OP1_NEG_OP2_NEG. */
1089 "qbbs\t.+16, %%2, %d\n\t" /* OP1_POS. */
1090 "%s\t.+8, %%2, %%1\n\t" /* OP1_POS_OP2_POS. */
1091 "jmp\t.+8\n\t" /* jmp OUT. */
1092 "jmp\t%%%%label(%%l3)", /* BRANCH_TAKEN_LABEL. */
1093 sign_bit_position (op1),
1094 sign_bit_position (op2),
1095 cmp_opstr,
1096 sign_bit_position (op2),
1097 cmp_opstr);
1098
1099 gcc_assert (bufi > 0);
1100 gcc_assert ((unsigned int) bufi < sizeof (buf));
1101
1102 return buf;
1103 }
1104
1105 /* Output asm code for a signed-compare GT/GE conditional branch. */
1106 static const char *
1107 pru_output_gtge_signed_cbranch (rtx *operands, bool is_near)
1108 {
1109 static char buf[1024];
1110 enum rtx_code code = GET_CODE (operands[0]);
1111 rtx op1;
1112 rtx op2;
1113 const char *cmp_opstr;
1114 int bufi = 0;
1115
1116 op1 = operands[1];
1117 op2 = operands[2];
1118
1119 gcc_assert (GET_CODE (op1) == REG && GET_CODE (op2) == REG);
1120
1121 /* Determine the comparison operators for positive and negative operands. */
1122 if (code == GT)
1123 cmp_opstr = "qbgt";
1124 else if (code == GE)
1125 cmp_opstr = "qbge";
1126 else
1127 gcc_unreachable ();
1128
1129 if (is_near)
1130 bufi = snprintf (buf, sizeof (buf),
1131 "qbbs\t.+12, %%1, %d\n\t"
1132 "qbbs\t%%l3, %%2, %d\n\t" /* OP1_POS. */
1133 "%s\t%%l3, %%2, %%1\n\t" /* OP1_POS_OP2_POS. */
1134 "qbbc\t.+8, %%2, %d\n\t" /* OP1_NEG. */
1135 "%s\t%%l3, %%2, %%1", /* OP1_NEG_OP2_NEG. */
1136 sign_bit_position (op1),
1137 sign_bit_position (op2),
1138 cmp_opstr,
1139 sign_bit_position (op2),
1140 cmp_opstr);
1141 else
1142 bufi = snprintf (buf, sizeof (buf),
1143 "qbbs\t.+12, %%1, %d\n\t"
1144 "qbbs\t.+20, %%2, %d\n\t" /* OP1_POS. */
1145 "%s\t.+16, %%2, %%1\n\t" /* OP1_POS_OP2_POS. */
1146 "qbbc\t.+16, %%2, %d\n\t" /* OP1_NEG. */
1147 "%s\t.+8, %%2, %%1\n\t" /* OP1_NEG_OP2_NEG. */
1148 "jmp\t.+8\n\t" /* jmp OUT. */
1149 "jmp\t%%%%label(%%l3)", /* BRANCH_TAKEN_LABEL. */
1150 sign_bit_position (op1),
1151 sign_bit_position (op2),
1152 cmp_opstr,
1153 sign_bit_position (op2),
1154 cmp_opstr);
1155
1156 gcc_assert (bufi > 0);
1157 gcc_assert ((unsigned int) bufi < sizeof (buf));
1158
1159 return buf;
1160 }
1161
1162 /* Output asm code for a signed-compare conditional branch.
1163
1164 If IS_NEAR is true, then QBBx instructions may be used for reaching
1165 the destination label. Otherwise JMP is used, at the expense of
1166 increased code size. */
1167 const char *
1168 pru_output_signed_cbranch (rtx *operands, bool is_near)
1169 {
1170 enum rtx_code code = GET_CODE (operands[0]);
1171
1172 if (code == LT || code == LE)
1173 return pru_output_ltle_signed_cbranch (operands, is_near);
1174 else if (code == GT || code == GE)
1175 return pru_output_gtge_signed_cbranch (operands, is_near);
1176 else
1177 gcc_unreachable ();
1178 }
1179
1180 /* Optimized version of pru_output_signed_cbranch for constant second
1181 operand. */
1182
1183 const char *
1184 pru_output_signed_cbranch_ubyteop2 (rtx *operands, bool is_near)
1185 {
1186 static char buf[1024];
1187 enum rtx_code code = GET_CODE (operands[0]);
1188 int regop_sign_bit_pos = sign_bit_position (operands[1]);
1189 const char *cmp_opstr;
1190 const char *rcmp_opstr;
1191
 1192   /* We must swap operands due to PRU's demand that OP1 be the immediate.  */
1193 code = swap_condition (code);
1194
1195 /* Determine normal and reversed comparison operators for both positive
1196 operands. This enables us to go completely unsigned.
1197
1198 NOTE: We cannot use the R print modifier because we convert signed
1199 comparison operators to unsigned ones. */
1200 switch (code)
1201 {
1202 case LT: cmp_opstr = "qblt"; rcmp_opstr = "qbge"; break;
1203 case LE: cmp_opstr = "qble"; rcmp_opstr = "qbgt"; break;
1204 case GT: cmp_opstr = "qbgt"; rcmp_opstr = "qble"; break;
1205 case GE: cmp_opstr = "qbge"; rcmp_opstr = "qblt"; break;
1206 default: gcc_unreachable ();
1207 }
1208
1209 /* OP2 is a constant unsigned byte - utilize this info to generate
1210 optimized code. We can "remove half" of the op table above because
1211 we know that OP2.b31 = 0 (remember that 0 <= OP2 <= 255). */
1212 if (code == LT || code == LE)
1213 {
1214 if (is_near)
1215 snprintf (buf, sizeof (buf),
1216 "qbbs\t.+8, %%1, %d\n\t"
1217 "%s\t%%l3, %%1, %%u2",
1218 regop_sign_bit_pos,
1219 cmp_opstr);
1220 else
1221 snprintf (buf, sizeof (buf),
1222 "qbbs\t.+12, %%1, %d\n\t"
1223 "%s\t.+8, %%1, %%u2\n\t"
1224 "jmp\t%%%%label(%%l3)",
1225 regop_sign_bit_pos,
1226 rcmp_opstr);
1227 }
1228 else if (code == GT || code == GE)
1229 {
1230 if (is_near)
1231 snprintf (buf, sizeof (buf),
1232 "qbbs\t%%l3, %%1, %d\n\t"
1233 "%s\t%%l3, %%1, %%u2",
1234 regop_sign_bit_pos,
1235 cmp_opstr);
1236 else
1237 snprintf (buf, sizeof (buf),
1238 "qbbs\t.+8, %%1, %d\n\t"
1239 "%s\t.+8, %%1, %%u2\n\t"
1240 "jmp\t%%%%label(%%l3)",
1241 regop_sign_bit_pos,
1242 rcmp_opstr);
1243 }
1244 else
1245 gcc_unreachable ();
1246
1247 return buf;
1248 }
1249
1250 /* Optimized version of pru_output_signed_cbranch_ubyteop2 for constant
1251 zero second operand. */
1252
1253 const char *
1254 pru_output_signed_cbranch_zeroop2 (rtx *operands, bool is_near)
1255 {
1256 static char buf[1024];
1257 enum rtx_code code = GET_CODE (operands[0]);
1258 int regop_sign_bit_pos = sign_bit_position (operands[1]);
1259
1260 /* OP2 is a constant zero - utilize this info to simply check the
1261 OP1 sign bit when comparing for LT or GE. */
1262 if (code == LT)
1263 {
1264 if (is_near)
1265 snprintf (buf, sizeof (buf),
1266 "qbbs\t%%l3, %%1, %d\n\t",
1267 regop_sign_bit_pos);
1268 else
1269 snprintf (buf, sizeof (buf),
1270 "qbbc\t.+8, %%1, %d\n\t"
1271 "jmp\t%%%%label(%%l3)",
1272 regop_sign_bit_pos);
1273 }
1274 else if (code == GE)
1275 {
1276 if (is_near)
1277 snprintf (buf, sizeof (buf),
1278 "qbbc\t%%l3, %%1, %d\n\t",
1279 regop_sign_bit_pos);
1280 else
1281 snprintf (buf, sizeof (buf),
1282 "qbbs\t.+8, %%1, %d\n\t"
1283 "jmp\t%%%%label(%%l3)",
1284 regop_sign_bit_pos);
1285 }
1286 else
1287 gcc_unreachable ();
1288
1289 return buf;
1290 }
1291
1292 /* Addressing Modes. */
1293
1294 /* Return true if register REGNO is a valid base register.
1295 STRICT_P is true if REG_OK_STRICT is in effect. */
1296
1297 bool
1298 pru_regno_ok_for_base_p (int regno, bool strict_p)
1299 {
1300 if (!HARD_REGISTER_NUM_P (regno) && !strict_p)
1301 return true;
1302
1303 /* The fake registers will be eliminated to either the stack or
1304 hard frame pointer, both of which are usually valid base registers.
1305 Reload deals with the cases where the eliminated form isn't valid. */
1306 return (GP_REG_P (regno)
1307 || regno == FRAME_POINTER_REGNUM
1308 || regno == ARG_POINTER_REGNUM);
1309 }
1310
1311 /* Return true if given xbbo constant OFFSET is valid. */
1312 static bool
1313 pru_valid_const_ubyte_offset (machine_mode mode, HOST_WIDE_INT offset)
1314 {
1315 bool valid = UBYTE_INT (offset);
1316
1317 /* Reload can split multi word accesses, so make sure we can address
1318 the second word in a DI. */
1319 if (valid && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode))
1320 valid = UBYTE_INT (offset + GET_MODE_SIZE (mode) - 1);
1321
1322 return valid;
1323 }
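/* Example of the check above: a DImode access has GET_MODE_SIZE == 8, so an
   offset of 248 is accepted (248 + 7 == 255 still fits in an unsigned byte),
   while an offset of 250 is rejected because the last byte of the double
   word would land at offset 257.  */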
1324
1325 /* Recognize a CTABLE base address. Return CTABLE entry index, or -1 if
1326 base was not found in the pragma-filled pru_ctable. */
1327 int
1328 pru_get_ctable_exact_base_index (unsigned HOST_WIDE_INT caddr)
1329 {
1330 unsigned int i;
1331
1332 for (i = 0; i < ARRAY_SIZE (pru_ctable); i++)
1333 {
1334 if (pru_ctable[i].valid && pru_ctable[i].base == caddr)
1335 return i;
1336 }
1337 return -1;
1338 }
1339
1340
1341 /* Check if the given address can be addressed via CTABLE_BASE + UBYTE_OFFS,
1342 and return the base CTABLE index if possible. */
1343 int
1344 pru_get_ctable_base_index (unsigned HOST_WIDE_INT caddr)
1345 {
1346 unsigned int i;
1347
1348 for (i = 0; i < ARRAY_SIZE (pru_ctable); i++)
1349 {
1350 if (pru_ctable[i].valid && IN_RANGE (caddr,
1351 pru_ctable[i].base,
1352 pru_ctable[i].base + 0xff))
1353 return i;
1354 }
1355 return -1;
1356 }
1357
1358
1359 /* Return the offset from some CTABLE base for this address. */
1360 int
1361 pru_get_ctable_base_offset (unsigned HOST_WIDE_INT caddr)
1362 {
1363 int i;
1364
1365 i = pru_get_ctable_base_index (caddr);
1366 gcc_assert (i >= 0);
1367
1368 return caddr - pru_ctable[i].base;
1369 }
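/* Worked example (hypothetical ctable contents): assuming a pragma such as
   "#pragma ctable_entry 1 0x48040000" has filled pru_ctable[1], then for
   caddr == 0x48040010 pru_get_ctable_base_index returns 1 because the
   address falls within [0x48040000, 0x480400ff], and
   pru_get_ctable_base_offset returns the UBYTE offset 0x10.
   pru_get_ctable_exact_base_index would return 1 only for
   caddr == 0x48040000 itself.  */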
1370
1371 /* Return true if the address expression formed by BASE + OFFSET is
1372 valid.
1373
1374 Note that the following address is not handled here:
1375 base CTABLE constant base + UBYTE constant offset
1376 The constants will be folded. The ctable_addr_operand predicate will take
1377 care of the validation. The CTABLE base+offset split will happen during
1378 operand printing. */
1379 static bool
1380 pru_valid_addr_expr_p (machine_mode mode, rtx base, rtx offset, bool strict_p)
1381 {
1382 if (!strict_p && GET_CODE (base) == SUBREG)
1383 base = SUBREG_REG (base);
1384 if (!strict_p && GET_CODE (offset) == SUBREG)
1385 offset = SUBREG_REG (offset);
1386
1387 if (REG_P (base)
1388 && pru_regno_ok_for_base_p (REGNO (base), strict_p)
1389 && ((CONST_INT_P (offset)
1390 && pru_valid_const_ubyte_offset (mode, INTVAL (offset)))
1391 || (REG_P (offset)
1392 && pru_regno_ok_for_index_p (REGNO (offset), strict_p))))
1393 /* base register + register offset
1394 * OR base register + UBYTE constant offset. */
1395 return true;
1396 else if (REG_P (base)
1397 && pru_regno_ok_for_index_p (REGNO (base), strict_p)
1398 && ctable_base_operand (offset, VOIDmode))
1399 /* base CTABLE constant base + register offset
1400 * Note: GCC always puts the register as a first operand of PLUS. */
1401 return true;
1402 else
1403 return false;
1404 }
1405
1406 /* Implement TARGET_LEGITIMATE_ADDRESS_P. */
1407 static bool
1408 pru_legitimate_address_p (machine_mode mode,
1409 rtx operand, bool strict_p)
1410 {
1411 switch (GET_CODE (operand))
1412 {
1413 /* Direct. */
1414 case SYMBOL_REF:
1415 case LABEL_REF:
1416 case CONST:
1417 case CONST_WIDE_INT:
1418 return false;
1419
1420 case CONST_INT:
1421 return ctable_addr_operand (operand, VOIDmode);
1422
1423 /* Register indirect. */
1424 case REG:
1425 return pru_regno_ok_for_base_p (REGNO (operand), strict_p);
1426
1427 /* Register indirect with displacement. */
1428 case PLUS:
1429 {
1430 rtx op0 = XEXP (operand, 0);
1431 rtx op1 = XEXP (operand, 1);
1432
1433 return pru_valid_addr_expr_p (mode, op0, op1, strict_p);
1434 }
1435
1436 default:
1437 break;
1438 }
1439 return false;
1440 }
1441 \f
1442 /* Output assembly language related definitions. */
1443
1444 /* Implement TARGET_ASM_CONSTRUCTOR. */
1445 static void
1446 pru_elf_asm_constructor (rtx symbol, int priority)
1447 {
1448 char buf[23];
1449 section *s;
1450
1451 if (priority == DEFAULT_INIT_PRIORITY)
1452 snprintf (buf, sizeof (buf), ".init_array");
1453 else
1454 {
 1455          /* While priority is known to be in range [0, 65535], meaning 18 bytes
 1456             would be enough, the compiler might not know that.  To avoid a
 1457             -Wformat-truncation false positive, use a larger size.  */
1458 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
1459 }
1460 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
1461 switch_to_section (s);
1462 assemble_aligned_integer (INIT_ARRAY_ENTRY_BYTES, symbol);
1463 }
1464
1465 /* Implement TARGET_ASM_DESTRUCTOR. */
1466 static void
1467 pru_elf_asm_destructor (rtx symbol, int priority)
1468 {
1469 char buf[23];
1470 section *s;
1471
1472 if (priority == DEFAULT_INIT_PRIORITY)
1473 snprintf (buf, sizeof (buf), ".fini_array");
1474 else
1475 {
 1476          /* While priority is known to be in range [0, 65535], meaning 18 bytes
 1477             would be enough, the compiler might not know that.  To avoid a
 1478             -Wformat-truncation false positive, use a larger size.  */
1479 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
1480 }
1481 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
1482 switch_to_section (s);
1483 assemble_aligned_integer (INIT_ARRAY_ENTRY_BYTES, symbol);
1484 }
1485
1486 /* Map rtx_code to unsigned PRU branch op suffix. Callers must
1487 handle sign comparison themselves for signed operations. */
1488 static const char *
1489 pru_comparison_str (enum rtx_code cond)
1490 {
1491 switch (cond)
1492 {
1493 case NE: return "ne";
1494 case EQ: return "eq";
1495 case GEU: return "ge";
1496 case GTU: return "gt";
1497 case LEU: return "le";
1498 case LTU: return "lt";
1499 default: gcc_unreachable ();
1500 }
1501 }
1502
1503 /* Access some RTX as INT_MODE. If X is a CONST_FIXED we can get
1504 the bit representation of X by "casting" it to CONST_INT. */
1505
1506 static rtx
1507 pru_to_int_mode (rtx x)
1508 {
1509 machine_mode mode = GET_MODE (x);
1510
1511 return VOIDmode == mode
1512 ? x
1513 : simplify_gen_subreg (int_mode_for_mode (mode).require (), x, mode, 0);
1514 }
1515
 1516 /* Translate from the MachineDescription notion
 1517    of 8-bit consecutive registers to the PRU
 1518    assembler syntax of REGWORD[.SUBREG].  */
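/* Example of the mapping (illustrative): an HImode value whose first 8-bit
   hard register is number 6 prints as "r1.w2", while an SImode value
   starting at hard register 4 prints simply as "r1".  */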
1519 static const char *
1520 pru_asm_regname (rtx op)
1521 {
1522 static char canon_reg_names[3][LAST_GP_REGNUM][8];
1523 int speci, regi;
1524
1525 gcc_assert (REG_P (op));
1526
1527 if (!canon_reg_names[0][0][0])
1528 {
1529 for (regi = 0; regi < LAST_GP_REGNUM; regi++)
1530 for (speci = 0; speci < 3; speci++)
1531 {
1532 const int sz = (speci == 0) ? 1 : ((speci == 1) ? 2 : 4);
1533 if ((regi + sz) > (32 * 4))
1534 continue; /* Invalid entry. */
1535
1536 /* Construct the lookup table. */
1537 const char *suffix = "";
1538
1539 switch ((sz << 8) | (regi % 4))
1540 {
1541 case (1 << 8) | 0: suffix = ".b0"; break;
1542 case (1 << 8) | 1: suffix = ".b1"; break;
1543 case (1 << 8) | 2: suffix = ".b2"; break;
1544 case (1 << 8) | 3: suffix = ".b3"; break;
1545 case (2 << 8) | 0: suffix = ".w0"; break;
1546 case (2 << 8) | 1: suffix = ".w1"; break;
1547 case (2 << 8) | 2: suffix = ".w2"; break;
1548 case (4 << 8) | 0: suffix = ""; break;
1549 default:
1550 /* Invalid entry. */
1551 continue;
1552 }
1553 sprintf (&canon_reg_names[speci][regi][0],
1554 "r%d%s", regi / 4, suffix);
1555 }
1556 }
1557
1558 switch (GET_MODE_SIZE (GET_MODE (op)))
1559 {
1560 case 1: speci = 0; break;
1561 case 2: speci = 1; break;
1562 case 4: speci = 2; break;
1563 case 8: speci = 2; break; /* Existing GCC test cases are not using %F. */
1564 default: gcc_unreachable ();
1565 }
1566 regi = REGNO (op);
1567 gcc_assert (regi < LAST_GP_REGNUM);
1568 gcc_assert (canon_reg_names[speci][regi][0]);
1569
1570 return &canon_reg_names[speci][regi][0];
1571 }
1572
1573 /* Print the operand OP to file stream FILE modified by LETTER.
1574 LETTER can be one of:
1575
1576 b: prints the register byte start (used by LBBO/SBBO).
1577 B: prints 'c' or 'b' for CTABLE or REG base in a memory address.
1578 F: Full 32-bit register.
1579 H: Higher 16-bits of a const_int operand.
1580 L: Lower 16-bits of a const_int operand.
1581 N: prints next 32-bit register (upper 32bits of a 64bit REG couple).
1582 P: prints swapped condition.
1583 Q: prints swapped and reversed condition.
1584 R: prints reversed condition.
1585 S: print operand mode size (but do not print the operand itself).
1586 T: print exact_log2 () for const_int operands.
1587 u: print QI constant integer as unsigned. No transformation for regs.
1588 V: print exact_log2 () of negated const_int operands.
1589 w: Lower 32-bits of a const_int operand.
1590 W: Upper 32-bits of a const_int operand.
1591 y: print the next 8-bit register (regardless of op size).
1592 z: print the second next 8-bit register (regardless of op size).
1593 */
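/* A few illustrative uses of the modifiers above (not exhaustive): for the
   CONST_INT 0x12345678, 'H' selects bits 31:16 (0x1234) and 'L' selects bits
   15:0 (0x5678); for a QImode constant that GCC stores sign-extended as -1,
   'u' prints the unsigned value 255; for a register operand whose low word
   starts at hard register 28, 'F' prints "r7" and 'N' prints "r8" (the upper
   half of a 64-bit register pair).  */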
1594 static void
1595 pru_print_operand (FILE *file, rtx op, int letter)
1596 {
1597 switch (letter)
1598 {
1599 case 'S':
1600 fprintf (file, "%d", GET_MODE_SIZE (GET_MODE (op)));
1601 return;
1602
1603 default:
1604 break;
1605 }
1606
1607 if (comparison_operator (op, VOIDmode))
1608 {
1609 enum rtx_code cond = GET_CODE (op);
1610 gcc_assert (!pru_signed_cmp_operator (op, VOIDmode));
1611
1612 switch (letter)
1613 {
1614 case 0:
1615 fprintf (file, "%s", pru_comparison_str (cond));
1616 return;
1617 case 'P':
1618 fprintf (file, "%s", pru_comparison_str (swap_condition (cond)));
1619 return;
1620 case 'Q':
1621 cond = swap_condition (cond);
1622 /* Fall through. */
1623 case 'R':
1624 fprintf (file, "%s", pru_comparison_str (reverse_condition (cond)));
1625 return;
1626 }
1627 }
1628
1629 switch (GET_CODE (op))
1630 {
1631 case REG:
1632 if (letter == 0 || letter == 'u')
1633 {
1634 fprintf (file, "%s", pru_asm_regname (op));
1635 return;
1636 }
1637 else if (letter == 'b')
1638 {
1639 if (REGNO (op) > LAST_NONIO_GP_REGNUM)
1640 {
1641 output_operand_lossage ("I/O register operand for '%%%c'",
1642 letter);
1643 return;
1644 }
1645 fprintf (file, "r%d.b%d", REGNO (op) / 4, REGNO (op) % 4);
1646 return;
1647 }
1648 else if (letter == 'F' || letter == 'N')
1649 {
1650 if (REGNO (op) > LAST_NONIO_GP_REGNUM - 1)
1651 {
1652 output_operand_lossage ("I/O register operand for '%%%c'",
1653 letter);
1654 return;
1655 }
1656 if (REGNO (op) % 4 != 0)
1657 {
1658 output_operand_lossage ("non 32 bit register operand for '%%%c'",
1659 letter);
1660 return;
1661 }
1662 fprintf (file, "r%d", REGNO (op) / 4 + (letter == 'N' ? 1 : 0));
1663 return;
1664 }
1665 else if (letter == 'y')
1666 {
1667 if (REGNO (op) > LAST_NONIO_GP_REGNUM - 1)
1668 {
1669 output_operand_lossage ("invalid operand for '%%%c'", letter);
1670 return;
1671 }
1672 fprintf (file, "%s", reg_names[REGNO (op) + 1]);
1673 return;
1674 }
1675 else if (letter == 'z')
1676 {
1677 if (REGNO (op) > LAST_NONIO_GP_REGNUM - 2)
1678 {
1679 output_operand_lossage ("invalid operand for '%%%c'", letter);
1680 return;
1681 }
1682 fprintf (file, "%s", reg_names[REGNO (op) + 2]);
1683 return;
1684 }
1685 break;
1686
1687 case CONST_INT:
1688 if (letter == 'H')
1689 {
1690 HOST_WIDE_INT val = INTVAL (op);
1691 val = (val >> 16) & 0xFFFF;
1692 output_addr_const (file, gen_int_mode (val, SImode));
1693 return;
1694 }
1695 else if (letter == 'L')
1696 {
1697 HOST_WIDE_INT val = INTVAL (op);
1698 val &= 0xFFFF;
1699 output_addr_const (file, gen_int_mode (val, SImode));
1700 return;
1701 }
1702 else if (letter == 'T')
1703 {
1704 /* The predicate should have already validated the 1-high-bit
1705 requirement. Use CTZ here to deal with constant's sign
1706 extension. */
1707 HOST_WIDE_INT val = wi::ctz (INTVAL (op));
1708 if (val < 0 || val > 31)
1709 {
1710 output_operand_lossage ("invalid operand for '%%%c'", letter);
1711 return;
1712 }
1713 output_addr_const (file, gen_int_mode (val, SImode));
1714 return;
1715 }
1716 else if (letter == 'V')
1717 {
1718 HOST_WIDE_INT val = wi::ctz (~INTVAL (op));
1719 if (val < 0 || val > 31)
1720 {
1721 output_operand_lossage ("invalid operand for '%%%c'", letter);
1722 return;
1723 }
1724 output_addr_const (file, gen_int_mode (val, SImode));
1725 return;
1726 }
1727 else if (letter == 'w')
1728 {
1729 HOST_WIDE_INT val = INTVAL (op) & 0xffffffff;
1730 output_addr_const (file, gen_int_mode (val, SImode));
1731 return;
1732 }
1733 else if (letter == 'W')
1734 {
1735 HOST_WIDE_INT val = (INTVAL (op) >> 32) & 0xffffffff;
1736 output_addr_const (file, gen_int_mode (val, SImode));
1737 return;
1738 }
1739 else if (letter == 'u')
1740 {
 1741             /* Work around GCC's representation of QI constants in sign-extended
 1742                form, and the PRU assembler's insistence on unsigned constant
 1743                integers.  See the notes about the O constraint.  */
1744 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op) & 0xff);
1745 return;
1746 }
1747 /* Else, fall through. */
1748
1749 case CONST:
1750 case LABEL_REF:
1751 case SYMBOL_REF:
1752 if (letter == 0)
1753 {
1754 output_addr_const (file, op);
1755 return;
1756 }
1757 break;
1758
1759 case CONST_FIXED:
1760 {
1761 HOST_WIDE_INT ival = INTVAL (pru_to_int_mode (op));
1762 if (letter != 0)
1763 output_operand_lossage ("unsupported code '%c' for fixed-point:",
1764 letter);
1765 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
1766 return;
1767 }
1768 break;
1769
1770 case CONST_DOUBLE:
1771 if (letter == 0)
1772 {
1773 long val;
1774
1775 if (GET_MODE (op) != SFmode)
1776 {
1777 output_operand_lossage ("double constants not supported");
1778 return;
1779 }
1780 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), val);
1781 fprintf (file, "0x%lx", val);
1782 return;
1783 }
1784 else if (letter == 'w' || letter == 'W')
1785 {
1786 long t[2];
1787 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), t);
1788 fprintf (file, "0x%lx", t[letter == 'w' ? 0 : 1]);
1789 return;
1790 }
1791 else
1792 {
1793 output_operand_lossage ("invalid operand for '%%%c'", letter);
1794 return;
1795 }
1796 break;
1797
1798 case SUBREG:
 1799       /* Subregs should not appear at such a late stage.  */
1800 gcc_unreachable ();
1801 break;
1802
1803 case MEM:
1804 if (letter == 0)
1805 {
1806 output_address (VOIDmode, op);
1807 return;
1808 }
1809 else if (letter == 'B')
1810 {
1811 rtx base = XEXP (op, 0);
1812 if (GET_CODE (base) == PLUS)
1813 {
1814 rtx op0 = XEXP (base, 0);
1815 rtx op1 = XEXP (base, 1);
1816
 1817               /* PLUS cannot have two constant operands, so one of
 1818                  them must be a REG; hence we must check for an
 1819                  exact base address.  */
1820 if (ctable_base_operand (op1, VOIDmode))
1821 {
1822 fprintf (file, "c");
1823 return;
1824 }
1825 else if (REG_P (op0))
1826 {
1827 fprintf (file, "b");
1828 return;
1829 }
1830 else
1831 gcc_unreachable ();
1832 }
1833 else if (REG_P (base))
1834 {
1835 fprintf (file, "b");
1836 return;
1837 }
1838 else if (ctable_addr_operand (base, VOIDmode))
1839 {
1840 fprintf (file, "c");
1841 return;
1842 }
1843 else
1844 gcc_unreachable ();
1845 }
1846 break;
1847
1848 case CODE_LABEL:
1849 if (letter == 0)
1850 {
1851 output_addr_const (file, op);
1852 return;
1853 }
1854 break;
1855
1856 default:
1857 break;
1858 }
1859
1860 output_operand_lossage ("unsupported operand %s for code '%c'",
1861 GET_RTX_NAME (GET_CODE (op)), letter);
1862 }
1863
1864 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
1865 static void
1866 pru_print_operand_address (FILE *file, machine_mode mode, rtx op)
1867 {
1868 if (CONSTANT_ADDRESS_P (op) && text_segment_operand (op, VOIDmode))
1869 {
1870 output_operand_lossage ("unexpected text address:");
1871 return;
1872 }
1873
1874 switch (GET_CODE (op))
1875 {
1876 case CONST:
1877 case LABEL_REF:
1878 case CONST_WIDE_INT:
1879 case SYMBOL_REF:
1880 break;
1881
1882 case CONST_INT:
1883 {
1884 unsigned HOST_WIDE_INT caddr = INTVAL (op);
1885 int base = pru_get_ctable_base_index (caddr);
1886 int offs = pru_get_ctable_base_offset (caddr);
1887 if (base < 0)
1888 {
1889 output_operand_lossage ("unsupported constant address:");
1890 return;
1891 }
1892 fprintf (file, "%d, %d", base, offs);
1893 return;
1894 }
1895 break;
1896
1897 case PLUS:
1898 {
1899 int base;
1900 rtx op0 = XEXP (op, 0);
1901 rtx op1 = XEXP (op, 1);
1902
1903 if (REG_P (op0) && CONST_INT_P (op1)
1904 && pru_get_ctable_exact_base_index (INTVAL (op1)) >= 0)
1905 {
1906 base = pru_get_ctable_exact_base_index (INTVAL (op1));
1907 fprintf (file, "%d, %s", base, pru_asm_regname (op0));
1908 return;
1909 }
1910 else if (REG_P (op1) && CONST_INT_P (op0)
1911 && pru_get_ctable_exact_base_index (INTVAL (op0)) >= 0)
1912 {
1913 /* Not valid RTL. */
1914 gcc_unreachable ();
1915 }
1916 else if (REG_P (op0) && CONSTANT_P (op1))
1917 {
1918 fprintf (file, "%s, ", pru_asm_regname (op0));
1919 output_addr_const (file, op1);
1920 return;
1921 }
1922 else if (REG_P (op1) && CONSTANT_P (op0))
1923 {
1924 /* Not valid RTL. */
1925 gcc_unreachable ();
1926 }
1927 else if (REG_P (op1) && REG_P (op0))
1928 {
1929 fprintf (file, "%s, %s", pru_asm_regname (op0),
1930 pru_asm_regname (op1));
1931 return;
1932 }
1933 }
1934 break;
1935
1936 case REG:
1937 fprintf (file, "%s, 0", pru_asm_regname (op));
1938 return;
1939
1940 case MEM:
1941 {
1942 rtx base = XEXP (op, 0);
1943 pru_print_operand_address (file, mode, base);
1944 return;
1945 }
1946 default:
1947 break;
1948 }
1949
1950 output_operand_lossage ("unsupported memory expression:");
1951 }
1952
1953 /* Implement TARGET_ASM_FUNCTION_PROLOGUE. */
1954 static void
1955 pru_asm_function_prologue (FILE *file)
1956 {
1957 if (flag_verbose_asm || flag_debug_asm)
1958 pru_dump_frame_layout (file);
1959 }
1960
1961 /* Implement `TARGET_ASM_INTEGER'.
1962 Target hook for assembling integer objects. The PRU version needs
1963 special handling for references to pmem. Code copied from the AVR port. */
1964
1965 static bool
1966 pru_assemble_integer (rtx x, unsigned int size, int aligned_p)
1967 {
1968 if (size == POINTER_SIZE / BITS_PER_UNIT
1969 && aligned_p
1970 && text_segment_operand (x, VOIDmode))
1971 {
1972 fputs ("\t.4byte\t%pmem(", asm_out_file);
1973 output_addr_const (asm_out_file, x);
1974 fputs (")\n", asm_out_file);
1975
1976 return true;
1977 }
1978 else if (size == INIT_ARRAY_ENTRY_BYTES
1979 && aligned_p
1980 && text_segment_operand (x, VOIDmode))
1981 {
1982 fputs ("\t.2byte\t%pmem(", asm_out_file);
1983 output_addr_const (asm_out_file, x);
1984 fputs (")\n", asm_out_file);
1985
1986 return true;
1987 }
1988 else
1989 {
1990 return default_assemble_integer (x, size, aligned_p);
1991 }
1992 }
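
/* Illustrative sketch (not part of the port): given PRU C code along the
   lines of

       void handler (void);
       void (*isr_table[1]) (void) = { handler };

   the initializer is a pointer into the text segment, so the hook above
   emits approximately

       .4byte  %pmem(handler)

   rather than a plain ".4byte handler", so that the reference is resolved
   as a program-memory (instruction) address.  The names "handler" and
   "isr_table" are invented for this example.  */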
1993
1994 /* Implement TARGET_ASM_FILE_START. */
1995
1996 static void
1997 pru_file_start (void)
1998 {
1999 default_file_start ();
2000
2001 /* The compiler will take care of placing %label, so there is no
2002 need to confuse users with this warning. */
2003 fprintf (asm_out_file, "\t.set no_warn_regname_label\n");
2004 }
2005 \f
2006 /* Function argument related. */
2007
2008 /* Return the number of bytes needed for storing an argument with
2009 the given MODE and TYPE. */
2010 static int
2011 pru_function_arg_size (machine_mode mode, const_tree type)
2012 {
2013 HOST_WIDE_INT param_size;
2014
2015 if (mode == BLKmode)
2016 param_size = int_size_in_bytes (type);
2017 else
2018 param_size = GET_MODE_SIZE (mode);
2019
2020 /* Convert to words (round up). */
2021 param_size = (UNITS_PER_WORD - 1 + param_size) / UNITS_PER_WORD;
2022 gcc_assert (param_size >= 0);
2023
2024 return param_size;
2025 }
2026
2027 /* Check whether an argument with the given size must be
2028 passed/returned in a register.
2029
2030 Reference:
2031 https://e2e.ti.com/support/development_tools/compiler/f/343/p/650176/2393029
2032
2033 Arguments of sizes other than 8/16/24/32/64 bits are passed on the stack. */
2034 static bool
2035 pru_arg_in_reg_bysize (size_t sz)
2036 {
2037 return sz == 1 || sz == 2 || sz == 3 || sz == 4 || sz == 8;
2038 }
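
/* Illustrative sketch of the rule above (type names are only examples):

       uint8_t, uint16_t, uint32_t, uint64_t    -> passed in registers
       struct { char c[3]; }  (24 bits)         -> passed in registers
       struct { char c[5]; }  (40 bits)         -> passed on the stack

   Only argument sizes of 1, 2, 3, 4 and 8 bytes qualify for register
   passing; everything else goes on the stack.  */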
2039
2040 /* Helper function to get the starting storage HW register for an argument,
2041 or -1 if it must be passed on the stack. The CUM_V state is not changed. */
2042 static int
2043 pru_function_arg_regi (cumulative_args_t cum_v,
2044 machine_mode mode, const_tree type,
2045 bool named)
2046 {
2047 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2048 size_t argsize = pru_function_arg_size (mode, type);
2049 size_t i, bi;
2050 int regi = -1;
2051
2052 if (!pru_arg_in_reg_bysize (argsize))
2053 return -1;
2054
2055 if (!named)
2056 return -1;
2057
2058 /* Find the first available slot that fits. Yes, that's the PRU ABI. */
2059 for (i = 0; regi < 0 && i < ARRAY_SIZE (cum->regs_used); i++)
2060 {
2061 /* VLAs and vector types are not defined in the PRU ABI. Let's
2062 handle them the same as their same-sized counterparts. This way
2063 we do not need to treat BLKmode differently, and only need to check
2064 the size. */
2065 gcc_assert (argsize == 1 || argsize == 2 || argsize == 3
2066 || argsize == 4 || argsize == 8);
2067
2068 /* Ensure SI and DI arguments are stored in full registers only. */
2069 if ((argsize >= 4) && (i % 4) != 0)
2070 continue;
2071
2072 /* Structures with size 24 bits are passed starting at a full
2073 register boundary. */
2074 if (argsize == 3 && (i % 4) != 0)
2075 continue;
2076
2077 /* rX.w0/w1/w2 are OK. But avoid spreading the second byte
2078 into a different full register. */
2079 if (argsize == 2 && (i % 4) == 3)
2080 continue;
2081
2082 for (bi = 0;
2083 bi < argsize && (bi + i) < ARRAY_SIZE (cum->regs_used);
2084 bi++)
2085 {
2086 if (cum->regs_used[bi + i])
2087 break;
2088 }
2089 if (bi == argsize)
2090 regi = FIRST_ARG_REGNUM + i;
2091 }
2092
2093 return regi;
2094 }
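
/* A worked example of the first-fit search above, assuming the first
   argument register is r14 (FIRST_ARG_REGNUM) and using invented parameter
   names:

       void f (uint8_t a, uint32_t b, uint16_t c);

       a (1 byte)  -> byte slot 0       (r14.b0)
       b (4 bytes) -> byte slots 4..7   (r15), because SI arguments must
                      start at a full register boundary
       c (2 bytes) -> byte slots 1..2   (r14.w1), the first free pair whose
                      second byte does not spill into another register

   This is only a sketch of the allocation logic; the authoritative rules
   are the checks in the loop above.  */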
2095
2096 /* Mark in CUM_V that a function argument will occupy the HW register slot
2097 starting at REGI. The number of consecutive 8-bit HW registers marked as
2098 occupied depends on the MODE and TYPE of the argument. */
2099 static void
2100 pru_function_arg_regi_mark_slot (int regi,
2101 cumulative_args_t cum_v,
2102 machine_mode mode, const_tree type,
2103 bool named)
2104 {
2105 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2106 HOST_WIDE_INT param_size = pru_function_arg_size (mode, type);
2107
2108 gcc_assert (named);
2109
2110 /* Mark all byte sub-registers occupied by argument as used. */
2111 while (param_size--)
2112 {
2113 gcc_assert (regi >= FIRST_ARG_REGNUM && regi <= LAST_ARG_REGNUM);
2114 gcc_assert (!cum->regs_used[regi - FIRST_ARG_REGNUM]);
2115 cum->regs_used[regi - FIRST_ARG_REGNUM] = true;
2116 regi++;
2117 }
2118 }
2119
2120 /* Define where to put the arguments to a function. Value is zero to
2121 push the argument on the stack, or a hard register in which to
2122 store the argument.
2123
2124 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2125 the preceding args and about the function being called.
2126 ARG is a description of the argument. */
2127
2128 static rtx
2129 pru_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
2130 {
2131 rtx return_rtx = NULL_RTX;
2132 int regi = pru_function_arg_regi (cum_v, arg.mode, arg.type, arg.named);
2133
2134 if (regi >= 0)
2135 return_rtx = gen_rtx_REG (arg.mode, regi);
2136
2137 return return_rtx;
2138 }
2139
2140 /* Implement TARGET_ARG_PARTIAL_BYTES. PRU never splits any arguments
2141 between registers and memory, so we can return 0. */
2142
2143 static int
2144 pru_arg_partial_bytes (cumulative_args_t, const function_arg_info &)
2145 {
2146 return 0;
2147 }
2148
2149 /* Update the data in CUM to advance over argument ARG. */
2150
2151 static void
2152 pru_function_arg_advance (cumulative_args_t cum_v,
2153 const function_arg_info &arg)
2154 {
2155 int regi = pru_function_arg_regi (cum_v, arg.mode, arg.type, arg.named);
2156
2157 if (regi >= 0)
2158 pru_function_arg_regi_mark_slot (regi, cum_v, arg.mode,
2159 arg.type, arg.named);
2160 }
2161
2162 /* Implement TARGET_FUNCTION_VALUE. */
2163 static rtx
2164 pru_function_value (const_tree ret_type, const_tree fn ATTRIBUTE_UNUSED,
2165 bool outgoing ATTRIBUTE_UNUSED)
2166 {
2167 return gen_rtx_REG (TYPE_MODE (ret_type), FIRST_RETVAL_REGNUM);
2168 }
2169
2170 /* Implement TARGET_LIBCALL_VALUE. */
2171 static rtx
2172 pru_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
2173 {
2174 return gen_rtx_REG (mode, FIRST_RETVAL_REGNUM);
2175 }
2176
2177 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
2178 static bool
2179 pru_function_value_regno_p (const unsigned int regno)
2180 {
2181 return regno == FIRST_RETVAL_REGNUM;
2182 }
2183
2184 /* Implement TARGET_RETURN_IN_MEMORY. */
2185 bool
2186 pru_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
2187 {
2188 bool in_memory = (!pru_arg_in_reg_bysize (int_size_in_bytes (type))
2189 || int_size_in_bytes (type) == -1);
2190
2191 return in_memory;
2192 }
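
/* For example (struct names invented for illustration): an 8-byte aggregate
   can be returned in registers, while a 12-byte aggregate, or any type whose
   size is not a compile-time constant, is returned in memory:

       struct ok  { uint32_t a, b;    };   -> returned in registers
       struct big { uint32_t a, b, c; };   -> returned in memory  */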
2193 \f
2194 /* Implement TARGET_CAN_USE_DOLOOP_P. */
2195
2196 static bool
2197 pru_can_use_doloop_p (const widest_int &, const widest_int &iterations_max,
2198 unsigned int loop_depth, bool)
2199 {
2200 /* Considering limitations in the hardware, only use doloop
2201 for innermost loops which must be entered from the top. */
2202 if (loop_depth > 1)
2203 return false;
2204 /* The PRU internal loop counter is 16 bits wide. Remember that
2205 iterations_max holds the maximum number of loop latch executions, while
2206 the PRU LOOP instruction needs the count of loop body executions. */
2207 if (iterations_max == 0 || wi::geu_p (iterations_max, 0xffff))
2208 return false;
2209
2210 return true;
2211 }
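
/* Illustrative sketch (variable names invented): an innermost counted loop
   such as

       for (unsigned int i = 0; i < n; i++)
         acc += buf[i];

   is eligible for the LOOP instruction only when the iteration count is
   known to fit in the 16-bit hardware loop counter and the loop is not
   nested; otherwise the doloop optimization is rejected here and an
   ordinary decrement-and-branch loop is kept.  */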
2212
2213 /* Return NULL if INSN is valid within a low-overhead loop.
2214 Otherwise return a reason why doloop cannot be applied. */
2215
2216 static const char *
2217 pru_invalid_within_doloop (const rtx_insn *insn)
2218 {
2219 if (CALL_P (insn))
2220 return "Function call in the loop.";
2221
2222 if (JUMP_P (insn) && INSN_CODE (insn) == CODE_FOR_return)
2223 return "Return from a call instruction in the loop.";
2224
2225 if (NONDEBUG_INSN_P (insn)
2226 && INSN_CODE (insn) < 0
2227 && (GET_CODE (PATTERN (insn)) == ASM_INPUT
2228 || asm_noperands (PATTERN (insn)) >= 0))
2229 return "Loop contains asm statement.";
2230
2231 return NULL;
2232 }
2233
2234
2235 /* Figure out where to put LABEL, which is the label for a repeat loop.
2236 The loop ends just before LAST_INSN. If SHARED, insns other than the
2237 "repeat" might use LABEL to jump to the loop's continuation point.
2238
2239 Return the last instruction in the adjusted loop. */
2240
2241 static rtx_insn *
2242 pru_insert_loop_label_last (rtx_insn *last_insn, rtx_code_label *label,
2243 bool shared)
2244 {
2245 rtx_insn *next, *prev;
2246 int count = 0, code, icode;
2247
2248 if (dump_file)
2249 fprintf (dump_file, "considering end of repeat loop at insn %d\n",
2250 INSN_UID (last_insn));
2251
2252 /* Set PREV to the last insn in the loop. */
2253 prev = PREV_INSN (last_insn);
2254
2255 /* Set NEXT to the next insn after the loop label. */
2256 next = last_insn;
2257 if (!shared)
2258 while (prev != 0)
2259 {
2260 code = GET_CODE (prev);
2261 if (code == CALL_INSN || code == CODE_LABEL || code == BARRIER)
2262 break;
2263
2264 if (INSN_P (prev))
2265 {
2266 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
2267 prev = as_a <rtx_insn *> (XVECEXP (PATTERN (prev), 0, 1));
2268
2269 /* Other insns that must not appear among the last two opcodes of the loop. */
2270 icode = recog_memoized (prev);
2271 if (icode < 0
2272 || icode == CODE_FOR_pruloophi
2273 || icode == CODE_FOR_pruloopsi)
2274 break;
2275
2276 count++;
2277 next = prev;
2278 if (dump_file)
2279 print_rtl_single (dump_file, next);
2280 if (count == 2)
2281 break;
2282 }
2283 prev = PREV_INSN (prev);
2284 }
2285
2286 /* Insert the nops. */
2287 if (dump_file && count < 2)
2288 fprintf (dump_file, "Adding %d nop%s inside loop\n\n",
2289 2 - count, count == 1 ? "" : "s");
2290
2291 for (; count < 2; count++)
2292 emit_insn_before (gen_nop (), last_insn);
2293
2294 /* Insert the label. */
2295 emit_label_before (label, last_insn);
2296
2297 return last_insn;
2298 }
2299
2300 /* If IS_END is false, expand a canonical doloop_begin RTL into the
2301 PRU-specific doloop_begin_internal. Otherwise expand doloop_end to
2302 doloop_end_internal. */
2303 void
2304 pru_emit_doloop (rtx *operands, int is_end)
2305 {
2306 rtx tag;
2307
2308 if (cfun->machine->doloop_tags == 0
2309 || cfun->machine->doloop_tag_from_end == is_end)
2310 {
2311 cfun->machine->doloop_tags++;
2312 cfun->machine->doloop_tag_from_end = is_end;
2313 }
2314
2315 tag = GEN_INT (cfun->machine->doloop_tags - 1);
2316 machine_mode opmode = GET_MODE (operands[0]);
2317 gcc_assert (opmode == HImode || opmode == SImode);
2318
2319 if (is_end)
2320 emit_jump_insn (gen_doloop_end_internal (opmode, operands[0],
2321 operands[1], tag));
2322 else
2323 emit_insn (gen_doloop_begin_internal (opmode, operands[0],
2324 operands[0], tag));
2325 }
2326
2327
2328 /* Code for converting doloop_begins and doloop_ends into valid
2329 PRU instructions. Idea and code snippets borrowed from the mep port.
2330
2331 A doloop_begin is just a placeholder:
2332
2333 $count = unspec ($count)
2334
2335 where $count is initially the number of iterations.
2336 doloop_end has the form:
2337
2338 if (--$count == 0) goto label
2339
2340 The counter variable is private to the doloop insns; nothing else
2341 relies on its value.
2342
2343 There are three cases, in decreasing order of preference:
2344
2345 1. A loop has exactly one doloop_begin and one doloop_end.
2346 The doloop_end branches to the first instruction after
2347 the doloop_begin.
2348
2349 In this case we can replace the doloop_begin with a LOOP
2350 instruction and remove the doloop_end. I.e.:
2351
2352 $count1 = unspec ($count1)
2353 label:
2354 ...
2355 if (--$count2 != 0) goto label
2356
2357 becomes:
2358
2359 LOOP end_label,$count1
2360 label:
2361 ...
2362 end_label:
2363 # end loop
2364
2365 2. As for (1), except there are several doloop_ends. One of them
2366 (call it X) falls through to a label L. All the others fall
2367 through to branches to L.
2368
2369 In this case, we remove X and replace the other doloop_ends
2370 with branches to the LOOP label. For example:
2371
2372 $count1 = unspec ($count1)
2373 label:
2374 ...
2375 if (--$count1 != 0) goto label
2376 end_label:
2377 ...
2378 if (--$count2 != 0) goto label
2379 goto end_label
2380
2381 becomes:
2382
2383 LOOP end_label,$count1
2384 label:
2385 ...
2386 end_label:
2387 # end repeat
2388 ...
2389 goto end_label
2390
2391 3. The fallback case. Replace doloop_begins with:
2392
2393 $count = $count
2394
2395 Replace doloop_ends with the equivalent of:
2396
2397 $count = $count - 1
2398 if ($count != 0) goto loop_label
2399
2400 */
2401
2402 /* A structure describing one doloop_begin. */
2403 struct pru_doloop_begin {
2404 /* The next doloop_begin with the same tag. */
2405 struct pru_doloop_begin *next;
2406
2407 /* The instruction itself. */
2408 rtx_insn *insn;
2409
2410 /* The initial counter value. */
2411 rtx loop_count;
2412
2413 /* The counter register. */
2414 rtx counter;
2415 };
2416
2417 /* A structure describing a doloop_end. */
2418 struct pru_doloop_end {
2419 /* The next doloop_end with the same loop tag. */
2420 struct pru_doloop_end *next;
2421
2422 /* The instruction itself. */
2423 rtx_insn *insn;
2424
2425 /* The first instruction after INSN when the branch isn't taken. */
2426 rtx_insn *fallthrough;
2427
2428 /* The location of the counter value. Since doloop_end_internal is a
2429 jump instruction, it has to allow the counter to be stored anywhere
2430 (any non-fixed register). */
2431 rtx counter;
2432
2433 /* The target label (the place where the insn branches when the counter
2434 isn't zero). */
2435 rtx label;
2436
2437 /* A scratch register. Only available when COUNTER isn't stored
2438 in a general register. */
2439 rtx scratch;
2440 };
2441
2442
2443 /* One do-while loop. */
2444 struct pru_doloop {
2445 /* All the doloop_begins for this loop (in no particular order). */
2446 struct pru_doloop_begin *begin;
2447
2448 /* All the doloop_ends. When there is more than one, arrange things
2449 so that the first one is the most likely to be X in case (2) above. */
2450 struct pru_doloop_end *end;
2451 };
2452
2453
2454 /* Return true if LOOP can be converted to use the LOOP instruction
2455 (that is, if it matches case (1) or (2) above). */
2456
2457 static bool
2458 pru_repeat_loop_p (struct pru_doloop *loop)
2459 {
2460 struct pru_doloop_end *end;
2461 rtx_insn *fallthrough;
2462
2463 /* There must be exactly one doloop_begin and at least one doloop_end. */
2464 if (loop->begin == 0 || loop->end == 0 || loop->begin->next != 0)
2465 return false;
2466
2467 /* The first doloop_end (X) must branch back to the insn after
2468 the doloop_begin. */
2469 if (prev_real_insn (as_a<rtx_insn *> (loop->end->label)) != loop->begin->insn)
2470 return false;
2471
2472 /* Check that the first doloop_end (X) can actually reach the
2473 doloop_begin with a U8_PCREL relocation for the LOOP instruction. */
2474 if (get_attr_length (loop->end->insn) != 4)
2475 return false;
2476
2477 /* All the other doloop_ends must branch to the same place as X.
2478 When the branch isn't taken, they must jump to the instruction
2479 after X. */
2480 fallthrough = loop->end->fallthrough;
2481 for (end = loop->end->next; end != 0; end = end->next)
2482 if (end->label != loop->end->label
2483 || !simplejump_p (end->fallthrough)
2484 || fallthrough
2485 != next_real_insn (JUMP_LABEL_AS_INSN (end->fallthrough)))
2486 return false;
2487
2488 return true;
2489 }
2490
2491
2492 /* The main repeat reorg function. See comment above for details. */
2493
2494 static void
2495 pru_reorg_loop (rtx_insn *insns)
2496 {
2497 rtx_insn *insn;
2498 struct pru_doloop *loops, *loop;
2499 struct pru_doloop_begin *begin;
2500 struct pru_doloop_end *end;
2501 size_t tmpsz;
2502
2503 /* Quick exit if we haven't created any loops. */
2504 if (cfun->machine->doloop_tags == 0)
2505 return;
2506
2507 /* Create an array of pru_doloop structures. */
2508 tmpsz = sizeof (loops[0]) * cfun->machine->doloop_tags;
2509 loops = (struct pru_doloop *) alloca (tmpsz);
2510 memset (loops, 0, sizeof (loops[0]) * cfun->machine->doloop_tags);
2511
2512 /* Search the function for do-while insns and group them by loop tag. */
2513 for (insn = insns; insn; insn = NEXT_INSN (insn))
2514 if (INSN_P (insn))
2515 switch (recog_memoized (insn))
2516 {
2517 case CODE_FOR_doloop_begin_internalhi:
2518 case CODE_FOR_doloop_begin_internalsi:
2519 insn_extract (insn);
2520 loop = &loops[INTVAL (recog_data.operand[2])];
2521
2522 tmpsz = sizeof (struct pru_doloop_begin);
2523 begin = (struct pru_doloop_begin *) alloca (tmpsz);
2524 begin->next = loop->begin;
2525 begin->insn = insn;
2526 begin->loop_count = recog_data.operand[1];
2527 begin->counter = recog_data.operand[0];
2528
2529 loop->begin = begin;
2530 break;
2531
2532 case CODE_FOR_doloop_end_internalhi:
2533 case CODE_FOR_doloop_end_internalsi:
2534 insn_extract (insn);
2535 loop = &loops[INTVAL (recog_data.operand[2])];
2536
2537 tmpsz = sizeof (struct pru_doloop_end);
2538 end = (struct pru_doloop_end *) alloca (tmpsz);
2539 end->insn = insn;
2540 end->fallthrough = next_real_insn (insn);
2541 end->counter = recog_data.operand[0];
2542 end->label = recog_data.operand[1];
2543 end->scratch = recog_data.operand[3];
2544
2545 /* If this insn falls through to an unconditional jump,
2546 give it a lower priority than the others. */
2547 if (loop->end != 0 && simplejump_p (end->fallthrough))
2548 {
2549 end->next = loop->end->next;
2550 loop->end->next = end;
2551 }
2552 else
2553 {
2554 end->next = loop->end;
2555 loop->end = end;
2556 }
2557 break;
2558 }
2559
2560 /* Convert the insns for each loop in turn. */
2561 for (loop = loops; loop < loops + cfun->machine->doloop_tags; loop++)
2562 if (pru_repeat_loop_p (loop))
2563 {
2564 /* Case (1) or (2). */
2565 rtx_code_label *repeat_label;
2566 rtx label_ref;
2567 rtx loop_rtx;
2568
2569 /* Create a new label for the repeat insn. */
2570 repeat_label = gen_label_rtx ();
2571
2572 /* Replace the doloop_begin with a repeat. We get rid
2573 of the iteration register because the LOOP instruction
2574 will utilize the PRU core's internal LOOP register. */
2575 label_ref = gen_rtx_LABEL_REF (VOIDmode, repeat_label);
2576 machine_mode loop_mode = GET_MODE (loop->begin->loop_count);
2577 if (loop_mode == VOIDmode)
2578 {
2579 gcc_assert (CONST_INT_P (loop->begin->loop_count));
2580 gcc_assert (UBYTE_INT ( INTVAL (loop->begin->loop_count)));
2581 loop_mode = SImode;
2582 }
2583 gcc_assert (loop_mode == HImode || loop_mode == SImode);
2584 loop_rtx = gen_pruloop (loop_mode, loop->begin->loop_count, label_ref);
2585 emit_insn_before (loop_rtx, loop->begin->insn);
2586
2587 delete_insn (loop->begin->insn);
2588
2589 /* Insert the repeat label before the first doloop_end.
2590 Fill the gap with nops if the LOOP insn is fewer than 2
2591 instructions away from loop->end. */
2592 pru_insert_loop_label_last (loop->end->insn, repeat_label,
2593 loop->end->next != 0);
2594
2595 /* Emit a pruloop_end (to improve the readability of the output). */
2596 emit_insn_before (gen_pruloop_end (), loop->end->insn);
2597
2598 /* HACK: TODO: This is usually not needed, but is required for
2599 a few rare cases where a JUMP that breaks out of the loop
2600 references the LOOP_END address. In other words, since
2601 we're missing a real "loop_end" instruction, a loop "break"
2602 may accidentally reference the loop end itself, and thus
2603 continue the loop. */
2604 for (insn = NEXT_INSN (loop->end->insn);
2605 insn != next_real_insn (loop->end->insn);
2606 insn = NEXT_INSN (insn))
2607 {
2608 if (LABEL_P (insn) && LABEL_NUSES (insn) > 0)
2609 emit_insn_before (gen_nop_loop_guard (), loop->end->insn);
2610 }
2611
2612 /* Delete the first doloop_end. */
2613 delete_insn (loop->end->insn);
2614
2615 /* Replace the others with branches to REPEAT_LABEL. */
2616 for (end = loop->end->next; end != 0; end = end->next)
2617 {
2618 rtx_insn *newjmp;
2619 newjmp = emit_jump_insn_before (gen_jump (repeat_label), end->insn);
2620 JUMP_LABEL (newjmp) = repeat_label;
2621 delete_insn (end->insn);
2622 delete_insn (end->fallthrough);
2623 }
2624 }
2625 else
2626 {
2627 /* Case (3). First replace all the doloop_begins with moves that
2628 set the HW register used as the loop counter. */
2629 for (begin = loop->begin; begin != 0; begin = begin->next)
2630 {
2631 insn = gen_move_insn (copy_rtx (begin->counter),
2632 copy_rtx (begin->loop_count));
2633 emit_insn_before (insn, begin->insn);
2634 delete_insn (begin->insn);
2635 }
2636
2637 /* Replace all the doloop_ends with decrement-and-branch sequences. */
2638 for (end = loop->end; end != 0; end = end->next)
2639 {
2640 rtx reg;
2641
2642 start_sequence ();
2643
2644 /* Load the counter value into a general register. */
2645 reg = end->counter;
2646 if (!REG_P (reg) || REGNO (reg) > LAST_NONIO_GP_REGNUM)
2647 {
2648 reg = end->scratch;
2649 emit_move_insn (copy_rtx (reg), copy_rtx (end->counter));
2650 }
2651
2652 /* Decrement the counter. */
2653 emit_insn (gen_add3_insn (copy_rtx (reg), copy_rtx (reg),
2654 constm1_rtx));
2655
2656 /* Copy it back to its original location. */
2657 if (reg != end->counter)
2658 emit_move_insn (copy_rtx (end->counter), copy_rtx (reg));
2659
2660 /* Jump back to the start label. */
2661 insn = emit_jump_insn (gen_cbranchsi4 (gen_rtx_NE (VOIDmode, reg,
2662 const0_rtx),
2663 reg,
2664 const0_rtx,
2665 end->label));
2666
2667 JUMP_LABEL (insn) = end->label;
2668 LABEL_NUSES (end->label)++;
2669
2670 /* Emit the whole sequence before the doloop_end. */
2671 insn = get_insns ();
2672 end_sequence ();
2673 emit_insn_before (insn, end->insn);
2674
2675 /* Delete the doloop_end. */
2676 delete_insn (end->insn);
2677 }
2678 }
2679 }
2680
2681 /* Implement TARGET_MACHINE_DEPENDENT_REORG. */
2682 static void
2683 pru_reorg (void)
2684 {
2685 rtx_insn *insns = get_insns ();
2686
2687 compute_bb_for_insn ();
2688 df_analyze ();
2689
2690 /* Correct insn lengths are needed to decide whether a LOOP
2691 instruction can be emitted, due to U8_PCREL limitations. */
2692 shorten_branches (get_insns ());
2693
2694 /* The generic reorg_loops () is not suitable for PRU because
2695 it doesn't handle doloop_begin/end tying. And we need our
2696 doloop_begin emitted before reload. It is difficult to coalesce
2697 UBYTE constant initial loop values into the LOOP insn during
2698 the machine reorg phase. */
2699 pru_reorg_loop (insns);
2700
2701 df_finish_pass (false);
2702 }
2703 \f
2704 /* Enumerate all PRU-specific builtins. */
2705 enum pru_builtin
2706 {
2707 PRU_BUILTIN_DELAY_CYCLES,
2708 PRU_BUILTIN_HALT,
2709 PRU_BUILTIN_LMBD,
2710 PRU_BUILTIN_max
2711 };
2712
2713 static GTY(()) tree pru_builtins [(int) PRU_BUILTIN_max];
2714
2715 /* Implement TARGET_INIT_BUILTINS. */
2716
2717 static void
2718 pru_init_builtins (void)
2719 {
2720 tree void_ftype_longlong
2721 = build_function_type_list (void_type_node,
2722 long_long_integer_type_node,
2723 NULL);
2724 tree uint_ftype_uint_uint
2725 = build_function_type_list (unsigned_type_node,
2726 unsigned_type_node,
2727 unsigned_type_node,
2728 NULL);
2729
2730 tree void_ftype_void
2731 = build_function_type_list (void_type_node,
2732 void_type_node,
2733 NULL);
2734
2735 pru_builtins[PRU_BUILTIN_DELAY_CYCLES]
2736 = add_builtin_function ("__delay_cycles", void_ftype_longlong,
2737 PRU_BUILTIN_DELAY_CYCLES, BUILT_IN_MD, NULL,
2738 NULL_TREE);
2739
2740 pru_builtins[PRU_BUILTIN_HALT]
2741 = add_builtin_function ("__halt", void_ftype_void,
2742 PRU_BUILTIN_HALT, BUILT_IN_MD, NULL,
2743 NULL_TREE);
2744
2745 pru_builtins[PRU_BUILTIN_LMBD]
2746 = add_builtin_function ("__lmbd", uint_ftype_uint_uint,
2747 PRU_BUILTIN_LMBD, BUILT_IN_MD, NULL,
2748 NULL_TREE);
2749 }
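
/* A minimal usage sketch for the builtins registered above (function and
   variable names are invented; the result convention follows the PRU LMBD
   instruction, which yields 32 when no bit is found):

       unsigned int
       example (unsigned int word)
       {
         __delay_cycles (200);                 // argument must be a constant
         unsigned int pos = __lmbd (word, 1);  // left-most bit detect
         if (pos == 32)                        // 32 => bit not found
           __halt ();                          // stop the PRU core
         return pos;
       }
*/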
2750
2751 /* Implement TARGET_BUILTIN_DECL. */
2752
2753 static tree
2754 pru_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
2755 {
2756 switch (code)
2757 {
2758 case PRU_BUILTIN_DELAY_CYCLES:
2759 case PRU_BUILTIN_HALT:
2760 case PRU_BUILTIN_LMBD:
2761 return pru_builtins[code];
2762 default:
2763 return error_mark_node;
2764 }
2765 }
2766 \f
2767 /* Emit a sequence of one or more delay_cycles_X insns, in order to generate
2768 code that delays exactly ARG cycles. */
2769
2770 static rtx
2771 pru_expand_delay_cycles (rtx arg)
2772 {
2773 HOST_WIDE_INT c, n;
2774
2775 if (GET_CODE (arg) != CONST_INT)
2776 {
2777 error ("%<__delay_cycles%> only takes constant arguments");
2778 return NULL_RTX;
2779 }
2780
2781 c = INTVAL (arg);
2782
2783 gcc_assert (HOST_BITS_PER_WIDE_INT > 32);
2784 if (c < 0)
2785 {
2786 error ("%<__delay_cycles%> only takes non-negative cycle counts");
2787 return NULL_RTX;
2788 }
2789
2790 emit_insn (gen_delay_cycles_start (arg));
2791
2792 /* A 32-bit delay loop takes 2 + 2x cycles. */
2793 if (c > 2 * 0xffff + 1)
2794 {
2795 n = (c - 2) / 2;
2796 c -= (n * 2) + 2;
2797 if ((unsigned long long) n > 0xffffffffULL)
2798 {
2799 error ("%<__delay_cycles%> is limited to 32-bit loop counts");
2800 return NULL_RTX;
2801 }
2802 emit_insn (gen_delay_cycles_2x_plus2_si (GEN_INT (n)));
2803 }
2804
2805 /* A 16-bit delay loop takes 1 + 2x cycles. */
2806 if (c > 2)
2807 {
2808 n = (c - 1) / 2;
2809 c -= (n * 2) + 1;
2810
2811 emit_insn (gen_delay_cycles_2x_plus1_hi (GEN_INT (n)));
2812 }
2813
2814 while (c > 0)
2815 {
2816 emit_insn (gen_delay_cycles_1 ());
2817 c -= 1;
2818 }
2819
2820 emit_insn (gen_delay_cycles_end (arg));
2821
2822 return NULL_RTX;
2823 }
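
/* A worked example of the expansion above: for __delay_cycles (1000), the
   request is below the 32-bit loop threshold of 2 * 0xffff + 1 = 131071
   cycles, so no 32-bit loop is emitted.  A 16-bit loop with
   n = (1000 - 1) / 2 = 499 iterations consumes 1 + 2 * 499 = 999 cycles,
   and a single one-cycle insn covers the remaining cycle, giving exactly
   1000 cycles between the start and end markers.  */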
2824
2825
2826 /* Implement TARGET_EXPAND_BUILTIN. Expand an expression EXP that calls
2827 a built-in function, with result going to TARGET if that's convenient
2828 (and in mode MODE if that's convenient).
2829 SUBTARGET may be used as the target for computing one of EXP's operands.
2830 IGNORE is nonzero if the value is to be ignored. */
2831
2832 static rtx
2833 pru_expand_builtin (tree exp, rtx target,
2834 rtx subtarget ATTRIBUTE_UNUSED,
2835 machine_mode mode,
2836 int ignore ATTRIBUTE_UNUSED)
2837 {
2838 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
2839 unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
2840
2841 switch (fcode)
2842 {
2843 case PRU_BUILTIN_DELAY_CYCLES:
2844 {
2845 rtx arg1 = expand_normal (CALL_EXPR_ARG (exp, 0));
2846 return pru_expand_delay_cycles (arg1);
2847 }
2848 break;
2849 case PRU_BUILTIN_HALT:
2850 {
2851 emit_insn (gen_pru_halt ());
2852 return NULL_RTX;
2853 }
2854 break;
2855 case PRU_BUILTIN_LMBD:
2856 {
2857 rtx arg1 = expand_normal (CALL_EXPR_ARG (exp, 0));
2858 rtx arg2 = expand_normal (CALL_EXPR_ARG (exp, 1));
2859
2860 if (target == NULL_RTX || GET_MODE (target) != mode)
2861 {
2862 target = gen_reg_rtx (mode);
2863 }
2864
2865 emit_insn (gen_pru_lmbd (mode, target, arg1, arg2));
2866 return target;
2867 }
2868 break;
2869 default:
2870 internal_error ("bad builtin code");
2871 }
2872
2873 return NULL_RTX;
2874 }
2875 \f
2876 /* Remember the last target of pru_set_current_function. */
2877 static GTY(()) tree pru_previous_fndecl;
2878
2879 /* Establish appropriate back-end context for processing the function
2880 FNDECL. The argument might be NULL to indicate processing at top
2881 level, outside of any function scope. */
2882 static void
2883 pru_set_current_function (tree fndecl)
2884 {
2885 tree old_tree = (pru_previous_fndecl
2886 ? DECL_FUNCTION_SPECIFIC_TARGET (pru_previous_fndecl)
2887 : NULL_TREE);
2888
2889 tree new_tree = (fndecl
2890 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
2891 : NULL_TREE);
2892
2893 if (fndecl && fndecl != pru_previous_fndecl)
2894 {
2895 pru_previous_fndecl = fndecl;
2896 if (old_tree == new_tree)
2897 ;
2898
2899 else if (new_tree)
2900 {
2901 cl_target_option_restore (&global_options, &global_options_set,
2902 TREE_TARGET_OPTION (new_tree));
2903 target_reinit ();
2904 }
2905
2906 else if (old_tree)
2907 {
2908 struct cl_target_option *def
2909 = TREE_TARGET_OPTION (target_option_current_node);
2910
2911 cl_target_option_restore (&global_options, &global_options_set, def);
2912 target_reinit ();
2913 }
2914 }
2915 }
2916 \f
2917 /* Implement TARGET_UNWIND_WORD_MODE.
2918
2919 Since PRU is really a 32-bit CPU, the default word_mode is not suitable. */
2920 static scalar_int_mode
2921 pru_unwind_word_mode (void)
2922 {
2923 return SImode;
2924 }
2925 \f
2926
2927 /* Initialize the GCC target structure. */
2928 #undef TARGET_ASM_FUNCTION_PROLOGUE
2929 #define TARGET_ASM_FUNCTION_PROLOGUE pru_asm_function_prologue
2930 #undef TARGET_ASM_INTEGER
2931 #define TARGET_ASM_INTEGER pru_assemble_integer
2932
2933 #undef TARGET_ASM_FILE_START
2934 #define TARGET_ASM_FILE_START pru_file_start
2935
2936 #undef TARGET_INIT_BUILTINS
2937 #define TARGET_INIT_BUILTINS pru_init_builtins
2938 #undef TARGET_EXPAND_BUILTIN
2939 #define TARGET_EXPAND_BUILTIN pru_expand_builtin
2940 #undef TARGET_BUILTIN_DECL
2941 #define TARGET_BUILTIN_DECL pru_builtin_decl
2942
2943 #undef TARGET_COMPUTE_FRAME_LAYOUT
2944 #define TARGET_COMPUTE_FRAME_LAYOUT pru_compute_frame_layout
2945
2946 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
2947 #define TARGET_FUNCTION_OK_FOR_SIBCALL hook_bool_tree_tree_true
2948
2949 #undef TARGET_CAN_ELIMINATE
2950 #define TARGET_CAN_ELIMINATE pru_can_eliminate
2951
2952 #undef TARGET_HARD_REGNO_MODE_OK
2953 #define TARGET_HARD_REGNO_MODE_OK pru_hard_regno_mode_ok
2954
2955 #undef TARGET_HARD_REGNO_SCRATCH_OK
2956 #define TARGET_HARD_REGNO_SCRATCH_OK pru_hard_regno_scratch_ok
2957
2958 #undef TARGET_FUNCTION_ARG
2959 #define TARGET_FUNCTION_ARG pru_function_arg
2960
2961 #undef TARGET_FUNCTION_ARG_ADVANCE
2962 #define TARGET_FUNCTION_ARG_ADVANCE pru_function_arg_advance
2963
2964 #undef TARGET_ARG_PARTIAL_BYTES
2965 #define TARGET_ARG_PARTIAL_BYTES pru_arg_partial_bytes
2966
2967 #undef TARGET_FUNCTION_VALUE
2968 #define TARGET_FUNCTION_VALUE pru_function_value
2969
2970 #undef TARGET_LIBCALL_VALUE
2971 #define TARGET_LIBCALL_VALUE pru_libcall_value
2972
2973 #undef TARGET_FUNCTION_VALUE_REGNO_P
2974 #define TARGET_FUNCTION_VALUE_REGNO_P pru_function_value_regno_p
2975
2976 #undef TARGET_RETURN_IN_MEMORY
2977 #define TARGET_RETURN_IN_MEMORY pru_return_in_memory
2978
2979 #undef TARGET_MUST_PASS_IN_STACK
2980 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
2981
2982 #undef TARGET_LEGITIMATE_ADDRESS_P
2983 #define TARGET_LEGITIMATE_ADDRESS_P pru_legitimate_address_p
2984
2985 #undef TARGET_INIT_LIBFUNCS
2986 #define TARGET_INIT_LIBFUNCS pru_init_libfuncs
2987 #undef TARGET_LIBFUNC_GNU_PREFIX
2988 #define TARGET_LIBFUNC_GNU_PREFIX true
2989
2990 #undef TARGET_RTX_COSTS
2991 #define TARGET_RTX_COSTS pru_rtx_costs
2992
2993 #undef TARGET_PRINT_OPERAND
2994 #define TARGET_PRINT_OPERAND pru_print_operand
2995
2996 #undef TARGET_PRINT_OPERAND_ADDRESS
2997 #define TARGET_PRINT_OPERAND_ADDRESS pru_print_operand_address
2998
2999 #undef TARGET_OPTION_OVERRIDE
3000 #define TARGET_OPTION_OVERRIDE pru_option_override
3001
3002 #undef TARGET_SET_CURRENT_FUNCTION
3003 #define TARGET_SET_CURRENT_FUNCTION pru_set_current_function
3004
3005 #undef TARGET_MACHINE_DEPENDENT_REORG
3006 #define TARGET_MACHINE_DEPENDENT_REORG pru_reorg
3007
3008 #undef TARGET_CAN_USE_DOLOOP_P
3009 #define TARGET_CAN_USE_DOLOOP_P pru_can_use_doloop_p
3010
3011 #undef TARGET_INVALID_WITHIN_DOLOOP
3012 #define TARGET_INVALID_WITHIN_DOLOOP pru_invalid_within_doloop
3013
3014 #undef TARGET_UNWIND_WORD_MODE
3015 #define TARGET_UNWIND_WORD_MODE pru_unwind_word_mode
3016
3017 #undef TARGET_HAVE_SPECULATION_SAFE_VALUE
3018 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
3019
3020 struct gcc_target targetm = TARGET_INITIALIZER;
3021
3022 #include "gt-pru.h"