1 ;; Machine Description for TI PRU.
2 ;; Copyright (C) 2014-2022 Free Software Foundation, Inc.
3 ;; Contributed by Dimitar Dimitrov <dimitar@dinux.eu>
4 ;; Based on the NIOS2 GCC port.
6 ;; This file is part of GCC.
8 ;; GCC is free software; you can redistribute it and/or modify
9 ;; it under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation; either version 3, or (at your option)
13 ;; GCC is distributed in the hope that it will be useful,
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;; GNU General Public License for more details.
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with GCC; see the file COPYING3. If not see
20 ;; <http://www.gnu.org/licenses/>.
;; Hard-register numbering for the PRU port.  NOTE(review): the enclosing
;; (define_constants ...) opener is outside this excerpt.  Register numbers
;; appear to be in QImode-sized units (4 per 32-bit CPU register, e.g.
;; RA_REGNUM 14 = r3.w2) — TODO confirm against pru.h.
25 (FIRST_ARG_REGNUM 56) ; Argument registers.
26 (LAST_ARG_REGNUM 119) ;
27 (FIRST_RETVAL_REGNUM 56) ; Return value registers.
28 (LAST_RETVAL_REGNUM 60) ;
29 (FIRST_CALLEE_SAVED_REGNUM 12) ; Callee saved registers.
;; NOTE(review): "CALEE" below looks like a misspelling of "CALLEE";
;; confirm how the rest of the port spells this constant before renaming.
30 (LAST_CALEE_SAVED_REGNUM 55) ;
31 (PROLOGUE_TEMP_REGNUM 4) ; Temporary register to use in prologue.
33 (RA_REGNUM 14) ; Return address register r3.w2.
34 (FP_REGNUM 16) ; Frame pointer register.
35 (MULDST_REGNUM 104) ; Multiply destination register.
36 (MULSRC0_REGNUM 112) ; Multiply source register.
37 (MULSRC1_REGNUM 116) ; Multiply source register.
38 (LAST_NONIO_GP_REGNUM 119) ; Last non-I/O general purpose register.
39 (R30_REGNUM 120) ; R30 I/O register.
40 (R31_REGNUM 124) ; R31 I/O register.
41 (LOOPCNTR_REGNUM 128) ; internal LOOP counter register
42 (LAST_GP_REGNUM 132) ; Last general purpose register.
44 ;; Target register definitions.
45 (STACK_POINTER_REGNUM 8)
46 (HARD_FRAME_POINTER_REGNUM FP_REGNUM)
;; FRAME/ARG pointer are soft registers beyond LAST_GP_REGNUM; they are
;; eliminated before register allocation.
48 (FRAME_POINTER_REGNUM 136)
49 (ARG_POINTER_REGNUM 140)
50 (FIRST_PSEUDO_REGISTER 144)
54 ;; Enumerate address spaces.
;; ADDR_SPACE_REGIO gives C code named access to the R30/R31 I/O registers
;; via the __regio_symbol address space (see the mov<mode> expander below).
57 (ADDR_SPACE_REGIO 1) ; Access to R30 and R31 I/O registers.
61 ;; Enumeration of UNSPECs.
;; NOTE(review): the bodies/closers of these enums are truncated in this
;; excerpt; only the UNSPECV_DELAY_CYCLES_* members are visible.
63 (define_c_enum "unspec" [
67 (define_c_enum "unspecv" [
68 UNSPECV_DELAY_CYCLES_START
69 UNSPECV_DELAY_CYCLES_END
70 UNSPECV_DELAY_CYCLES_2X_HI
71 UNSPECV_DELAY_CYCLES_2X_SI
72 UNSPECV_DELAY_CYCLES_1
85 ; Length of an instruction (in bytes).
86 (define_attr "length" "" (const_int 4))
;; Insn classification attribute; the opening (define_attr "type" ... line
;; is not visible in this excerpt.
88 "unknown,complex,control,alu,cond_alu,st,ld,shift"
89 (const_string "complex"))
;; Inline asm defaults: assume one 4-byte instruction of unknown behavior.
91 (define_asm_attributes
92 [(set_attr "length" "4")
93 (set_attr "type" "complex")])
95 ; There is no pipeline, so our scheduling description is simple.
96 (define_automaton "pru")
97 (define_cpu_unit "cpu" "pru")
;; Every insn takes one cycle on the single CPU unit.
99 (define_insn_reservation "everything" 1 (match_test "true") "cpu")
101 (include "predicates.md")
102 (include "constraints.md")
104 ;; All supported direct move-modes
;; NOTE(review): the HI-mode line of MOV8_16_32 and the body of MOV8_16
;; are truncated in this excerpt.
105 (define_mode_iterator MOV8_16_32 [QI QQ UQQ
107 SI SQ USQ SA USA SF SD])
109 (define_mode_iterator MOV8_16 [QI QQ UQQ
111 (define_mode_iterator MOV32 [SI SQ USQ SA USA SF SD])
112 (define_mode_iterator MOV64 [DI DF DD DQ UDQ])
113 (define_mode_iterator QISI [QI HI SI])
114 (define_mode_iterator HISI [HI SI])
115 (define_mode_iterator HIDI [HI SI DI])
116 (define_mode_iterator SFDF [SF DF])
118 ;; EQS0/1 for extension source 0/1 and EQD for extension destination patterns.
119 (define_mode_iterator EQS0 [QI HI SI])
120 (define_mode_iterator EQS1 [QI HI SI])
121 (define_mode_iterator EQD [QI HI SI])
122 (define_mode_iterator EQDHIDI [HI SI DI])
124 ;; GCC sign-extends its integer constants. Hence 0x80 will be represented
125 ;; as -128 for QI mode and 128 for HI and SI modes. To cope with this,
126 ;; use different constraints to match UBYTE in different modes.
128 ;; Wherever this iterator is used, the corresponding operand has the 'u'
129 ;; print format modifier. That is how the QI signedness is cured, and
130 ;; the generated assembly contains unsigned constants.
132 ;; If the pattern has no QI operands, then this iterator need not be used.
134 ;; Note that we do not require "uhword_constr" since ALU instructions
135 ;; can use only UBYTE constants. The MOV patterns are already separately
136 ;; defined for each size, hence no need for an iterator.
137 (define_mode_attr ubyte_constr [(QI "O") (HI "I") (SI "I")])
;; Move expander for 8/16/32-bit modes.  Intercepts accesses to the
;; __regio_symbol address space and turns them into UNSPECV_REGIO_READ/WRITE
;; insns on the real R30/R31 hard registers; otherwise forces constants
;; into registers before a store.  NOTE(review): several preparation-
;; statement lines are truncated in this excerpt.
141 (define_expand "mov<mode>"
142 [(set (match_operand:MOV8_16_32 0 "nonimmediate_operand")
143 (match_operand:MOV8_16_32 1 "general_operand"))]
146 if (MEM_P (operands[0])
147 && MEM_ADDR_SPACE (operands[0]) == ADDR_SPACE_REGIO)
150 /* Intercept writes to the SImode register I/O "address space". */
151 gcc_assert (<MODE>mode == SImode);
153 if (!SYMBOL_REF_P (XEXP (operands[0], 0)))
155 error ("invalid access to %<__regio_symbol%> address space");
159 if (!REG_P (operands[1]))
160 operands[1] = force_reg (<MODE>mode, operands[1]);
162 int regiono = pru_symref2ioregno (XEXP (operands[0], 0));
163 gcc_assert (regiono >= 0);
164 rtx regio = gen_rtx_REG (<MODE>mode, regiono);
165 rtx unspecv = gen_rtx_UNSPEC_VOLATILE (<MODE>mode,
166 gen_rtvec (1, operands[1]),
167 UNSPECV_REGIO_WRITE);
168 emit_insn (gen_rtx_SET (regio, unspecv));
171 else if (MEM_P (operands[1])
172 && MEM_ADDR_SPACE (operands[1]) == ADDR_SPACE_REGIO)
174 /* Intercept reads from the SImode register I/O "address space". */
175 gcc_assert (<MODE>mode == SImode);
177 if (!SYMBOL_REF_P (XEXP (operands[1], 0)))
179 error ("invalid access to %<__regio_symbol%> address space");
183 if (MEM_P (operands[0]))
184 operands[0] = force_reg (<MODE>mode, operands[0]);
186 int regiono = pru_symref2ioregno (XEXP (operands[1], 0));
187 gcc_assert (regiono >= 0);
188 rtx regio = gen_rtx_REG (<MODE>mode, regiono);
189 rtx unspecv = gen_rtx_UNSPEC_VOLATILE (<MODE>mode,
190 gen_rtvec (1, regio),
192 emit_insn (gen_rtx_SET (operands[0], unspecv));
195 else if (MEM_P (operands[0]))
197 /* It helps to split constant loading and memory access
198 early, so that the LDI/LDI32 instructions can be hoisted
199 outside a loop body. */
200 operands[1] = force_reg (<MODE>mode, operands[1]);
204 ;; Keep a single pattern for 32 bit MOV operations. LRA requires that the
205 ;; movXX patterns be unified for any given mode.
207 ;; Note: Assume that Program Mem (T constraint) can fit in 16 bits!
;; NOTE(review): the insn condition and some template alternatives are
;; truncated in this excerpt.
208 (define_insn "prumov<mode>"
209 [(set (match_operand:MOV32 0 "nonimmediate_operand" "=m,r,r,r,r,r,r")
210 (match_operand:MOV32 1 "general_operand" "r,m,r,T,J,Um,iF"))]
213 sb%B0o\\t%b1, %0, %S0
214 lb%B1o\\t%b0, %1, %S1
220 [(set_attr "type" "st,ld,alu,alu,alu,alu,alu")
221 (set_attr "length" "4,4,4,4,4,4,8")])
224 ;; Separate pattern for 8 and 16 bit moves, since LDI32 pseudo instruction
225 ;; cannot handle byte and word-sized registers.
227 ;; Note: Constraint N is fine for both QI and HI mode, since it is used
228 ;; in the context of 16 bit constant integer.
229 (define_insn "prumov<mode>"
230 [(set (match_operand:MOV8_16 0 "nonimmediate_operand" "=m,r,r,r,r")
231 (match_operand:MOV8_16 1 "general_operand" "r,m,r,T,N"))]
234 sb%B0o\\t%b1, %0, %S0
235 lb%B1o\\t%b0, %1, %S1
238 ldi\\t%0, (%1) & 0xffff"
239 [(set_attr "type" "st,ld,alu,alu,alu")
240 (set_attr "length" "4")])
243 ; Pmode is 32 bits for PRU so symbolic constants cannot be 64 bits. Hence
244 ; this pattern handles only numeric constants.
246 ; Note: Unlike the arithmetics, here we cannot use "&" output modifier.
247 ; GCC expects to be able to move registers around "no matter what".
248 ; Forcing DI reg alignment (akin to microblaze's HARD_REGNO_MODE_OK)
249 ; does not seem efficient, and will violate TI ABI.
250 (define_insn "mov<mode>"
251 [(set (match_operand:MOV64 0 "nonimmediate_operand" "=m,r,r,r,r,r,r")
252 (match_operand:MOV64 1 "general_operand" "r,m,Um,r,T,J,nF"))]
255 switch (which_alternative)
258 return "sb%B0o\\t%b1, %0, %S0";
260 return "lb%B1o\\t%b0, %1, %S1";
262 return "fill\\t%F0, 8";
264 /* careful with overlapping source and destination regs. */
265 gcc_assert (GP_REG_P (REGNO (operands[0])));
266 gcc_assert (GP_REG_P (REGNO (operands[1])));
/* Move high word first when dst overlaps the upper half of src.  */
267 if (REGNO (operands[0]) == (REGNO (operands[1]) + 4))
268 return "mov\\t%N0, %N1\;mov\\t%F0, %F1";
270 return "mov\\t%F0, %F1\;mov\\t%N0, %N1";
272 return "ldi\\t%F0, %%pmem(%1)\;ldi\\t%N0, 0";
274 return "ldi\\t%F0, %1\;ldi\\t%N0, 0";
276 return "ldi32\\t%F0, %w1\;ldi32\\t%N0, %W1";
281 [(set_attr "type" "st,ld,alu,alu,alu,alu,alu")
282 (set_attr "length" "4,4,4,8,8,8,16")])
285 ; load_multiple pattern(s).
287 ; ??? Due to reload problems with replacing registers inside match_parallel
288 ; we currently support load_multiple/store_multiple only after reload.
290 ; Idea taken from the s390 port.
;; Builds a PARALLEL of (set (reg N+i) (mem base+offs+i*size)) so a single
;; LBBO burst can load several consecutive registers.
292 (define_expand "load_multiple"
293 [(match_par_dup 3 [(set (match_operand 0 "")
294 (match_operand 1 ""))
295 (use (match_operand 2 ""))])]
302 poly_int64 base_offs;
305 /* Support only loading a constant number of fixed-point registers from
307 if (GET_CODE (operands[2]) != CONST_INT
308 || GET_CODE (operands[1]) != MEM
309 || GET_CODE (operands[0]) != REG)
312 count = INTVAL (operands[2]);
313 regno = REGNO (operands[0]);
314 mode = GET_MODE (operands[0]);
318 operands[3] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
320 gcc_assert (!can_create_pseudo_p ());
322 base_reg = strip_offset (XEXP (operands[1], 0), &base_offs);
323 if (GET_CODE (base_reg) != REG)
326 for (i = 0; i < count; i++)
327 XVECEXP (operands[3], 0, i)
328 = gen_rtx_SET (gen_rtx_REG (mode, regno + i),
329 change_address (operands[1], mode,
330 plus_constant (Pmode, base_reg,
331 base_offs + i * GET_MODE_SIZE (mode))));
;; Matched PARALLEL emitted above; operand 0 is reused as the burst byte
;; count for the LBBO template.
334 (define_insn "*pru_load_multiple"
335 [(match_parallel 0 "load_multiple_operation"
336 [(set (match_operand:QI 1 "register_operand" "=r")
337 (match_operand:QI 2 "memory_operand" "m"))])]
340 int nregs = XVECLEN (operands[0], 0);
341 operands[0] = GEN_INT (nregs);
342 return "lb%B2o\\t%b1, %2, %0";
344 [(set_attr "type" "ld")])
347 ; store multiple pattern(s).
;; Mirror image of load_multiple: one SBBO burst stores consecutive regs.
350 (define_expand "store_multiple"
351 [(match_par_dup 3 [(set (match_operand 0 "")
352 (match_operand 1 ""))
353 (use (match_operand 2 ""))])]
360 poly_int64 base_offs;
363 /* Support only storing a constant number of fixed-point registers to
365 if (GET_CODE (operands[2]) != CONST_INT
366 || GET_CODE (operands[0]) != MEM
367 || GET_CODE (operands[1]) != REG)
370 count = INTVAL (operands[2]);
371 regno = REGNO (operands[1]);
372 mode = GET_MODE (operands[1]);
376 operands[3] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
378 gcc_assert (!can_create_pseudo_p ());
380 base_reg = strip_offset (XEXP (operands[0], 0), &base_offs);
381 if (GET_CODE (base_reg) != REG)
384 for (i = 0; i < count; i++)
385 XVECEXP (operands[3], 0, i)
386 = gen_rtx_SET (change_address (operands[0], mode,
387 plus_constant (Pmode, base_reg,
388 base_offs + i * GET_MODE_SIZE (mode))),
389 gen_rtx_REG (mode, regno + i));
392 (define_insn "*pru_store_multiple"
393 [(match_parallel 0 "store_multiple_operation"
394 [(set (match_operand:QI 1 "memory_operand" "=m")
395 (match_operand:QI 2 "register_operand" "r"))])]
398 int nregs = XVECLEN (operands[0], 0);
399 operands[0] = GEN_INT (nregs);
400 return "sb%B1o\\t%b2, %1, %0";
402 [(set_attr "type" "st")])
404 ;; Zero extension patterns
406 ;; Unfortunately we cannot use lbbo to load AND zero-extent a value.
407 ;; The burst length parameter of the LBBO instruction designates not only
408 ;; the number of memory data bytes fetched, but also the number of register
409 ;; byte fields written.
410 (define_expand "zero_extend<EQS0:mode><EQD:mode>2"
411 [(set (match_operand:EQD 0 "register_operand")
412 (zero_extend:EQD (match_operand:EQS0 1 "register_operand")))]
416 (define_insn "*zero_extend<EQS0:mode><EQD:mode>2"
417 [(set (match_operand:EQD 0 "register_operand" "=r")
418 (zero_extend:EQD (match_operand:EQS0 1 "register_operand" "r")))]
421 [(set_attr "type" "alu")])
;; DImode zero extensions: alternative 0 (src == low part of dst) only
;; needs to clear the upper bytes with ZERO; alternative 1 also moves.
423 (define_insn "zero_extendqidi2"
424 [(set (match_operand:DI 0 "register_operand" "=r,r")
425 (zero_extend:DI (match_operand:QI 1 "register_operand" "0,r")))]
429 mov\\t%F0.b0, %1\;zero\\t%F0.b1, 7"
430 [(set_attr "type" "alu,alu")
431 (set_attr "length" "4,8")])
433 (define_insn "zero_extendhidi2"
434 [(set (match_operand:DI 0 "register_operand" "=r,r")
435 (zero_extend:DI (match_operand:HI 1 "register_operand" "0,r")))]
439 mov\\t%F0.w0, %1\;zero\\t%F0.b2, 6"
440 [(set_attr "type" "alu,alu")
441 (set_attr "length" "4,8")])
443 (define_insn "zero_extendsidi2"
444 [(set (match_operand:DI 0 "register_operand" "=r,r")
445 (zero_extend:DI (match_operand:SI 1 "register_operand" "0,r")))]
449 mov\\t%F0, %1\;zero\\t%N0, 4"
450 [(set_attr "type" "alu,alu")
451 (set_attr "length" "4,8")])
453 ;; Sign extension pattern. We have to emulate it due to lack of
454 ;; signed operations in PRU's ALU.
;; Strategy: zero-extend first, then OR-in the high mask bits if the
;; source sign bit is set (tested with a QBBx conditional branch).
456 (define_expand "extend<EQS0:mode><EQDHIDI:mode>2"
457 [(set (match_operand:EQDHIDI 0 "register_operand" "=r")
458 (sign_extend:EQDHIDI (match_operand:EQS0 1 "register_operand" "r")))]
461 rtx_code_label *skip_hiset_label;
463 /* Clear the higher bits to temporarily make the value positive. */
464 emit_insn (gen_rtx_SET (operands[0],
465 gen_rtx_ZERO_EXTEND (<EQDHIDI:MODE>mode,
468 /* Now check if the result must be made negative. */
469 skip_hiset_label = gen_label_rtx ();
470 const int op1_size = GET_MODE_SIZE (<EQS0:MODE>mode);
471 const int op1_sign_bit = op1_size * BITS_PER_UNIT - 1;
472 emit_jump_insn (gen_cbranch_qbbx_const (EQ,
475 GEN_INT (op1_sign_bit),
477 emit_insn (gen_ior<EQDHIDI:mode> (
480 GEN_INT (~GET_MODE_MASK (<EQS0:MODE>mode))));
481 emit_label (skip_hiset_label);
487 ;; We define it solely to allow combine to choose SImode
488 ;; for word mode when trying to match our cbranch_qbbx_* insn.
490 ;; Check how combine.cc:make_extraction() uses
491 ;; get_best_reg_extraction_insn() to select the op size.
;; Bit-field extract emulated with a left shift (to drop high bits)
;; followed by a logical right shift (to align and zero-fill).
492 (define_insn "extzv<mode>"
493 [(set (match_operand:QISI 0 "register_operand" "=r")
495 (match_operand:QISI 1 "register_operand" "r")
496 (match_operand:QISI 2 "const_int_operand" "i")
497 (match_operand:QISI 3 "const_int_operand" "i")))]
499 "lsl\\t%0, %1, (%S0 * 8 - %2 - %3)\;lsr\\t%0, %0, (%S0 * 8 - %2)"
500 [(set_attr "type" "complex")
501 (set_attr "length" "8")])
505 ;; Arithmetic Operations
507 (define_expand "add<mode>3"
508 [(set (match_operand:QISI 0 "register_operand")
509 (plus:QISI (match_operand:QISI 1 "register_operand")
510 (match_operand:QISI 2 "nonmemory_operand")))]
;; 64-bit add as two 32-bit ops with carry (ADD/ADC); the M alternative
;; handles small negative constants via SUB/SUC of the negated value.
;; Earlyclobber "&" keeps dst from overlapping src halves mid-sequence.
514 (define_insn "adddi3"
515 [(set (match_operand:DI 0 "register_operand" "=&r,&r,&r")
516 (plus:DI (match_operand:DI 1 "register_operand" "%r,r,r")
517 (match_operand:DI 2 "reg_or_ubyte_operand" "r,I,M")))]
520 add\\t%F0, %F1, %F2\;adc\\t%N0, %N1, %N2
521 add\\t%F0, %F1, %2\;adc\\t%N0, %N1, 0
522 sub\\t%F0, %F1, %n2\;suc\\t%N0, %N1, 0"
523 [(set_attr "type" "alu")
524 (set_attr "length" "8")])
526 (define_expand "sub<mode>3"
527 [(set (match_operand:QISI 0 "register_operand")
528 (minus:QISI (match_operand:QISI 1 "reg_or_ubyte_operand")
529 (match_operand:QISI 2 "reg_or_ubyte_operand")))]
;; 64-bit subtract with borrow (SUB/SUC); RSB/RSC when op1 is a constant.
533 (define_insn "subdi3"
534 [(set (match_operand:DI 0 "register_operand" "=&r,&r")
535 (minus:DI (match_operand:DI 1 "reg_or_ubyte_operand" "r,I")
536 (match_operand:DI 2 "register_operand" "r,r")))]
539 sub\\t%F0, %F1, %F2\;suc\\t%N0, %N1, %N2
540 rsb\\t%F0, %F2, %1\;rsc\\t%N0, %N2, 0"
541 [(set_attr "type" "alu")
542 (set_attr "length" "8")])
544 ;; Negate and ones complement
546 (define_expand "neg<mode>2"
547 [(set (match_operand:QISI 0 "register_operand")
548 (neg:QISI (match_operand:QISI 1 "register_operand")))]
552 (define_expand "one_cmpl<mode>2"
553 [(set (match_operand:QISI 0 "register_operand")
554 (not:QISI (match_operand:QISI 1 "register_operand")))]
558 ;; Integer logical Operations
560 ;; TODO - add optimized cases that exploit the fact that we can get away
561 ;; with a single machine op for special constants, e.g. UBYTE << (0/8/16/24)
563 (define_code_iterator LOGICAL [and ior xor umin umax])
564 (define_code_attr logical_asm [(and "and") (ior "or") (xor "xor") (umin "min") (umax "max")])
566 (define_code_iterator LOGICAL_BITOP [and ior xor])
567 (define_code_attr logical_bitop_asm [(and "and") (ior "or") (xor "xor")])
569 (define_expand "<code><mode>3"
570 [(set (match_operand:QISI 0 "register_operand")
571 (LOGICAL:QISI (match_operand:QISI 1 "register_operand")
572 (match_operand:QISI 2 "reg_or_ubyte_operand")))]
576 ;; Specialised IOR pattern, which can emit an efficient FILL instruction.
;; pru_calc_byterange maps the constant to a contiguous 0xFF byte range;
;; the template then FILLs exactly those bytes of the destination.
577 (define_insn "@pru_ior_fillbytes<mode>"
578 [(set (match_operand:HIDI 0 "register_operand" "=r")
580 (match_operand:HIDI 1 "register_operand" "0")
581 (match_operand:HIDI 2 "const_fillbytes_operand" "Uf")))]
584 static char line[64];
587 r = pru_calc_byterange (INTVAL (operands[2]), <MODE>mode);
588 gcc_assert (r.start >=0 && r.nbytes > 0);
589 gcc_assert ((r.start + r.nbytes) <= GET_MODE_SIZE (<MODE>mode));
591 const int regno = REGNO (operands[0]) + r.start;
593 sprintf (line, "fill\\tr%d.b%d, %d", regno / 4, regno % 4, r.nbytes);
596 [(set_attr "type" "alu")
597 (set_attr "length" "4")])
599 ;; Specialised AND pattern, which can emit an efficient ZERO instruction.
;; Same idea as the FILL pattern above, on the complement of the constant.
600 (define_insn "@pru_and_zerobytes<mode>"
601 [(set (match_operand:HIDI 0 "register_operand" "=r")
603 (match_operand:HIDI 1 "register_operand" "0")
604 (match_operand:HIDI 2 "const_zerobytes_operand" "Uz")))]
607 static char line[64];
610 r = pru_calc_byterange (~INTVAL (operands[2]), <MODE>mode);
611 gcc_assert (r.start >=0 && r.nbytes > 0);
612 gcc_assert ((r.start + r.nbytes) <= GET_MODE_SIZE (<MODE>mode));
614 const int regno = REGNO (operands[0]) + r.start;
616 sprintf (line, "zero\\tr%d.b%d, %d", regno / 4, regno % 4, r.nbytes);
619 [(set_attr "type" "alu")
620 (set_attr "length" "4")])
622 ;; Shift instructions
624 (define_code_iterator SHIFT [ashift lshiftrt])
625 (define_code_attr shift_op [(ashift "ashl") (lshiftrt "lshr")])
626 (define_code_attr shift_asm [(ashift "lsl") (lshiftrt "lsr")])
628 (define_expand "<shift_op><mode>3"
629 [(set (match_operand:QISI 0 "register_operand")
630 (SHIFT:QISI (match_operand:QISI 1 "register_operand")
631 (match_operand:QISI 2 "shift_operand")))]
635 ; Expand to a loop of single-position arithmetic shifts, which
636 ; we can handle. Pseudo code:
638 ; QImode cntr = nshifts & 0xff;
646 ; Note that the number of shifts is truncated to QImode. This is a fair
647 ; assumption for a loop-based shifting implementation.
;; PRU has no arithmetic-right-shift instruction, so emit a countdown
;; loop of 1-bit ashr steps (or a single step for a constant shift of 1).
648 (define_expand "ashr<mode>3"
649 [(set (match_operand:QISI 0 "register_operand")
651 (match_operand:QISI 1 "register_operand")
652 (match_operand:QI 2 "reg_or_const_1_operand")))]
655 rtx dst = operands[0];
656 rtx src = operands[1];
657 rtx nshifts = operands[2];
658 rtx_code_label *loop_label;
659 rtx_code_label *ashr_end_label;
660 rtx test, tmpval, cntr;
662 if (const_1_operand (nshifts, VOIDmode))
664 emit_insn (gen_ashr<mode>3_single (dst, src, nshifts));
668 tmpval = gen_reg_rtx (<MODE>mode);
669 emit_move_insn (tmpval, src);
671 cntr = gen_reg_rtx (QImode);
672 emit_move_insn (cntr, nshifts);
674 loop_label = gen_label_rtx ();
675 ashr_end_label = gen_label_rtx ();
677 emit_label (loop_label);
678 test = gen_rtx_EQ (VOIDmode, cntr, const0_rtx);
679 emit_jump_insn (gen_cbranchqi4 (test, cntr, const0_rtx, ashr_end_label));
681 emit_insn (gen_ashr<mode>3_single (tmpval, tmpval, const1_rtx));
682 emit_insn (gen_addqi3 (cntr, cntr, GEN_INT (-1)));
684 emit_jump_insn (gen_jump (loop_label));
685 JUMP_LABEL (get_last_insn ()) = loop_label;
686 LABEL_NUSES (loop_label)++;
689 emit_label (ashr_end_label);
691 emit_move_insn (dst, tmpval);
;; One-position arithmetic right shift: logical shift, then copy the old
;; sign bit (now at MSB-1) back into the MSB with SET if it was set.
696 (define_insn "ashr<mode>3_single"
697 [(set (match_operand:QISI 0 "register_operand" "=r")
699 (match_operand:QISI 1 "register_operand" "r")
700 (match_operand:QI 2 "const_1_operand" "P")))]
702 "lsr\\t%0, %1, 1\;qbbc LSIGN%=, %0, (%S0 * 8) - 2\;set %0, %0, (%S0 * 8) - 1\;LSIGN%=:"
703 [(set_attr "type" "alu")
704 (set_attr "length" "12")])
707 ;; Include ALU patterns with zero-extension of operands. That's where
708 ;; the real insns are defined.
710 (include "alu-zext.md")
712 ;; Patterns for accessing the R30/R31 I/O registers.
;; These match the UNSPECV_REGIO_READ/WRITE rtxes created by the
;; mov<mode> expander for the __regio_symbol address space.
714 (define_insn "*regio_readsi"
715 [(set (match_operand:SI 0 "register_operand" "=r")
717 [(match_operand:SI 1 "regio_operand" "Rrio")]
718 UNSPECV_REGIO_READ))]
721 [(set_attr "type" "alu")])
723 (define_insn "*regio_nozext_writesi"
724 [(set (match_operand:SI 0 "regio_operand" "=Rrio")
726 [(match_operand:SI 1 "register_operand" "r")]
727 UNSPECV_REGIO_WRITE))]
730 [(set_attr "type" "alu")])
732 (define_insn "*regio_zext_write_r30<EQS0:mode>"
733 [(set (match_operand:SI 0 "regio_operand" "=Rrio")
735 [(zero_extend:SI (match_operand:EQS0 1 "register_operand" "r"))]
736 UNSPECV_REGIO_WRITE))]
739 [(set_attr "type" "alu")])
741 ;; DI logical ops could be automatically split into WORD-mode ops in
742 ;; expand_binop(). But then we'll miss an opportunity to use SI mode
743 ;; operations, since WORD mode for PRU is QI.
744 (define_expand "<code>di3"
745 [(set (match_operand:DI 0 "register_operand")
747 (match_operand:DI 1 "register_operand")
748 (match_operand:DI 2 "reg_or_const_int_operand")))]
751 /* Try with the more efficient zero/fill patterns first. */
752 if (<LOGICAL_BITOP:CODE> == IOR
753 && CONST_INT_P (operands[2])
754 && const_fillbytes_operand (operands[2], DImode))
756 rtx insn = maybe_gen_pru_ior_fillbytes (DImode,
762 if (REGNO (operands[0]) != REGNO (operands[1]))
763 emit_move_insn (operands[0], operands[1]);
768 if (<LOGICAL_BITOP:CODE> == AND
769 && CONST_INT_P (operands[2])
770 && const_zerobytes_operand (operands[2], DImode))
772 rtx insn = maybe_gen_pru_and_zerobytes (DImode,
778 if (REGNO (operands[0]) != REGNO (operands[1]))
779 emit_move_insn (operands[0], operands[1]);
784 /* No optimized case found. Rely on the two-instruction pattern below. */
785 if (!reg_or_ubyte_operand (operands[2], DImode))
786 operands[2] = force_reg (DImode, operands[2]);
;; Generic 64-bit bitwise op: one SImode instruction per register half.
789 (define_insn "pru_<code>di3"
790 [(set (match_operand:DI 0 "register_operand" "=&r,&r")
792 (match_operand:DI 1 "register_operand" "%r,r")
793 (match_operand:DI 2 "reg_or_ubyte_operand" "r,I")))]
796 <logical_bitop_asm>\\t%F0, %F1, %F2\;<logical_bitop_asm>\\t%N0, %N1, %N2
797 <logical_bitop_asm>\\t%F0, %F1, %2\;<logical_bitop_asm>\\t%N0, %N1, 0"
798 [(set_attr "type" "alu")
799 (set_attr "length" "8")])
801 (define_insn "one_cmpldi2"
802 [(set (match_operand:DI 0 "register_operand" "=r")
803 (not:DI (match_operand:DI 1 "register_operand" "r")))]
806 /* careful with overlapping source and destination regs. */
807 gcc_assert (GP_REG_P (REGNO (operands[0])));
808 gcc_assert (GP_REG_P (REGNO (operands[1])));
/* Complement the high word first when dst overlaps src's upper half.  */
809 if (REGNO (operands[0]) == (REGNO (operands[1]) + 4))
810 return "not\\t%N0, %N1\;not\\t%F0, %F1";
812 return "not\\t%F0, %F1\;not\\t%N0, %N1";
814 [(set_attr "type" "alu")
815 (set_attr "length" "8")])
817 ;; Multiply instruction. The nop is required to ensure that Rmd0 and Rms0
818 ;; registers are sampled and multiplication is executed on those values.
819 ;; Only after that one cycle can xin obtain the result.
821 (define_insn "mulsi3"
822 [(set (match_operand:SI 0 "pru_muldst_operand" "=Rmd0")
823 (mult:SI (match_operand:SI 1 "pru_mulsrc0_operand" "%Rms0")
824 (match_operand:SI 2 "pru_mulsrc1_operand" "Rms1")))]
826 "nop\;xin\\t0, %0, 4"
827 [(set_attr "type" "alu")
828 (set_attr "length" "8")])
830 ;; Prologue, Epilogue and Return
;; All three hand off to C code in pru.cc; the RTL templates themselves
;; are truncated in this excerpt.
832 (define_expand "prologue"
836 pru_expand_prologue ();
840 (define_expand "epilogue"
844 pru_expand_epilogue (false);
848 (define_expand "sibcall_epilogue"
852 pru_expand_epilogue (true);
856 (define_insn "return"
858 "pru_can_use_return_insn ()"
861 (define_insn "simple_return"
866 ;; Block any insns from being moved before this point, since the
867 ;; profiling call to mcount can use various registers that aren't
868 ;; saved or used to pass arguments.
870 (define_insn "blockage"
871 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
874 [(set_attr "type" "unknown")
875 (set_attr "length" "0")])
879 (define_insn "indirect_jump"
880 [(set (pc) (match_operand:SI 0 "register_operand" "r"))]
883 [(set_attr "type" "control")])
;; Direct jump; the (set (pc) ...) opener is truncated in this excerpt.
887 (label_ref (match_operand 0)))]
890 [(set_attr "type" "control")])
;; Calls clobber the HImode return-address register r3.w2 (RA_REGNUM).
893 (define_expand "call"
894 [(parallel [(call (match_operand 0 "")
895 (match_operand 1 ""))
896 (clobber (reg:HI RA_REGNUM))])]
900 (define_expand "call_value"
901 [(parallel [(set (match_operand 0 "")
902 (call (match_operand 1 "")
903 (match_operand 2 "")))
904 (clobber (reg:HI RA_REGNUM))])]
909 [(call (mem:SI (match_operand:SI 0 "call_operand" "i,r"))
911 (clobber (reg:HI RA_REGNUM))]
916 [(set_attr "type" "control")])
918 (define_insn "*call_value"
919 [(set (match_operand 0)
920 (call (mem:SI (match_operand:SI 1 "call_operand" "i,r"))
922 (clobber (reg:HI RA_REGNUM))]
927 [(set_attr "type" "control")])
;; Sibling calls reuse the caller's frame and do not clobber RA.
929 (define_expand "sibcall"
930 [(parallel [(call (match_operand 0 "")
931 (match_operand 1 ""))
936 (define_expand "sibcall_value"
937 [(parallel [(set (match_operand 0 "")
938 (call (match_operand 1 "")
939 (match_operand 2 "")))
944 (define_insn "*sibcall"
945 [(call (mem:SI (match_operand:SI 0 "call_operand" "i,Rsib"))
948 "SIBLING_CALL_P (insn)"
952 [(set_attr "type" "control")])
954 (define_insn "*sibcall_value"
955 [(set (match_operand 0 "register_operand" "")
956 (call (mem:SI (match_operand:SI 1 "call_operand" "i,Rsib"))
959 "SIBLING_CALL_P (insn)"
963 [(set_attr "type" "control")])
965 (define_insn "*tablejump"
967 (match_operand:SI 0 "register_operand" "r"))
968 (use (label_ref (match_operand 1)))]
971 [(set_attr "type" "control")])
973 ;; Expand the cbranch pattern in order to assign different constraints for
974 ;; signed and unsigned comparisons.
975 (define_expand "cbranch<mode>4"
978 (match_operator 0 "ordered_comparison_operator"
979 [(match_operand:QISI 1 "register_operand")
980 (match_operand:QISI 2 "reg_or_const_int_operand")])
981 (label_ref (match_operand 3 ""))
985 /* Ensure our patterns will be able to handle the particular const_int. */
986 if (CONST_INT_P (operands[2]))
988 HOST_WIDE_INT ival = INTVAL (operands[2]);
990 /* For signed comparisons, we cannot play games with the const_int's
991 sign. PRU patterns do not support negative integer constants. */
992 if (pru_signed_cmp_operator (operands[0], VOIDmode) && !UBYTE_INT (ival))
994 if (can_create_pseudo_p ())
995 operands[2] = force_reg (<MODE>mode, operands[2]);
1000 /* For unsigned comparisons, be prepared to handle the QI quirk. */
1001 if (pru_cmp_operator (operands[0], VOIDmode)
1002 && !const_ubyte_operand (operands[2], <MODE>mode))
1004 if (can_create_pseudo_p ())
1005 operands[2] = force_reg (<MODE>mode, operands[2]);
;; Unsigned compare-and-branch: near branches use one QBxx; far targets
;; use the inverted condition to skip over a JMP to the label.
1012 (define_insn "cbranch<mode>4_unsigned"
1015 (match_operator 0 "pru_cmp_operator"
1016 [(match_operand:QISI 1 "register_operand" "r")
1017 (match_operand:QISI 2 "reg_or_ubyte_operand" "r<QISI:ubyte_constr>")])
1018 (label_ref (match_operand 3))
1022 const bool is_near = (get_attr_length (insn) == 4);
1024 /* PRU comparisons reverse the operand order (OP2 cmp OP1),
1025 so swap the condition. */
1027 return "qb%P0\t%l3, %1, %u2";
1029 return "qb%Q0\t.+8, %1, %u2\;jmp\t%%label(%l3)";
1031 [(set_attr "type" "control")
1032 (set (attr "length")
1034 (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
1035 (le (minus (match_dup 3) (pc)) (const_int 2036)))
1039 ;; Unlike ALU operations, the const_int's sign here is important. So we
1040 ;; cannot use ubyte_constr.
1042 ;; NOTE: The short branch check has no typo! We must be conservative and
1043 ;; take into account the worst case of having a signed comparison with a
1044 ;; "far taken branch" label, which amounts to 7 instructions.
1045 (define_insn "cbranch<mode>4_signed"
1048 (match_operator 0 "pru_signed_cmp_operator"
1049 [(match_operand:QISI 1 "register_operand" "r,r,r")
1050 (match_operand:QISI 2 "reg_or_ubyte_operand" "r,Z,I")])
1051 (label_ref (match_operand 3))
1055 const int length = (get_attr_length (insn));
1056 const bool is_near = (length == 20);
1057 enum rtx_code code = GET_CODE (operands[0]);
1059 if (which_alternative == 0)
1060 return pru_output_signed_cbranch (operands, is_near);
1061 else if (which_alternative == 1 && (code == LT || code == GE))
1062 return pru_output_signed_cbranch_zeroop2 (operands, is_near);
1064 return pru_output_signed_cbranch_ubyteop2 (operands, is_near);
1066 [(set_attr "type" "control")
1067 (set (attr "length")
1069 (and (ge (minus (match_dup 3) (pc)) (const_int -2020))
1070 (le (minus (match_dup 3) (pc)) (const_int 2016)))
;; Floating-point compare-and-branch: lowered to a library call by
;; pru_expand_fp_compare, then branched on the integer result.
1074 (define_expand "cbranch<mode>4"
1076 (if_then_else (match_operator 0 "pru_fp_comparison_operator"
1077 [(match_operand:SFDF 1 "register_operand")
1078 (match_operand:SFDF 2 "register_operand")])
1079 (label_ref (match_operand 3 ""))
1083 rtx t = pru_expand_fp_compare (operands[0], VOIDmode);
1085 operands[1] = XEXP (t, 0);
1086 operands[2] = XEXP (t, 1);
;; Single-bit test-and-branch, mapped to QBBC/QBBS.
1092 (define_code_iterator BIT_TEST [eq ne])
1093 (define_code_attr qbbx_op [(eq "qbbc") (ne "qbbs")])
1094 (define_code_attr qbbx_negop [(eq "qbbs") (ne "qbbc")])
1096 (define_insn "cbranch_qbbx_<BIT_TEST:code><EQS0:mode><EQS1:mode><EQD:mode>4"
1099 (BIT_TEST (zero_extract:EQD
1100 (match_operand:EQS0 0 "register_operand" "r")
1102 (match_operand:EQS1 1 "reg_or_ubyte_operand" "r<EQS1:ubyte_constr>"))
1104 (label_ref (match_operand 2))
1108 const int length = (get_attr_length (insn));
1109 const bool is_near = (length == 4);
1111 return "<BIT_TEST:qbbx_op>\\t%l2, %0, %u1";
1113 return "<BIT_TEST:qbbx_negop>\\t.+8, %0, %u1\;jmp\\t%%label(%l2)";
1115 [(set_attr "type" "control")
1116 (set (attr "length")
1118 (and (ge (minus (match_dup 2) (pc)) (const_int -2048))
1119 (le (minus (match_dup 2) (pc)) (const_int 2044)))
1123 ;; Bit test conditional branch, but only for constant bit positions.
1124 ;; This restriction allows an efficient code for DImode operands.
1126 ;; QImode is already handled by the pattern variant above.
1127 (define_insn "@cbranch_qbbx_const_<BIT_TEST:code><HIDI:mode>"
1130 (BIT_TEST (zero_extract:HIDI
1131 (match_operand:HIDI 0 "register_operand" "r")
1133 (match_operand:VOID 1 "const_int_operand" "i"))
1135 (label_ref (match_operand 2))
1139 const int length = (get_attr_length (insn));
1140 const bool is_near = (length == 4);
/* For DImode pick the register half holding the tested bit.  */
1142 if (<HIDI:MODE>mode == DImode && INTVAL (operands[1]) <= 31)
1145 return "<BIT_TEST:qbbx_op>\\t%l2, %F0, %1";
1147 return "<BIT_TEST:qbbx_negop>\\t.+8, %F0, %1\;jmp\\t%%label(%l2)";
1149 else if (<HIDI:MODE>mode == DImode)
1152 return "<BIT_TEST:qbbx_op>\\t%l2, %N0, %1 - 32";
1154 return "<BIT_TEST:qbbx_negop>\\t.+8, %N0, %1 - 32\;jmp\\t%%label(%l2)";
1159 return "<BIT_TEST:qbbx_op>\\t%l2, %0, %1";
1161 return "<BIT_TEST:qbbx_negop>\\t.+8, %0, %1\;jmp\\t%%label(%l2)";
1164 [(set_attr "type" "control")
1165 (set (attr "length")
1167 (and (ge (minus (match_dup 2) (pc)) (const_int -2048))
1168 (le (minus (match_dup 2) (pc)) (const_int 2044)))
1172 ;; ::::::::::::::::::::
1174 ;; :: Low Overhead Looping - idea "borrowed" from MEP
1176 ;; ::::::::::::::::::::
1178 ;; This insn is volatile because we'd like it to stay in its original
1179 ;; position, just before the loop header. If it stays there, we might
1180 ;; be able to convert it into a "loop" insn.
1181 (define_insn "@doloop_begin_internal<mode>"
1182 [(set (match_operand:HISI 0 "register_operand" "=r")
1183 (unspec_volatile:HISI
1184 [(match_operand:HISI 1 "reg_or_ubyte_operand" "rI")
1185 (match_operand 2 "const_int_operand" "")] UNSPECV_LOOP_BEGIN))]
1191 (define_expand "doloop_begin"
1192 [(use (match_operand 0 "register_operand"))
1193 (use (match_operand 1 ""))]
1196 pru_emit_doloop (operands, 0);
1200 ; Note: "JUMP_INSNs and CALL_INSNs are not allowed to have any output
1201 ; reloads;". Hence this insn must be prepared for a counter that is
;; ... spilled to memory (alternative 1 with the scratch register).
1203 (define_insn "@doloop_end_internal<mode>"
1205 (if_then_else (ne (match_operand:HISI 0 "nonimmediate_operand" "+r,*m")
1207 (label_ref (match_operand 1))
1210 (plus:HISI (match_dup 0)
1212 (unspec [(match_operand 2 "const_int_operand" "")] UNSPECV_LOOP_END)
1213 (clobber (match_scratch:HISI 3 "=X,&r"))]
1218 ;; Worst case length:
1220 ;; lbbo op3_reg, op3_ptr 4'
1221 ;; sub <op3_reg>, 1 4
1222 ;; qbeq .+8, <op3_reg>, 0 4
1224 ;; sbbo op3_reg, op3_ptr 4
1225 [(set (attr "length")
1227 (and (ge (minus (pc) (match_dup 1)) (const_int 0))
1228 (le (minus (pc) (match_dup 1)) (const_int 1020)))
1229 (cond [(eq_attr "alternative" "0") (const_int 4)]
1231 (cond [(eq_attr "alternative" "0") (const_int 12)]
1234 (define_expand "doloop_end"
1235 [(use (match_operand 0 "nonimmediate_operand"))
1236 (use (label_ref (match_operand 1 "")))]
;; QImode counters are rejected (only HI/SI handled); see condition above
;; (truncated in this excerpt).
1239 if (GET_CODE (operands[0]) == REG && GET_MODE (operands[0]) == QImode)
1241 pru_emit_doloop (operands, 1);
;; Hardware LOOP instruction: counter lives in LOOPCNTR_REGNUM.
1245 (define_insn "@pruloop<mode>"
1246 [(set (reg:HISI LOOPCNTR_REGNUM)
1247 (unspec:HISI [(match_operand:HISI 0 "reg_or_ubyte_operand" "rI")
1248 (label_ref (match_operand 1))]
1249 UNSPECV_LOOP_BEGIN))]
1253 (define_insn "pruloop_end"
1254 [(unspec [(const_int 0)] UNSPECV_LOOP_END)]
1257 [(set_attr "length" "0")])
;; Markers bracketing a __delay_cycles region; they emit no code
;; themselves (length 0), only assembly comments.
1262 (define_insn "delay_cycles_start"
1263 [(unspec_volatile [(match_operand 0 "immediate_operand" "i")]
1264 UNSPECV_DELAY_CYCLES_START)]
1266 "/* Begin %0 cycle delay. */"
1267 [(set_attr "length" "0")])
1269 (define_insn "delay_cycles_end"
1270 [(unspec_volatile [(match_operand 0 "immediate_operand" "i")]
1271 UNSPECV_DELAY_CYCLES_END)]
1273 "/* End %0 cycle delay. */"
1274 [(set_attr "length" "0")])
;; Busy-wait loop burning 2*%0+1 cycles (LDI + %0 iterations of SUB/QBNE).
1277 (define_insn "delay_cycles_2x_plus1_hi"
1278 [(unspec_volatile [(match_operand:SI 0 "const_uhword_operand" "J")]
1279 UNSPECV_DELAY_CYCLES_2X_HI)
1280 (clobber (match_scratch:SI 1 "=&r"))]
1282 "ldi\\t%1, %0\;sub\\t%1, %1, 1\;qbne\\t.-4, %1, 0"
1283 [(set_attr "length" "12")])
1286 ; Do not use LDI32 here because we do not want
1287 ; to accidentally lose one instruction cycle.
1288 (define_insn "delay_cycles_2x_plus2_si"
1289 [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "n")]
1290 UNSPECV_DELAY_CYCLES_2X_SI)
1291 (clobber (match_scratch:SI 1 "=&r"))]
1293 "ldi\\t%1.w0, %L0\;ldi\\t%1.w2, %H0\;sub\\t%1, %1, 1\;qbne\\t.-4, %1, 0"
1294 [(set_attr "length" "16")])
1296 (define_insn "delay_cycles_1"
1297 [(unspec_volatile [(const_int 0) ] UNSPECV_DELAY_CYCLES_1)]
1299 "nop\\t# delay_cycles_1"
1307 [(set_attr "type" "alu")])
;; Guard NOP so a hardware LOOP body is never empty / never ends on a branch.
1309 (define_insn "nop_loop_guard"
1312 "nop\\t# Loop end guard"
1313 [(set_attr "type" "alu")])
1315 ;; HALT instruction.
1316 (define_insn "pru_halt"
1317 [(unspec_volatile [(const_int 0)] UNSPECV_HALT)]
1320 [(set_attr "type" "control")])
1322 ;; Count Leading Zeros implemented using LMBD.
1323 ;; LMBD returns 32 if bit value is not present, and we subtract 31 to get CLZ.
1324 ;; Hence we get a defined value -1 for CLZ_DEFINED_VALUE_AT_ZERO.
1325 (define_expand "clz<mode>2"
1326 [(set (match_operand:QISI 0 "register_operand")
1327 (clz:QISI (match_operand:QISI 1 "register_operand")))]
1330 rtx dst = operands[0];
1331 rtx src = operands[1];
1332 rtx tmpval = gen_reg_rtx (<MODE>mode);
1334 emit_insn (gen_pru_lmbd (<MODE>mode, tmpval, src, const1_rtx));
1335 emit_insn (gen_sub3_insn (dst, GEN_INT (31), tmpval));
1339 ;; Left Most Bit Detect operation, which maps to a single instruction.
1340 (define_expand "@pru_lmbd<mode>"
1341 [(set (match_operand:QISI 0 "register_operand")
1343 [(match_operand:QISI 1 "register_operand")
1344 (match_operand:QISI 2 "reg_or_ubyte_operand")]