1 ;; Machine Description for TI PRU.
2 ;; Copyright (C) 2014-2022 Free Software Foundation, Inc.
3 ;; Contributed by Dimitar Dimitrov <dimitar@dinux.eu>
4 ;; Based on the NIOS2 GCC port.
5 ;;
6 ;; This file is part of GCC.
7 ;;
8 ;; GCC is free software; you can redistribute it and/or modify
9 ;; it under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; any later version.
12 ;;
13 ;; GCC is distributed in the hope that it will be useful,
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;; GNU General Public License for more details.
17 ;;
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with GCC; see the file COPYING3. If not see
20 ;; <http://www.gnu.org/licenses/>.
21
22 ;; Register numbers.
23 (define_constants
24 [
25 (FIRST_ARG_REGNUM 56) ; Argument registers.
26 (LAST_ARG_REGNUM 119) ;
27 (FIRST_RETVAL_REGNUM 56) ; Return value registers.
28 (LAST_RETVAL_REGNUM 60) ;
29 (FIRST_CALLEE_SAVED_REGNUM 12) ; Callee saved registers.
30 (LAST_CALLEE_SAVED_REGNUM 55) ;
31 (PROLOGUE_TEMP_REGNUM 4) ; Temporary register to use in prologue.
32
33 (RA_REGNUM 14) ; Return address register r3.w2.
34 (FP_REGNUM 16) ; Frame pointer register.
35 (MULDST_REGNUM 104) ; Multiply destination register.
36 (MULSRC0_REGNUM 112) ; Multiply source register.
37 (MULSRC1_REGNUM 116) ; Multiply source register.
38 (LAST_NONIO_GP_REGNUM 119) ; Last non-I/O general purpose register.
39 (R30_REGNUM 120) ; R30 I/O register.
40 (R31_REGNUM 124) ; R31 I/O register.
41 (LOOPCNTR_REGNUM 128) ; Internal LOOP counter register.
42 (LAST_GP_REGNUM 132) ; Last general purpose register.
43
44 ;; Target register definitions.
45 (STACK_POINTER_REGNUM 8)
46 (HARD_FRAME_POINTER_REGNUM FP_REGNUM)
47 (PC_REGNUM 132)
48 (FRAME_POINTER_REGNUM 136)
49 (ARG_POINTER_REGNUM 140)
50 (FIRST_PSEUDO_REGISTER 144)
51 ]
52 )
53
54 ;; Enumerate address spaces.
55 (define_constants
56 [
57 (ADDR_SPACE_REGIO 1) ; Access to R30 and R31 I/O registers.
58 ]
59 )
60
61 ;; Enumeration of UNSPECs.
62
63 (define_c_enum "unspec" [
64 UNSPEC_LMBD
65 ])
66
67 (define_c_enum "unspecv" [
68 UNSPECV_DELAY_CYCLES_START
69 UNSPECV_DELAY_CYCLES_END
70 UNSPECV_DELAY_CYCLES_2X_HI
71 UNSPECV_DELAY_CYCLES_2X_SI
72 UNSPECV_DELAY_CYCLES_1
73
74 UNSPECV_LOOP_BEGIN
75 UNSPECV_LOOP_END
76
77 UNSPECV_HALT
78
79 UNSPECV_BLOCKAGE
80
81 UNSPECV_REGIO_READ
82 UNSPECV_REGIO_WRITE
83 ])
84 \f
85 ; Length of an instruction (in bytes).
86 (define_attr "length" "" (const_int 4))
87 (define_attr "type"
88 "unknown,complex,control,alu,cond_alu,st,ld,shift"
89 (const_string "complex"))
90
91 (define_asm_attributes
92 [(set_attr "length" "4")
93 (set_attr "type" "complex")])
94
95 ; There is no pipeline, so our scheduling description is simple.
96 (define_automaton "pru")
97 (define_cpu_unit "cpu" "pru")
98
99 (define_insn_reservation "everything" 1 (match_test "true") "cpu")
100
101 (include "predicates.md")
102 (include "constraints.md")
103
104 ;; All supported direct move-modes
105 (define_mode_iterator MOV8_16_32 [QI QQ UQQ
106 HI HQ UHQ HA UHA
107 SI SQ USQ SA USA SF SD])
108
109 (define_mode_iterator MOV8_16 [QI QQ UQQ
110 HI HQ UHQ HA UHA])
111 (define_mode_iterator MOV32 [SI SQ USQ SA USA SF SD])
112 (define_mode_iterator MOV64 [DI DF DD DQ UDQ])
113 (define_mode_iterator QISI [QI HI SI])
114 (define_mode_iterator HISI [HI SI])
115 (define_mode_iterator HIDI [HI SI DI])
116 (define_mode_iterator SFDF [SF DF])
117
118 ;; EQS0/1 for extension source 0/1 and EQD for extension destination patterns.
119 (define_mode_iterator EQS0 [QI HI SI])
120 (define_mode_iterator EQS1 [QI HI SI])
121 (define_mode_iterator EQD [QI HI SI])
122 (define_mode_iterator EQDHIDI [HI SI DI])
123
124 ;; GCC sign-extends its integer constants. Hence 0x80 will be represented
125 ;; as -128 for QI mode and 128 for HI and SI modes. To cope with this,
126 ;; use different constraints to match UBYTE in different modes.
127 ;;
128 ;; Wherever this iterator is used, the corresponding operand has the 'u'
129 ;; print format modifier. That is how the QI signedness is cured, and
130 ;; the generated assembly contains unsigned constants.
131 ;;
132 ;; If the pattern has no QI operands, then this iterator need not be used.
133 ;;
134 ;; Note that we do not require "uhword_constr" since ALU instructions
135 ;; can use only UBYTE constants. The MOV patterns are already separately
136 ;; defined for each size, hence no need for an iterator.
137 (define_mode_attr ubyte_constr [(QI "O") (HI "I") (SI "I")])
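;; For illustration: the constant 0x80 reaches a QImode ALU pattern as
;; (const_int -128) and an HImode/SImode one as (const_int 128); the "O"
;; and "I" constraints are meant to accept those respective forms, and the
;; 'u' print modifier emits 0x80 in the assembly either way.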
138 \f
139 ;; Move instructions
140
141 (define_expand "mov<mode>"
142 [(set (match_operand:MOV8_16_32 0 "nonimmediate_operand")
143 (match_operand:MOV8_16_32 1 "general_operand"))]
144 ""
145 {
146 if (MEM_P (operands[0])
147 && MEM_ADDR_SPACE (operands[0]) == ADDR_SPACE_REGIO)
149 {
150 /* Intercept writes to the SImode register I/O "address space". */
151 gcc_assert (<MODE>mode == SImode);
152
153 if (!SYMBOL_REF_P (XEXP (operands[0], 0)))
154 {
155 error ("invalid access to %<__regio_symbol%> address space");
156 FAIL;
157 }
158
159 if (!REG_P (operands[1]))
160 operands[1] = force_reg (<MODE>mode, operands[1]);
161
162 int regiono = pru_symref2ioregno (XEXP (operands[0], 0));
163 gcc_assert (regiono >= 0);
164 rtx regio = gen_rtx_REG (<MODE>mode, regiono);
165 rtx unspecv = gen_rtx_UNSPEC_VOLATILE (<MODE>mode,
166 gen_rtvec (1, operands[1]),
167 UNSPECV_REGIO_WRITE);
168 emit_insn (gen_rtx_SET (regio, unspecv));
169 DONE;
170 }
171 else if (MEM_P (operands[1])
172 && MEM_ADDR_SPACE (operands[1]) == ADDR_SPACE_REGIO)
173 {
174 /* Intercept reads from the SImode register I/O "address space". */
175 gcc_assert (<MODE>mode == SImode);
176
177 if (!SYMBOL_REF_P (XEXP (operands[1], 0)))
178 {
179 error ("invalid access to %<__regio_symbol%> address space");
180 FAIL;
181 }
182
183 if (MEM_P (operands[0]))
184 operands[0] = force_reg (<MODE>mode, operands[0]);
185
186 int regiono = pru_symref2ioregno (XEXP (operands[1], 0));
187 gcc_assert (regiono >= 0);
188 rtx regio = gen_rtx_REG (<MODE>mode, regiono);
189 rtx unspecv = gen_rtx_UNSPEC_VOLATILE (<MODE>mode,
190 gen_rtvec (1, regio),
191 UNSPECV_REGIO_READ);
192 emit_insn (gen_rtx_SET (operands[0], unspecv));
193 DONE;
194 }
195 else if (MEM_P (operands[0]))
196 {
197 /* It helps to split constant loading and memory access
198 early, so that the LDI/LDI32 instructions can be hoisted
199 outside a loop body. */
200 operands[1] = force_reg (<MODE>mode, operands[1]);
201 }
202 })
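;; Usage sketch for the I/O "address space" intercepts above (the
;; declaration follows the documented __regio_symbol convention; adjust
;; to the actual target headers):
;;
;;   extern volatile __regio_symbol uint32_t __R30;
;;   __R30 |= (1u << 5);  /* Both the read and the write of the R30 I/O
;;                           register go through this mov expander.  */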
203
204 ;; Keep a single pattern for 32 bit MOV operations. LRA requires that the
205 ;; movXX patterns be unified for any given mode.
206 ;;
207 ;; Note: Assume that Program Memory addresses (T constraint) fit in 16 bits!
208 (define_insn "prumov<mode>"
209 [(set (match_operand:MOV32 0 "nonimmediate_operand" "=m,r,r,r,r,r,r")
210 (match_operand:MOV32 1 "general_operand" "r,m,r,T,J,Um,iF"))]
211 ""
212 "@
213 sb%B0o\\t%b1, %0, %S0
214 lb%B1o\\t%b0, %1, %S1
215 mov\\t%0, %1
216 ldi\\t%0, %%pmem(%1)
217 ldi\\t%0, %1
218 fill\\t%0, 4
219 ldi32\\t%0, %1"
220 [(set_attr "type" "st,ld,alu,alu,alu,alu,alu")
221 (set_attr "length" "4,4,4,4,4,4,8")])
222
223
224 ;; Separate pattern for 8 and 16 bit moves, since LDI32 pseudo instruction
225 ;; cannot handle byte and word-sized registers.
226 ;;
227 ;; Note: Constraint N is fine for both QI and HI mode, since it is used
228 ;; in the context of 16 bit constant integer.
229 (define_insn "prumov<mode>"
230 [(set (match_operand:MOV8_16 0 "nonimmediate_operand" "=m,r,r,r,r")
231 (match_operand:MOV8_16 1 "general_operand" "r,m,r,T,N"))]
232 ""
233 "@
234 sb%B0o\\t%b1, %0, %S0
235 lb%B1o\\t%b0, %1, %S1
236 mov\\t%0, %1
237 ldi\\t%0, %%pmem(%1)
238 ldi\\t%0, (%1) & 0xffff"
239 [(set_attr "type" "st,ld,alu,alu,alu")
240 (set_attr "length" "4")])
241
242
243 ; Pmode is 32 bits for PRU, so symbolic constants cannot be 64 bits. Hence
244 ; this pattern handles only numeric constants.
245 ;
246 ; Note: Unlike the arithmetic patterns, here we cannot use the "&"
247 ; (earlyclobber) modifier. GCC expects to be able to move registers around
248 ; "no matter what". Forcing DI register alignment (akin to microblaze's
249 ; HARD_REGNO_MODE_OK) does not seem efficient, and would violate the TI ABI.
250 (define_insn "mov<mode>"
251 [(set (match_operand:MOV64 0 "nonimmediate_operand" "=m,r,r,r,r,r,r")
252 (match_operand:MOV64 1 "general_operand" "r,m,Um,r,T,J,nF"))]
253 ""
254 {
255 switch (which_alternative)
256 {
257 case 0:
258 return "sb%B0o\\t%b1, %0, %S0";
259 case 1:
260 return "lb%B1o\\t%b0, %1, %S1";
261 case 2:
262 return "fill\\t%F0, 8";
263 case 3:
264 /* Careful with overlapping source and destination registers. */
265 gcc_assert (GP_REG_P (REGNO (operands[0])));
266 gcc_assert (GP_REG_P (REGNO (operands[1])));
267 if (REGNO (operands[0]) == (REGNO (operands[1]) + 4))
268 return "mov\\t%N0, %N1\;mov\\t%F0, %F1";
269 else
270 return "mov\\t%F0, %F1\;mov\\t%N0, %N1";
271 case 4:
272 return "ldi\\t%F0, %%pmem(%1)\;ldi\\t%N0, 0";
273 case 5:
274 return "ldi\\t%F0, %1\;ldi\\t%N0, 0";
275 case 6:
276 return "ldi32\\t%F0, %w1\;ldi32\\t%N0, %W1";
277 default:
278 gcc_unreachable ();
279 }
280 }
281 [(set_attr "type" "st,ld,alu,alu,alu,alu,alu")
282 (set_attr "length" "4,4,4,8,8,8,16")])
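; Overlap example for alternative 3 above (hypothetical allocation): with
; the source in r14:r15 and the destination in r15:r16, the destination's
; low word aliases the source's high word, so the high word is copied
; first; for the mirrored overlap the low word is copied first instead.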
283
284 ;
285 ; load_multiple pattern(s).
286 ;
287 ; ??? Due to reload problems with replacing registers inside match_parallel,
288 ; we currently support load_multiple/store_multiple only after reload.
289 ;
290 ; Idea taken from the s390 port.
291
292 (define_expand "load_multiple"
293 [(match_par_dup 3 [(set (match_operand 0 "")
294 (match_operand 1 ""))
295 (use (match_operand 2 ""))])]
296 "reload_completed"
297 {
298 machine_mode mode;
299 int regno;
300 int count;
301 rtx base_reg;
302 poly_int64 base_offs;
303 int i;
304
305 /* Support only loading a constant number of fixed-point registers from
306 memory. */
307 if (GET_CODE (operands[2]) != CONST_INT
308 || GET_CODE (operands[1]) != MEM
309 || GET_CODE (operands[0]) != REG)
310 FAIL;
311
312 count = INTVAL (operands[2]);
313 regno = REGNO (operands[0]);
314 mode = GET_MODE (operands[0]);
315 if (mode != QImode)
316 FAIL;
317
318 operands[3] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
319
320 gcc_assert (!can_create_pseudo_p ());
321
322 base_reg = strip_offset (XEXP (operands[1], 0), &base_offs);
323 if (GET_CODE (base_reg) != REG)
324 FAIL;
325
326 for (i = 0; i < count; i++)
327 XVECEXP (operands[3], 0, i)
328 = gen_rtx_SET (gen_rtx_REG (mode, regno + i),
329 change_address (operands[1], mode,
330 plus_constant (Pmode, base_reg,
331 base_offs + i * GET_MODE_SIZE (mode))));
332 })
333
334 (define_insn "*pru_load_multiple"
335 [(match_parallel 0 "load_multiple_operation"
336 [(set (match_operand:QI 1 "register_operand" "=r")
337 (match_operand:QI 2 "memory_operand" "m"))])]
338 "reload_completed"
339 {
340 int nregs = XVECLEN (operands[0], 0);
341 operands[0] = GEN_INT (nregs);
342 return "lb%B2o\\t%b1, %2, %0";
343 }
344 [(set_attr "type" "ld")])
345
346 ;
347 ; store multiple pattern(s).
348 ;
349
350 (define_expand "store_multiple"
351 [(match_par_dup 3 [(set (match_operand 0 "")
352 (match_operand 1 ""))
353 (use (match_operand 2 ""))])]
354 "reload_completed"
355 {
356 machine_mode mode;
357 int regno;
358 int count;
359 rtx base_reg;
360 poly_int64 base_offs;
361 int i;
362
363 /* Support only storing a constant number of fixed-point registers to
364 memory. */
365 if (GET_CODE (operands[2]) != CONST_INT
366 || GET_CODE (operands[0]) != MEM
367 || GET_CODE (operands[1]) != REG)
368 FAIL;
369
370 count = INTVAL (operands[2]);
371 regno = REGNO (operands[1]);
372 mode = GET_MODE (operands[1]);
373 if (mode != QImode)
374 FAIL;
375
376 operands[3] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
377
378 gcc_assert (!can_create_pseudo_p ());
379
380 base_reg = strip_offset (XEXP (operands[0], 0), &base_offs);
381 if (GET_CODE (base_reg) != REG)
382 FAIL;
383
384 for (i = 0; i < count; i++)
385 XVECEXP (operands[3], 0, i)
386 = gen_rtx_SET (change_address (operands[0], mode,
387 plus_constant (Pmode, base_reg,
388 base_offs + i * GET_MODE_SIZE (mode))),
389 gen_rtx_REG (mode, regno + i));
390 })
391
392 (define_insn "*pru_store_multiple"
393 [(match_parallel 0 "store_multiple_operation"
394 [(set (match_operand:QI 1 "memory_operand" "=m")
395 (match_operand:QI 2 "register_operand" "r"))])]
396 "reload_completed"
397 {
398 int nregs = XVECLEN (operands[0], 0);
399 operands[0] = GEN_INT (nregs);
400 return "sb%B1o\\t%b2, %1, %0";
401 }
402 [(set_attr "type" "st")])
403 \f
404 ;; Zero extension patterns
405 ;;
406 ;; Unfortunately we cannot use lbbo to load AND zero-extend a value.
407 ;; The burst length parameter of the LBBO instruction designates not only
408 ;; the number of memory data bytes fetched, but also the number of register
409 ;; byte fields written.
410 (define_expand "zero_extend<EQS0:mode><EQD:mode>2"
411 [(set (match_operand:EQD 0 "register_operand")
412 (zero_extend:EQD (match_operand:EQS0 1 "register_operand")))]
413 ""
414 "")
415
416 (define_insn "*zero_extend<EQS0:mode><EQD:mode>2"
417 [(set (match_operand:EQD 0 "register_operand" "=r")
418 (zero_extend:EQD (match_operand:EQS0 1 "register_operand" "r")))]
419 ""
420 "mov\\t%0, %1"
421 [(set_attr "type" "alu")])
422
423 (define_insn "zero_extendqidi2"
424 [(set (match_operand:DI 0 "register_operand" "=r,r")
425 (zero_extend:DI (match_operand:QI 1 "register_operand" "0,r")))]
426 ""
427 "@
428 zero\\t%F0.b1, 7
429 mov\\t%F0.b0, %1\;zero\\t%F0.b1, 7"
430 [(set_attr "type" "alu,alu")
431 (set_attr "length" "4,8")])
432
433 (define_insn "zero_extendhidi2"
434 [(set (match_operand:DI 0 "register_operand" "=r,r")
435 (zero_extend:DI (match_operand:HI 1 "register_operand" "0,r")))]
436 ""
437 "@
438 zero\\t%F0.b2, 6
439 mov\\t%F0.w0, %1\;zero\\t%F0.b2, 6"
440 [(set_attr "type" "alu,alu")
441 (set_attr "length" "4,8")])
442
443 (define_insn "zero_extendsidi2"
444 [(set (match_operand:DI 0 "register_operand" "=r,r")
445 (zero_extend:DI (match_operand:SI 1 "register_operand" "0,r")))]
446 ""
447 "@
448 zero\\t%N0, 4
449 mov\\t%F0, %1\;zero\\t%N0, 4"
450 [(set_attr "type" "alu,alu")
451 (set_attr "length" "4,8")])
452
453 ;; Sign extension pattern. We have to emulate it due to the lack of
454 ;; signed operations in PRU's ALU.
455
456 (define_expand "extend<EQS0:mode><EQDHIDI:mode>2"
457 [(set (match_operand:EQDHIDI 0 "register_operand" "=r")
458 (sign_extend:EQDHIDI (match_operand:EQS0 1 "register_operand" "r")))]
459 ""
460 {
461 rtx_code_label *skip_hiset_label;
462
463 /* Clear the higher bits to temporarily make the value positive. */
464 emit_insn (gen_rtx_SET (operands[0],
465 gen_rtx_ZERO_EXTEND (<EQDHIDI:MODE>mode,
466 operands[1])));
467
468 /* Now check if the result must be made negative. */
469 skip_hiset_label = gen_label_rtx ();
470 const int op1_size = GET_MODE_SIZE (<EQS0:MODE>mode);
471 const int op1_sign_bit = op1_size * BITS_PER_UNIT - 1;
472 emit_jump_insn (gen_cbranch_qbbx_const (EQ,
473 <EQDHIDI:MODE>mode,
474 operands[0],
475 GEN_INT (op1_sign_bit),
476 skip_hiset_label));
477 emit_insn (gen_ior<EQDHIDI:mode>3 (
478 operands[0],
479 operands[0],
480 GEN_INT (~GET_MODE_MASK (<EQS0:MODE>mode))));
481 emit_label (skip_hiset_label);
482
483 DONE;
484 })
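;; Worked example of the emulation above, extending QImode 0x85 to SImode:
;; the zero_extend gives 0x00000085; bit 7 (the QI sign bit) is set, so the
;; IOR with ~GET_MODE_MASK (QImode) == 0xffffff00 yields 0xffffff85, i.e.
;; the correctly sign-extended value.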
485 \f
486 ;; Bit extraction
487 ;; We define it solely to allow combine to choose SImode
488 ;; for word mode when trying to match our cbranch_qbbx_* insn.
489 ;;
490 ;; Check how combine.cc:make_extraction() uses
491 ;; get_best_reg_extraction_insn() to select the op size.
492 (define_insn "extzv<mode>"
493 [(set (match_operand:QISI 0 "register_operand" "=r")
494 (zero_extract:QISI
495 (match_operand:QISI 1 "register_operand" "r")
496 (match_operand:QISI 2 "const_int_operand" "i")
497 (match_operand:QISI 3 "const_int_operand" "i")))]
498 ""
499 "lsl\\t%0, %1, (%S0 * 8 - %2 - %3)\;lsr\\t%0, %0, (%S0 * 8 - %2)"
500 [(set_attr "type" "complex")
501 (set_attr "length" "8")])
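;; For example, extracting a 3-bit field at bit position 4 from an SImode
;; register shifts left by 32 - 3 - 4 = 25 and then logically right by
;; 32 - 3 = 29, leaving the field in the low bits with zeros above it.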
502
503
504 \f
505 ;; Arithmetic Operations
506
507 (define_expand "add<mode>3"
508 [(set (match_operand:QISI 0 "register_operand")
509 (plus:QISI (match_operand:QISI 1 "register_operand")
510 (match_operand:QISI 2 "nonmemory_operand")))]
511 ""
512 "")
513
514 (define_insn "adddi3"
515 [(set (match_operand:DI 0 "register_operand" "=&r,&r,&r")
516 (plus:DI (match_operand:DI 1 "register_operand" "%r,r,r")
517 (match_operand:DI 2 "reg_or_ubyte_operand" "r,I,M")))]
518 ""
519 "@
520 add\\t%F0, %F1, %F2\;adc\\t%N0, %N1, %N2
521 add\\t%F0, %F1, %2\;adc\\t%N0, %N1, 0
522 sub\\t%F0, %F1, %n2\;suc\\t%N0, %N1, 0"
523 [(set_attr "type" "alu")
524 (set_attr "length" "8")])
525
526 (define_expand "sub<mode>3"
527 [(set (match_operand:QISI 0 "register_operand")
528 (minus:QISI (match_operand:QISI 1 "reg_or_ubyte_operand")
529 (match_operand:QISI 2 "reg_or_ubyte_operand")))]
530 ""
531 "")
532
533 (define_insn "subdi3"
534 [(set (match_operand:DI 0 "register_operand" "=&r,&r")
535 (minus:DI (match_operand:DI 1 "reg_or_ubyte_operand" "r,I")
536 (match_operand:DI 2 "register_operand" "r,r")))]
537 ""
538 "@
539 sub\\t%F0, %F1, %F2\;suc\\t%N0, %N1, %N2
540 rsb\\t%F0, %F2, %1\;rsc\\t%N0, %N2, 0"
541 [(set_attr "type" "alu")
542 (set_attr "length" "8")])
543 \f
544 ;; Negate and ones complement
545
546 (define_expand "neg<mode>2"
547 [(set (match_operand:QISI 0 "register_operand")
548 (neg:QISI (match_operand:QISI 1 "register_operand")))]
549 ""
550 "")
551
552 (define_expand "one_cmpl<mode>2"
553 [(set (match_operand:QISI 0 "register_operand")
554 (not:QISI (match_operand:QISI 1 "register_operand")))]
555 ""
556 "")
557 \f
558 ;; Integer logical Operations
559 ;;
560 ;; TODO - add optimized cases that exploit the fact that we can get away
561 ;; with a single machine op for special constants, e.g. UBYTE << (0/8/16/24)
562
563 (define_code_iterator LOGICAL [and ior xor umin umax])
564 (define_code_attr logical_asm [(and "and") (ior "or") (xor "xor") (umin "min") (umax "max")])
565
566 (define_code_iterator LOGICAL_BITOP [and ior xor])
567 (define_code_attr logical_bitop_asm [(and "and") (ior "or") (xor "xor")])
568
569 (define_expand "<code><mode>3"
570 [(set (match_operand:QISI 0 "register_operand")
571 (LOGICAL:QISI (match_operand:QISI 1 "register_operand")
572 (match_operand:QISI 2 "reg_or_ubyte_operand")))]
573 ""
574 "")
575
576 ;; Specialised IOR pattern, which can emit an efficient FILL instruction.
577 (define_insn "@pru_ior_fillbytes<mode>"
578 [(set (match_operand:HIDI 0 "register_operand" "=r")
579 (ior:HIDI
580 (match_operand:HIDI 1 "register_operand" "0")
581 (match_operand:HIDI 2 "const_fillbytes_operand" "Uf")))]
582 ""
583 {
584 static char line[64];
585 pru_byterange r;
586
587 r = pru_calc_byterange (INTVAL (operands[2]), <MODE>mode);
588 gcc_assert (r.start >=0 && r.nbytes > 0);
589 gcc_assert ((r.start + r.nbytes) <= GET_MODE_SIZE (<MODE>mode));
590
591 const int regno = REGNO (operands[0]) + r.start;
592
593 sprintf (line, "fill\\tr%d.b%d, %d", regno / 4, regno % 4, r.nbytes);
594 return line;
595 }
596 [(set_attr "type" "alu")
597 (set_attr "length" "4")])
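;; Example (hypothetical allocation): (ior:SI x 0x00ffff00) has a
;; contiguous run of two 0xff bytes starting at byte 1, so with X in r2
;; this emits "fill r2.b1, 2" instead of loading the wide constant into a
;; temporary and ORing.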
598
599 ;; Specialised AND pattern, which can emit an efficient ZERO instruction.
600 (define_insn "@pru_and_zerobytes<mode>"
601 [(set (match_operand:HIDI 0 "register_operand" "=r")
602 (and:HIDI
603 (match_operand:HIDI 1 "register_operand" "0")
604 (match_operand:HIDI 2 "const_zerobytes_operand" "Uz")))]
605 ""
606 {
607 static char line[64];
608 pru_byterange r;
609
610 r = pru_calc_byterange (~INTVAL (operands[2]), <MODE>mode);
611 gcc_assert (r.start >=0 && r.nbytes > 0);
612 gcc_assert ((r.start + r.nbytes) <= GET_MODE_SIZE (<MODE>mode));
613
614 const int regno = REGNO (operands[0]) + r.start;
615
616 sprintf (line, "zero\\tr%d.b%d, %d", regno / 4, regno % 4, r.nbytes);
617 return line;
618 }
619 [(set_attr "type" "alu")
620 (set_attr "length" "4")])
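;; Example (hypothetical allocation): (and:SI x 0xff00ffff) clears only
;; byte 2, so with X in r2 this emits the single instruction "zero r2.b2, 1".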
621 \f
622 ;; Shift instructions
623
624 (define_code_iterator SHIFT [ashift lshiftrt])
625 (define_code_attr shift_op [(ashift "ashl") (lshiftrt "lshr")])
626 (define_code_attr shift_asm [(ashift "lsl") (lshiftrt "lsr")])
627
628 (define_expand "<shift_op><mode>3"
629 [(set (match_operand:QISI 0 "register_operand")
630 (SHIFT:QISI (match_operand:QISI 1 "register_operand")
631 (match_operand:QISI 2 "shift_operand")))]
632 ""
633 "")
634
635 ; Expand to a loop of single-position arithmetic shifts, which
636 ; we can handle. Pseudo code:
637 ; tmpval = src;
638 ; QImode cntr = nshifts & 0xff;
639 ; while (cntr)
640 ; {
641 ; tmpval >>= 1;
642 ; cntr--;
643 ; }
644 ; dst = tmpval;
645 ;
646 ; Note that the number of shifts is truncated to QImode. This is a fair
647 ; assumption for a loop-based shifting implementation.
648 (define_expand "ashr<mode>3"
649 [(set (match_operand:QISI 0 "register_operand")
650 (ashiftrt:QISI
651 (match_operand:QISI 1 "register_operand")
652 (match_operand:QI 2 "reg_or_const_1_operand")))]
653 ""
654 {
655 rtx dst = operands[0];
656 rtx src = operands[1];
657 rtx nshifts = operands[2];
658 rtx_code_label *loop_label;
659 rtx_code_label *ashr_end_label;
660 rtx test, tmpval, cntr;
661
662 if (const_1_operand (nshifts, VOIDmode))
663 {
664 emit_insn (gen_ashr<mode>3_single (dst, src, nshifts));
665 DONE;
666 }
667
668 tmpval = gen_reg_rtx (<MODE>mode);
669 emit_move_insn (tmpval, src);
670
671 cntr = gen_reg_rtx (QImode);
672 emit_move_insn (cntr, nshifts);
673
674 loop_label = gen_label_rtx ();
675 ashr_end_label = gen_label_rtx ();
676
677 emit_label (loop_label);
678 test = gen_rtx_EQ (VOIDmode, cntr, const0_rtx);
679 emit_jump_insn (gen_cbranchqi4 (test, cntr, const0_rtx, ashr_end_label));
680
681 emit_insn (gen_ashr<mode>3_single (tmpval, tmpval, const1_rtx));
682 emit_insn (gen_addqi3 (cntr, cntr, GEN_INT (-1)));
683
684 emit_jump_insn (gen_jump (loop_label));
685 JUMP_LABEL (get_last_insn ()) = loop_label;
686 LABEL_NUSES (loop_label)++;
687 emit_barrier ();
688
689 emit_label (ashr_end_label);
690
691 emit_move_insn (dst, tmpval);
692
693 DONE;
694 })
695
696 (define_insn "ashr<mode>3_single"
697 [(set (match_operand:QISI 0 "register_operand" "=r")
698 (ashiftrt:QISI
699 (match_operand:QISI 1 "register_operand" "r")
700 (match_operand:QI 2 "const_1_operand" "P")))]
701 ""
702 "lsr\\t%0, %1, 1\;qbbc LSIGN%=, %0, (%S0 * 8) - 2\;set %0, %0, (%S0 * 8) - 1\;LSIGN%=:"
703 [(set_attr "type" "alu")
704 (set_attr "length" "12")])
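; The single-position arithmetic shift above does a logical shift right by
; one and then, if the bit now holding the original sign (bit %S0 * 8 - 2)
; is set, restores the new top bit with SET.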
705
706
707 ; A 64-bit LSHIFTRT with a constant shift count can be expanded into a
708 ; more efficient code sequence than a variable register shift.
709 ;
710 ; 1. For shift >= 32:
711 ; dst_lo = (src_hi >> (shift - 32))
712 ; dst_hi = 0
713 ;
714 ; 2. For shift==1 there is no need for a temporary:
715 ; dst_lo = (src_lo >> 1)
716 ; if (src_hi & 1)
717 ; dst_lo |= (1 << 31)
718 ; dst_hi = (src_hi >> 1)
719 ;
720 ; 3. For shift < 32:
721 ; dst_lo = (src_lo >> shift)
722 ; tmp = (src_hi << (32 - shift))
723 ; dst_lo |= tmp
724 ; dst_hi = (src_hi >> shift)
725 ;
726 ; 4. For shift in a register:
727 ; Fall back to calling libgcc.
728 (define_expand "lshrdi3"
729 [(set (match_operand:DI 0 "register_operand")
730 (lshiftrt:DI
731 (match_operand:DI 1 "register_operand")
732 (match_operand:QI 2 "const_int_operand")))]
733 ""
734 {
735 gcc_assert (CONST_INT_P (operands[2]));
736
737 const int nshifts = INTVAL (operands[2]);
738 rtx dst_lo = simplify_gen_subreg (SImode, operands[0], DImode, 0);
739 rtx dst_hi = simplify_gen_subreg (SImode, operands[0], DImode, 4);
740 rtx src_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0);
741 rtx src_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4);
742
743 if (nshifts >= 32)
744 {
745 emit_insn (gen_rtx_SET (dst_lo,
746 gen_rtx_LSHIFTRT (SImode,
747 src_hi,
748 GEN_INT (nshifts - 32))));
749 emit_insn (gen_rtx_SET (dst_hi, const0_rtx));
750 DONE;
751 }
752
753 gcc_assert (can_create_pseudo_p ());
754
755 /* The expansions which follow are safe only if DST_LO and SRC_HI
756 do not overlap. If they do, then fix by using a temporary register.
757 Overlapping of DST_HI and SRC_LO is safe because by the time DST_HI
758 is set, SRC_LO is no longer live. */
759 if (reg_overlap_mentioned_p (dst_lo, src_hi))
760 {
761 rtx new_src_hi = gen_reg_rtx (SImode);
762
763 emit_move_insn (new_src_hi, src_hi);
764 src_hi = new_src_hi;
765 }
766
767 if (nshifts == 1)
768 {
769 rtx_code_label *skip_hiset_label;
770 rtx j;
771
772 emit_insn (gen_rtx_SET (dst_lo,
773 gen_rtx_LSHIFTRT (SImode, src_lo, const1_rtx)));
774
775 /* The code generated by `genemit' would create a LABEL_REF. */
776 skip_hiset_label = gen_label_rtx ();
777 j = emit_jump_insn (gen_cbranch_qbbx_const (EQ,
778 SImode,
779 src_hi,
780 GEN_INT (0),
781 skip_hiset_label));
782 JUMP_LABEL (j) = skip_hiset_label;
783 LABEL_NUSES (skip_hiset_label)++;
784
785 emit_insn (gen_iorsi3 (dst_lo, dst_lo, GEN_INT (1 << 31)));
786 emit_label (skip_hiset_label);
787 emit_insn (gen_rtx_SET (dst_hi,
788 gen_rtx_LSHIFTRT (SImode, src_hi, const1_rtx)));
789 DONE;
790 }
791
792 if (nshifts < 32)
793 {
794 rtx tmpval = gen_reg_rtx (SImode);
795
796 emit_insn (gen_rtx_SET (dst_lo,
797 gen_rtx_LSHIFTRT (SImode,
798 src_lo,
799 GEN_INT (nshifts))));
800 emit_insn (gen_rtx_SET (tmpval,
801 gen_rtx_ASHIFT (SImode,
802 src_hi,
803 GEN_INT (32 - nshifts))));
804 emit_insn (gen_iorsi3 (dst_lo, dst_lo, tmpval));
805 emit_insn (gen_rtx_SET (dst_hi,
806 gen_rtx_LSHIFTRT (SImode,
807 src_hi,
808 GEN_INT (nshifts))));
809 DONE;
810 }
811 gcc_unreachable ();
812 })
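; For instance, a constant shift count of 40 takes case 1 above and
; expands to dst_lo = src_hi >> 8 followed by dst_hi = 0, two SImode
; instructions instead of the library fallback needed for variable shifts.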
813
814 ; A 64-bit ASHIFT with a constant shift count can be expanded into a
815 ; more efficient code sequence than the libgcc call required by
816 ; a variable shift in a register.
817
818 (define_expand "ashldi3"
819 [(set (match_operand:DI 0 "register_operand")
820 (ashift:DI
821 (match_operand:DI 1 "register_operand")
822 (match_operand:QI 2 "const_int_operand")))]
823 ""
824 {
825 gcc_assert (CONST_INT_P (operands[2]));
826
827 const int nshifts = INTVAL (operands[2]);
828 rtx dst_lo = simplify_gen_subreg (SImode, operands[0], DImode, 0);
829 rtx dst_hi = simplify_gen_subreg (SImode, operands[0], DImode, 4);
830 rtx src_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0);
831 rtx src_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4);
832
833 if (nshifts >= 32)
834 {
835 emit_insn (gen_rtx_SET (dst_hi,
836 gen_rtx_ASHIFT (SImode,
837 src_lo,
838 GEN_INT (nshifts - 32))));
839 emit_insn (gen_rtx_SET (dst_lo, const0_rtx));
840 DONE;
841 }
842
843 gcc_assert (can_create_pseudo_p ());
844
845 /* The expansions which follow are safe only if DST_HI and SRC_LO
846 do not overlap. If they do, then fix by using a temporary register.
847 Overlapping of DST_LO and SRC_HI is safe because by the time DST_LO
848 is set, SRC_HI is no longer live. */
849 if (reg_overlap_mentioned_p (dst_hi, src_lo))
850 {
851 rtx new_src_lo = gen_reg_rtx (SImode);
852
853 emit_move_insn (new_src_lo, src_lo);
854 src_lo = new_src_lo;
855 }
856
857 if (nshifts == 1)
858 {
859 rtx_code_label *skip_hiset_label;
860 rtx j;
861
862 emit_insn (gen_rtx_SET (dst_hi,
863 gen_rtx_ASHIFT (SImode, src_hi, const1_rtx)));
864
865 skip_hiset_label = gen_label_rtx ();
866 j = emit_jump_insn (gen_cbranch_qbbx_const (EQ,
867 SImode,
868 src_lo,
869 GEN_INT (31),
870 skip_hiset_label));
871 JUMP_LABEL (j) = skip_hiset_label;
872 LABEL_NUSES (skip_hiset_label)++;
873
874 emit_insn (gen_iorsi3 (dst_hi, dst_hi, GEN_INT (1 << 0)));
875 emit_label (skip_hiset_label);
876 emit_insn (gen_rtx_SET (dst_lo,
877 gen_rtx_ASHIFT (SImode, src_lo, const1_rtx)));
878 DONE;
879 }
880
881 if (nshifts < 32)
882 {
883 rtx tmpval = gen_reg_rtx (SImode);
884
885 emit_insn (gen_rtx_SET (dst_hi,
886 gen_rtx_ASHIFT (SImode,
887 src_hi,
888 GEN_INT (nshifts))));
889 emit_insn (gen_rtx_SET (tmpval,
890 gen_rtx_LSHIFTRT (SImode,
891 src_lo,
892 GEN_INT (32 - nshifts))));
893 emit_insn (gen_iorsi3 (dst_hi, dst_hi, tmpval));
894 emit_insn (gen_rtx_SET (dst_lo,
895 gen_rtx_ASHIFT (SImode,
896 src_lo,
897 GEN_INT (nshifts))));
898 DONE;
899 }
900 gcc_unreachable ();
901 })
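; For instance, a constant left shift by 3 takes the "shift < 32" path:
; dst_hi = (src_hi << 3) | (src_lo >> 29) via a temporary, followed by
; dst_lo = src_lo << 3.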
902 \f
903 ;; Include ALU patterns with zero-extension of operands. That's where
904 ;; the real insns are defined.
905
906 (include "alu-zext.md")
907 \f
908 ;; Patterns for accessing the R30/R31 I/O registers.
909
910 (define_insn "*regio_readsi"
911 [(set (match_operand:SI 0 "register_operand" "=r")
912 (unspec_volatile:SI
913 [(match_operand:SI 1 "regio_operand" "Rrio")]
914 UNSPECV_REGIO_READ))]
915 ""
916 "mov\\t%0, %1"
917 [(set_attr "type" "alu")])
918
919 (define_insn "*regio_nozext_writesi"
920 [(set (match_operand:SI 0 "regio_operand" "=Rrio")
921 (unspec_volatile:SI
922 [(match_operand:SI 1 "register_operand" "r")]
923 UNSPECV_REGIO_WRITE))]
924 ""
925 "mov\\t%0, %1"
926 [(set_attr "type" "alu")])
927
928 (define_insn "*regio_zext_write_r30<EQS0:mode>"
929 [(set (match_operand:SI 0 "regio_operand" "=Rrio")
930 (unspec_volatile:SI
931 [(zero_extend:SI (match_operand:EQS0 1 "register_operand" "r"))]
932 UNSPECV_REGIO_WRITE))]
933 ""
934 "mov\\t%0, %1"
935 [(set_attr "type" "alu")])
936 \f
937 ;; DI logical ops could be automatically split into WORD-mode ops in
938 ;; expand_binop(). But then we would miss an opportunity to use SImode
939 ;; operations, since the word mode for PRU is QImode.
940 (define_expand "<code>di3"
941 [(set (match_operand:DI 0 "register_operand")
942 (LOGICAL_BITOP:DI
943 (match_operand:DI 1 "register_operand")
944 (match_operand:DI 2 "reg_or_const_int_operand")))]
945 ""
946 {
947 /* Try with the more efficient zero/fill patterns first. */
948 if (<LOGICAL_BITOP:CODE> == IOR
949 && CONST_INT_P (operands[2])
950 && const_fillbytes_operand (operands[2], DImode))
951 {
952 rtx insn = maybe_gen_pru_ior_fillbytes (DImode,
953 operands[0],
954 operands[0],
955 operands[2]);
956 if (insn != nullptr)
957 {
958 if (REGNO (operands[0]) != REGNO (operands[1]))
959 emit_move_insn (operands[0], operands[1]);
960 emit_insn (insn);
961 DONE;
962 }
963 }
964 if (<LOGICAL_BITOP:CODE> == AND
965 && CONST_INT_P (operands[2])
966 && const_zerobytes_operand (operands[2], DImode))
967 {
968 rtx insn = maybe_gen_pru_and_zerobytes (DImode,
969 operands[0],
970 operands[0],
971 operands[2]);
972 if (insn != nullptr)
973 {
974 if (REGNO (operands[0]) != REGNO (operands[1]))
975 emit_move_insn (operands[0], operands[1]);
976 emit_insn (insn);
977 DONE;
978 }
979 }
980 /* No optimized case found. Rely on the two-instruction pattern below. */
981 if (!reg_or_ubyte_operand (operands[2], DImode))
982 operands[2] = force_reg (DImode, operands[2]);
983 })
984
985 ;; 64-bit pattern for logical operations.
986 (define_insn "pru_<code>di3"
987 [(set (match_operand:DI 0 "register_operand" "=r,&r,r")
988 (LOGICAL_BITOP:DI
989 (match_operand:DI 1 "register_operand" "%0,r,r")
990 (match_operand:DI 2 "reg_or_ubyte_operand" "r,r,I")))]
991 ""
992 {
993 switch (which_alternative)
994 {
995 case 0:
996 if (REGNO (operands[0]) == (REGNO (operands[2]) + 4))
997 return "<logical_bitop_asm>\\t%N0, %N0, %N2\;"
998 "<logical_bitop_asm>\\t%F0, %F0, %F2";
999 else
1000 return "<logical_bitop_asm>\\t%F0, %F0, %F2\;"
1001 "<logical_bitop_asm>\\t%N0, %N0, %N2";
1002 case 1:
1003 /* With the three-register variant there is no way to handle the case
1004 when OP0 overlaps both OP1 and OP2. Example:
1005 OP0_lo == OP1_hi
1006 OP0_hi == OP2_lo
1007 Hence this variant's OP0 must be marked as an earlyclobber. */
1008 return "<logical_bitop_asm>\\t%F0, %F1, %F2\;"
1009 "<logical_bitop_asm>\\t%N0, %N1, %N2";
1010 case 2:
1011 if (REGNO (operands[0]) == (REGNO (operands[1]) + 4))
1012 return "<logical_bitop_asm>\\t%N0, %N1, 0\;"
1013 "<logical_bitop_asm>\\t%F0, %F1, %2";
1014 else
1015 return "<logical_bitop_asm>\\t%F0, %F1, %2\;"
1016 "<logical_bitop_asm>\\t%N0, %N1, 0";
1017 default:
1018 gcc_unreachable ();
1019 }
1020 }
1021 [(set_attr "type" "alu")
1022 (set_attr "length" "8")])
1023
1024 (define_insn "one_cmpldi2"
1025 [(set (match_operand:DI 0 "register_operand" "=r")
1026 (not:DI (match_operand:DI 1 "register_operand" "r")))]
1027 ""
1028 {
1029 /* Careful with overlapping source and destination registers. */
1030 gcc_assert (GP_REG_P (REGNO (operands[0])));
1031 gcc_assert (GP_REG_P (REGNO (operands[1])));
1032 if (REGNO (operands[0]) == (REGNO (operands[1]) + 4))
1033 return "not\\t%N0, %N1\;not\\t%F0, %F1";
1034 else
1035 return "not\\t%F0, %F1\;not\\t%N0, %N1";
1036 }
1037 [(set_attr "type" "alu")
1038 (set_attr "length" "8")])
1039 \f
1040 ;; Multiply instruction. The nop is required to ensure that Rmd0 and Rms0
1041 ;; registers are sampled and multiplication is executed on those values.
1042 ;; Only after that one cycle can xin obtain the result.
1043
1044 (define_insn "mulsi3"
1045 [(set (match_operand:SI 0 "pru_muldst_operand" "=Rmd0")
1046 (mult:SI (match_operand:SI 1 "pru_mulsrc0_operand" "%Rms0")
1047 (match_operand:SI 2 "pru_mulsrc1_operand" "Rms1")))]
1048 ""
1049 "nop\;xin\\t0, %0, 4"
1050 [(set_attr "type" "alu")
1051 (set_attr "length" "8")])
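;; With the operands fixed by the constraints to the multiplier registers
;; (product in r26, sources in r28 and r29), the emitted sequence is simply
;; "nop" followed by "xin 0, r26, 4".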
1052 \f
1053 ;; Prologue, Epilogue and Return
1054
1055 (define_expand "prologue"
1056 [(const_int 1)]
1057 ""
1058 {
1059 pru_expand_prologue ();
1060 DONE;
1061 })
1062
1063 (define_expand "epilogue"
1064 [(return)]
1065 ""
1066 {
1067 pru_expand_epilogue (false);
1068 DONE;
1069 })
1070
1071 (define_expand "sibcall_epilogue"
1072 [(return)]
1073 ""
1074 {
1075 pru_expand_epilogue (true);
1076 DONE;
1077 })
1078
1079 (define_insn "return"
1080 [(simple_return)]
1081 "pru_can_use_return_insn ()"
1082 "ret")
1083
1084 (define_insn "simple_return"
1085 [(simple_return)]
1086 ""
1087 "ret")
1088
1089 ;; Block any insns from being moved before this point, since the
1090 ;; profiling call to mcount can use various registers that aren't
1091 ;; saved or used to pass arguments.
1092
1093 (define_insn "blockage"
1094 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
1095 ""
1096 ""
1097 [(set_attr "type" "unknown")
1098 (set_attr "length" "0")])
1099 \f
1100 ;; Jumps and calls
1101
1102 (define_insn "indirect_jump"
1103 [(set (pc) (match_operand:SI 0 "register_operand" "r"))]
1104 ""
1105 "jmp\\t%0"
1106 [(set_attr "type" "control")])
1107
1108 (define_insn "jump"
1109 [(set (pc)
1110 (label_ref (match_operand 0)))]
1111 ""
1112 "jmp\\t%%label(%l0)"
1113 [(set_attr "type" "control")])
1114
1115
1116 (define_expand "call"
1117 [(parallel [(call (match_operand 0 "")
1118 (match_operand 1 ""))
1119 (clobber (reg:HI RA_REGNUM))])]
1120 ""
1121 "")
1122
1123 (define_expand "call_value"
1124 [(parallel [(set (match_operand 0 "")
1125 (call (match_operand 1 "")
1126 (match_operand 2 "")))
1127 (clobber (reg:HI RA_REGNUM))])]
1128 ""
1129 "")
1130
1131 (define_insn "*call"
1132 [(call (mem:SI (match_operand:SI 0 "call_operand" "i,r"))
1133 (match_operand 1))
1134 (clobber (reg:HI RA_REGNUM))]
1135 ""
1136 "@
1137 call\\t%%label(%0)
1138 call\\t%0"
1139 [(set_attr "type" "control")])
1140
1141 (define_insn "*call_value"
1142 [(set (match_operand 0)
1143 (call (mem:SI (match_operand:SI 1 "call_operand" "i,r"))
1144 (match_operand 2)))
1145 (clobber (reg:HI RA_REGNUM))]
1146 ""
1147 "@
1148 call\\t%%label(%1)
1149 call\\t%1"
1150 [(set_attr "type" "control")])
1151
1152 (define_expand "sibcall"
1153 [(parallel [(call (match_operand 0 "")
1154 (match_operand 1 ""))
1155 (return)])]
1156 ""
1157 "")
1158
1159 (define_expand "sibcall_value"
1160 [(parallel [(set (match_operand 0 "")
1161 (call (match_operand 1 "")
1162 (match_operand 2 "")))
1163 (return)])]
1164 ""
1165 "")
1166
1167 (define_insn "*sibcall"
1168 [(call (mem:SI (match_operand:SI 0 "call_operand" "i,Rsib"))
1169 (match_operand 1))
1170 (return)]
1171 "SIBLING_CALL_P (insn)"
1172 "@
1173 jmp\\t%%label(%0)
1174 jmp\\t%0"
1175 [(set_attr "type" "control")])
1176
1177 (define_insn "*sibcall_value"
1178 [(set (match_operand 0 "register_operand" "")
1179 (call (mem:SI (match_operand:SI 1 "call_operand" "i,Rsib"))
1180 (match_operand 2)))
1181 (return)]
1182 "SIBLING_CALL_P (insn)"
1183 "@
1184 jmp\\t%%label(%1)
1185 jmp\\t%1"
1186 [(set_attr "type" "control")])
1187
1188 (define_insn "*tablejump"
1189 [(set (pc)
1190 (match_operand:SI 0 "register_operand" "r"))
1191 (use (label_ref (match_operand 1)))]
1192 ""
1193 "jmp\\t%0"
1194 [(set_attr "type" "control")])
1195 \f
1196 ;; Expand the cbranch pattern in order to assign different constraints for
1197 ;; signed and unsigned comparisons.
1198 (define_expand "cbranch<mode>4"
1199 [(set (pc)
1200 (if_then_else
1201 (match_operator 0 "ordered_comparison_operator"
1202 [(match_operand:QISI 1 "register_operand")
1203 (match_operand:QISI 2 "reg_or_const_int_operand")])
1204 (label_ref (match_operand 3 ""))
1205 (pc)))]
1206 ""
1207 {
1208 /* Ensure our patterns will be able to handle the particular const_int. */
1209 if (CONST_INT_P (operands[2]))
1210 {
1211 HOST_WIDE_INT ival = INTVAL (operands[2]);
1212
1213 /* For signed comparisons, we cannot play games with the const_int's
1214 sign. PRU patterns do not support negative integer constants. */
1215 if (pru_signed_cmp_operator (operands[0], VOIDmode) && !UBYTE_INT (ival))
1216 {
1217 if (can_create_pseudo_p ())
1218 operands[2] = force_reg (<MODE>mode, operands[2]);
1219 else
1220 FAIL;
1221 }
1222
1223 /* For unsigned comparisons, be prepared to handle the QI quirk. */
1224 if (pru_cmp_operator (operands[0], VOIDmode)
1225 && !const_ubyte_operand (operands[2], <MODE>mode))
1226 {
1227 if (can_create_pseudo_p ())
1228 operands[2] = force_reg (<MODE>mode, operands[2]);
1229 else
1230 FAIL;
1231 }
1232 }
1233 })
1234
1235 (define_insn "cbranch<mode>4_unsigned"
1236 [(set (pc)
1237 (if_then_else
1238 (match_operator 0 "pru_cmp_operator"
1239 [(match_operand:QISI 1 "register_operand" "r")
1240 (match_operand:QISI 2 "reg_or_ubyte_operand" "r<QISI:ubyte_constr>")])
1241 (label_ref (match_operand 3))
1242 (pc)))]
1243 ""
1244 {
1245 const bool is_near = (get_attr_length (insn) == 4);
1246
1247 /* PRU comparisons reverse the operand order (OP2 cmp OP1),
1248 so swap the condition. */
1249 if (is_near)
1250 return "qb%P0\t%l3, %1, %u2";
1251 else
1252 return "qb%Q0\t.+8, %1, %u2\;jmp\t%%label(%l3)";
1253 }
1254 [(set_attr "type" "control")
1255 (set (attr "length")
1256 (if_then_else
1257 (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
1258 (le (minus (match_dup 3) (pc)) (const_int 2036)))
1259 (const_int 4)
1260 (const_int 8)))])
1261
1262 ;; Unlike ALU operations, the const_int's sign here is important. So we
1263 ;; cannot use ubyte_constr.
1264 ;;
1265 ;; NOTE: The short branch check has no typo! We must be conservative and
1266 ;; take into account the worst case of having a signed comparison with a
1267 ;; "far taken branch" label, which amounts to 7 instructions.
1268 (define_insn "cbranch<mode>4_signed"
1269 [(set (pc)
1270 (if_then_else
1271 (match_operator 0 "pru_signed_cmp_operator"
1272 [(match_operand:QISI 1 "register_operand" "r,r,r")
1273 (match_operand:QISI 2 "reg_or_ubyte_operand" "r,Z,I")])
1274 (label_ref (match_operand 3))
1275 (pc)))]
1276 ""
1277 {
1278 const int length = (get_attr_length (insn));
1279 const bool is_near = (length == 20);
1280 enum rtx_code code = GET_CODE (operands[0]);
1281
1282 if (which_alternative == 0)
1283 return pru_output_signed_cbranch (operands, is_near);
1284 else if (which_alternative == 1 && (code == LT || code == GE))
1285 return pru_output_signed_cbranch_zeroop2 (operands, is_near);
1286 else
1287 return pru_output_signed_cbranch_ubyteop2 (operands, is_near);
1288 }
1289 [(set_attr "type" "control")
1290 (set (attr "length")
1291 (if_then_else
1292 (and (ge (minus (match_dup 3) (pc)) (const_int -2020))
1293 (le (minus (match_dup 3) (pc)) (const_int 2016)))
1294 (const_int 20)
1295 (const_int 28)))])
1296
1297 (define_expand "cbranch<mode>4"
1298 [(set (pc)
1299 (if_then_else (match_operator 0 "pru_fp_comparison_operator"
1300 [(match_operand:SFDF 1 "register_operand")
1301 (match_operand:SFDF 2 "register_operand")])
1302 (label_ref (match_operand 3 ""))
1303 (pc)))]
1304 ""
1305 {
1306 rtx t = pru_expand_fp_compare (operands[0], VOIDmode);
1307 operands[0] = t;
1308 operands[1] = XEXP (t, 0);
1309 operands[2] = XEXP (t, 1);
1310 })
1311
1312 ;
1313 ; Bit test branch
1314
1315 (define_code_iterator BIT_TEST [eq ne])
1316 (define_code_attr qbbx_op [(eq "qbbc") (ne "qbbs")])
1317 (define_code_attr qbbx_negop [(eq "qbbs") (ne "qbbc")])
1318
1319 (define_insn "cbranch_qbbx_<BIT_TEST:code><EQS0:mode><EQS1:mode><EQD:mode>4"
1320 [(set (pc)
1321 (if_then_else
1322 (BIT_TEST (zero_extract:EQD
1323 (match_operand:EQS0 0 "register_operand" "r")
1324 (const_int 1)
1325 (match_operand:EQS1 1 "reg_or_ubyte_operand" "r<EQS1:ubyte_constr>"))
1326 (const_int 0))
1327 (label_ref (match_operand 2))
1328 (pc)))]
1329 ""
1330 {
1331 const int length = (get_attr_length (insn));
1332 const bool is_near = (length == 4);
1333 if (is_near)
1334 return "<BIT_TEST:qbbx_op>\\t%l2, %0, %u1";
1335 else
1336 return "<BIT_TEST:qbbx_negop>\\t.+8, %0, %u1\;jmp\\t%%label(%l2)";
1337 }
1338 [(set_attr "type" "control")
1339 (set (attr "length")
1340 (if_then_else
1341 (and (ge (minus (match_dup 2) (pc)) (const_int -2048))
1342 (le (minus (match_dup 2) (pc)) (const_int 2044)))
1343 (const_int 4)
1344 (const_int 8)))])
1345
1346 ;; Bit test conditional branch, but only for constant bit positions.
1347 ;; This restriction allows efficient code for DImode operands.
1348 ;;
1349 ;; QImode is already handled by the pattern variant above.
1350 (define_insn "@cbranch_qbbx_const_<BIT_TEST:code><HIDI:mode>"
1351 [(set (pc)
1352 (if_then_else
1353 (BIT_TEST (zero_extract:HIDI
1354 (match_operand:HIDI 0 "register_operand" "r")
1355 (const_int 1)
1356 (match_operand:VOID 1 "const_int_operand" "i"))
1357 (const_int 0))
1358 (label_ref (match_operand 2))
1359 (pc)))]
1360 ""
1361 {
1362 const int length = (get_attr_length (insn));
1363 const bool is_near = (length == 4);
1364
1365 if (<HIDI:MODE>mode == DImode && INTVAL (operands[1]) <= 31)
1366 {
1367 if (is_near)
1368 return "<BIT_TEST:qbbx_op>\\t%l2, %F0, %1";
1369 else
1370 return "<BIT_TEST:qbbx_negop>\\t.+8, %F0, %1\;jmp\\t%%label(%l2)";
1371 }
1372 else if (<HIDI:MODE>mode == DImode)
1373 {
1374 if (is_near)
1375 return "<BIT_TEST:qbbx_op>\\t%l2, %N0, %1 - 32";
1376 else
1377 return "<BIT_TEST:qbbx_negop>\\t.+8, %N0, %1 - 32\;jmp\\t%%label(%l2)";
1378 }
1379 else
1380 {
1381 if (is_near)
1382 return "<BIT_TEST:qbbx_op>\\t%l2, %0, %1";
1383 else
1384 return "<BIT_TEST:qbbx_negop>\\t.+8, %0, %1\;jmp\\t%%label(%l2)";
1385 }
1386 }
1387 [(set_attr "type" "control")
1388 (set (attr "length")
1389 (if_then_else
1390 (and (ge (minus (match_dup 2) (pc)) (const_int -2048))
1391 (le (minus (match_dup 2) (pc)) (const_int 2044)))
1392 (const_int 4)
1393 (const_int 8)))])
1394 \f
1395 ;; ::::::::::::::::::::
1396 ;; ::
1397 ;; :: Low Overhead Looping - idea "borrowed" from MEP
1398 ;; ::
1399 ;; ::::::::::::::::::::
1400
1401 ;; This insn is volatile because we'd like it to stay in its original
1402 ;; position, just before the loop header. If it stays there, we might
1403 ;; be able to convert it into a "loop" insn.
1404 (define_insn "@doloop_begin_internal<mode>"
1405 [(set (match_operand:HISI 0 "register_operand" "=r")
1406 (unspec_volatile:HISI
1407 [(match_operand:HISI 1 "reg_or_ubyte_operand" "rI")
1408 (match_operand 2 "const_int_operand" "")] UNSPECV_LOOP_BEGIN))]
1409 ""
1410 {
1411 gcc_unreachable ();
1412 })
1413
1414 (define_expand "doloop_begin"
1415 [(use (match_operand 0 "register_operand"))
1416 (use (match_operand 1 ""))]
1417 "TARGET_OPT_LOOP"
1418 {
1419 pru_emit_doloop (operands, 0);
1420 DONE;
1421 })
1422
1423 ; Note: "JUMP_INSNs and CALL_INSNs are not allowed to have any output
1424 ; reloads;". Hence this insn must be prepared for a counter that is
1425 ; not a register.
1426 (define_insn "@doloop_end_internal<mode>"
1427 [(set (pc)
1428 (if_then_else (ne (match_operand:HISI 0 "nonimmediate_operand" "+r,*m")
1429 (const_int 1))
1430 (label_ref (match_operand 1))
1431 (pc)))
1432 (set (match_dup 0)
1433 (plus:HISI (match_dup 0)
1434 (const_int -1)))
1435 (unspec [(match_operand 2 "const_int_operand" "")] UNSPECV_LOOP_END)
1436 (clobber (match_scratch:HISI 3 "=X,&r"))]
1437 ""
1438 {
1439 gcc_unreachable ();
1440 }
1441 ;; Worst case length:
1442 ;;
1443 ;; lbbo op3_reg, op3_ptr 4
1444 ;; sub <op3_reg>, 1 4
1445 ;; qbeq .+8, <op3_reg>, 0 4
1446 ;; jmp <op1> 4
1447 ;; sbbo op3_reg, op3_ptr 4
1448 [(set (attr "length")
1449 (if_then_else
1450 (and (ge (minus (pc) (match_dup 1)) (const_int 0))
1451 (le (minus (pc) (match_dup 1)) (const_int 1020)))
1452 (cond [(eq_attr "alternative" "0") (const_int 4)]
1453 (const_int 12))
1454 (cond [(eq_attr "alternative" "0") (const_int 12)]
1455 (const_int 20))))])
1456
1457 (define_expand "doloop_end"
1458 [(use (match_operand 0 "nonimmediate_operand"))
1459 (use (label_ref (match_operand 1 "")))]
1460 "TARGET_OPT_LOOP"
1461 {
1462 if (GET_CODE (operands[0]) == REG && GET_MODE (operands[0]) == QImode)
1463 FAIL;
1464 pru_emit_doloop (operands, 1);
1465 DONE;
1466 })
1467
1468 (define_insn "@pruloop<mode>"
1469 [(set (reg:HISI LOOPCNTR_REGNUM)
1470 (unspec:HISI [(match_operand:HISI 0 "reg_or_ubyte_operand" "rI")
1471 (label_ref (match_operand 1))]
1472 UNSPECV_LOOP_BEGIN))]
1473 ""
1474 "loop\\t%l1, %0")
1475
1476 (define_insn "pruloop_end"
1477 [(unspec [(const_int 0)] UNSPECV_LOOP_END)]
1478 ""
1479 "# loop end"
1480 [(set_attr "length" "0")])
1481
1482 \f
1483 ;; Misc patterns
1484
1485 (define_insn "delay_cycles_start"
1486 [(unspec_volatile [(match_operand 0 "immediate_operand" "i")]
1487 UNSPECV_DELAY_CYCLES_START)]
1488 ""
1489 "/* Begin %0 cycle delay. */"
1490 [(set_attr "length" "0")])
1491
1492 (define_insn "delay_cycles_end"
1493 [(unspec_volatile [(match_operand 0 "immediate_operand" "i")]
1494 UNSPECV_DELAY_CYCLES_END)]
1495 ""
1496 "/* End %0 cycle delay. */"
1497 [(set_attr "length" "0")])
1498
1499
1500 (define_insn "delay_cycles_2x_plus1_hi"
1501 [(unspec_volatile [(match_operand:SI 0 "const_uhword_operand" "J")]
1502 UNSPECV_DELAY_CYCLES_2X_HI)
1503 (clobber (match_scratch:SI 1 "=&r"))]
1504 ""
1505 "ldi\\t%1, %0\;sub\\t%1, %1, 1\;qbne\\t.-4, %1, 0"
1506 [(set_attr "length" "12")])
1507
1508
1509 ; Do not use LDI32 here because we do not want
1510 ; to accidentally lose one instruction cycle.
1511 (define_insn "delay_cycles_2x_plus2_si"
1512 [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "n")]
1513 UNSPECV_DELAY_CYCLES_2X_SI)
1514 (clobber (match_scratch:SI 1 "=&r"))]
1515 ""
1516 "ldi\\t%1.w0, %L0\;ldi\\t%1.w2, %H0\;sub\\t%1, %1, 1\;qbne\\t.-4, %1, 0"
1517 [(set_attr "length" "16")])
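; The name reflects the timing: the two set-up LDI instructions plus the
; two-instruction SUB/QBNE loop body give roughly 2 * %0 + 2 cycles of
; delay, assuming the usual single-cycle PRU instruction timing.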
1518
1519 (define_insn "delay_cycles_1"
1520 [(unspec_volatile [(const_int 0) ] UNSPECV_DELAY_CYCLES_1)]
1521 ""
1522 "nop\\t# delay_cycles_1"
1523 )
1524
1525
1526 (define_insn "nop"
1527 [(const_int 0)]
1528 ""
1529 "nop"
1530 [(set_attr "type" "alu")])
1531
1532 (define_insn "nop_loop_guard"
1533 [(const_int 0)]
1534 ""
1535 "nop\\t# Loop end guard"
1536 [(set_attr "type" "alu")])
1537
1538 ;; HALT instruction.
1539 (define_insn "pru_halt"
1540 [(unspec_volatile [(const_int 0)] UNSPECV_HALT)]
1541 ""
1542 "halt"
1543 [(set_attr "type" "control")])
1544
1545 ;; Count Leading Zeros implemented using LMBD.
1546 ;; LMBD returns 32 if the bit value is not present, and we subtract it from 31 to get CLZ.
1547 ;; Hence we get a defined value -1 for CLZ_DEFINED_VALUE_AT_ZERO.
1548 (define_expand "clz<mode>2"
1549 [(set (match_operand:QISI 0 "register_operand")
1550 (clz:QISI (match_operand:QISI 1 "register_operand")))]
1551 ""
1552 {
1553 rtx dst = operands[0];
1554 rtx src = operands[1];
1555 rtx tmpval = gen_reg_rtx (<MODE>mode);
1556
1557 emit_insn (gen_pru_lmbd (<MODE>mode, tmpval, src, const1_rtx));
1558 emit_insn (gen_sub3_insn (dst, GEN_INT (31), tmpval));
1559 DONE;
1560 })
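;; For example, for an SImode input of 0x00010000 LMBD finds bit 16 and the
;; result is 31 - 16 = 15; for an input of 0, LMBD returns 32, giving the
;; defined -1 mentioned above.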
1561
1562 ;; Left Most Bit Detect operation, which maps to a single instruction.
1563 (define_expand "@pru_lmbd<mode>"
1564 [(set (match_operand:QISI 0 "register_operand")
1565 (unspec:QISI
1566 [(match_operand:QISI 1 "register_operand")
1567 (match_operand:QISI 2 "reg_or_ubyte_operand")]
1568 UNSPEC_LMBD))]
1569 ""
1570 "")