From: Hans-Peter Nilsson Date: Sun, 23 Apr 2023 04:21:13 +0000 (+0200) Subject: CRIS: peephole2 an add into two addq or subq X-Git-Tag: basepoints/gcc-15~9606 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=35b7618e3ae25b3a293c456e24edc5432a936e22;p=thirdparty%2Fgcc.git CRIS: peephole2 an add into two addq or subq Unfortunately, this doesn't cause a performance improvement for coremark, but it happens a few times in newlib, just enough to affect coremark 0.01% by size (or 4 bytes, and three cycles (__fwalk_sglue and __vfiprintf_r each two bytes)). gcc: * config/cris/cris.md (splitop): Add PLUS. * config/cris/cris.cc (cris_split_constant): Also handle PLUS when a split into two insns may be useful. gcc/testsuite: * gcc.target/cris/peep2-addsplit1.c: New test. --- diff --git a/gcc/config/cris/cris.cc b/gcc/config/cris/cris.cc index 44b4545d8ce1..1c7289b027b8 100644 --- a/gcc/config/cris/cris.cc +++ b/gcc/config/cris/cris.cc @@ -2651,7 +2651,30 @@ cris_split_constant (HOST_WIDE_INT wval, enum rtx_code code, int32_t ival = (int32_t) wval; uint32_t uval = (uint32_t) wval; - if (code != AND || IN_RANGE(ival, -32, 31) + /* Can we do with two addq or two subq, improving chances of filling a + delay-slot? At worst, we break even, both performance and + size-wise. */ + if (code == PLUS + && (IN_RANGE (ival, -63 * 2, -63 - 1) + || IN_RANGE (ival, 63 + 1, 63 * 2))) + { + if (generate) + { + int sign = ival < 0 ? -1 : 1; + int aval = abs (ival); + + if (mode != SImode) + { + dest = gen_rtx_REG (SImode, REGNO (dest)); + op = gen_rtx_REG (SImode, REGNO (op)); + } + emit_insn (gen_addsi3 (dest, op, GEN_INT (63 * sign))); + emit_insn (gen_addsi3 (dest, op, GEN_INT ((aval - 63) * sign))); + } + return 2; + } + + if (code != AND || IN_RANGE (ival, -32, 31) /* Implemented using movu.[bw] elsewhere. */ || ival == 255 || ival == 65535 /* Implemented using clear.[bw] elsewhere. 
*/ diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md index 9e3fa78c39ec..7504b63dabf3 100644 --- a/gcc/config/cris/cris.md +++ b/gcc/config/cris/cris.md @@ -209,7 +209,7 @@ (define_code_iterator plusumin [plus umin]) ;; For opsplit1. -(define_code_iterator splitop [and]) +(define_code_iterator splitop [and plus]) ;; The addsubbo and nd code-attributes form a hack. We need to output ;; "addu.b", "subu.b" but "bound.b" (no "u"-suffix) which means we'd @@ -2984,6 +2984,10 @@ ;; Large (read: non-quick) numbers can sometimes be AND:ed by other means. ;; Testcase: gcc.target/cris/peep2-andsplit1.c +;; +;; Another case is add N,rx with -126..-64,64..126: it has the same +;; size and execution time as two addq or subq, but addq and subq can fill +;; a delay-slot. (define_peephole2 ; opsplit1 [(parallel [(set (match_operand 0 "register_operand") diff --git a/gcc/testsuite/gcc.target/cris/peep2-addsplit1.c b/gcc/testsuite/gcc.target/cris/peep2-addsplit1.c new file mode 100644 index 000000000000..b69c0d63ebec --- /dev/null +++ b/gcc/testsuite/gcc.target/cris/peep2-addsplit1.c @@ -0,0 +1,52 @@ +/* Check that "opsplit1" with PLUS does its job. */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-leading-underscore" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +int addsi (int x) +{ + return x + 64; +} + +char addqi (char x) +{ + return x + 126; +} + +short addhi (short x) +{ + return x - 64; +} + +unsigned short addhi2 (short x) +{ + return x - 126; +} + +/* +** addsi: +** addq 63,.r10 +** ret +** addq 1,.r10 +*/ + +/* +** addqi: +** addq 63,.r10 +** ret +** addq 63,.r10 +*/ + +/* +** addhi: +** subq 63,.r10 +** ret +** subq 1,.r10 +*/ + +/* +** addhi2: +** subq 63,.r10 +** ret +** subq 63,.r10 +*/