]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
ARC: Improved DImode rotates and right shifts by one bit.
authorRoger Sayle <roger@nextmovesoftware.com>
Mon, 13 Nov 2023 09:16:59 +0000 (09:16 +0000)
committerRoger Sayle <roger@nextmovesoftware.com>
Mon, 13 Nov 2023 09:16:59 +0000 (09:16 +0000)
This patch improves the code generated for DImode right shifts (both
arithmetic and logical) by a single bit, and also for DImode rotates
(both left and right) by a single bit.  In approach, this is similar
to the recently added DImode left shift by a single bit patch, but
also builds upon the x86's UNSPEC carry flag representation:
https://gcc.gnu.org/pipermail/gcc-patches/2023-October/632169.html

The benefits can be seen from the four new test cases:

long long ashr(long long x) { return x >> 1; }

Before:
ashr:   asl     r2,r1,31
        lsr_s   r0,r0
        or_s    r0,r0,r2
        j_s.d   [blink]
        asr_s   r1,r1,1

After:
ashr:   asr.f   r1,r1
        j_s.d   [blink]
        rrc     r0,r0

unsigned long long lshr(unsigned long long x) { return x >> 1; }

Before:
lshr:   asl     r2,r1,31
        lsr_s   r0,r0
        or_s    r0,r0,r2
        j_s.d   [blink]
        lsr_s   r1,r1

After:
lshr: lsr.f   r1,r1
        j_s.d   [blink]
        rrc     r0,r0

unsigned long long rotl(unsigned long long x) { return (x<<1) | (x>>63); }

Before:
rotl:   lsr     r12,r1,31
        lsr     r2,r0,31
        asl_s   r3,r0,1
        asl_s   r1,r1,1
        or      r0,r12,r3
        j_s.d   [blink]
        or_s    r1,r1,r2

After:
rotl:   add.f   r0,r0,r0
        adc.f   r1,r1,r1
        j_s.d   [blink]
        add.cs  r0,r0,1

unsigned long long rotr(unsigned long long x) { return (x>>1) | (x<<63); }

Before:
rotr:   asl     r12,r1,31
        asl     r2,r0,31
        lsr_s   r3,r0
        lsr_s   r1,r1
        or      r0,r12,r3
        j_s.d   [blink]
        or_s    r1,r1,r2

After:
rotr:   asr.f   0,r0
        rrc.f   r1,r1
        j_s.d   [blink]
        rrc     r0,r0

On CPUs without a barrel shifter the improvements are even better.

2023-11-13  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
* config/arc/arc.md (UNSPEC_ARC_CC_NEZ): New UNSPEC that
represents the carry flag being set if the operand is non-zero.
(adc_f): New define_insn representing adc with updated flags.
(ashrdi3): New define_expand that only handles shifts by 1.
(ashrdi3_cnt1): New pre-reload define_insn_and_split.
(lshrdi3): New define_expand that only handles shifts by 1.
(lshrdi3_cnt1): New pre-reload define_insn_and_split.
(rrcsi2): New define_insn for rrc (SImode rotate right through carry).
(rrcsi2_carry): Likewise for rrc.f, as above but updating flags.
(rotldi3): New define_expand that only handles rotates by 1.
(rotldi3_cnt1): New pre-reload define_insn_and_split.
(rotrdi3): New define_expand that only handles rotates by 1.
(rotrdi3_cnt1): New pre-reload define_insn_and_split.
(lshrsi3_cnt1_carry): New define_insn for lsr.f.
(ashrsi3_cnt1_carry): New define_insn for asr.f.
(btst_0_carry): New define_insn for asr.f without result.

gcc/testsuite/ChangeLog
* gcc.target/arc/ashrdi3-1.c: New test case.
* gcc.target/arc/lshrdi3-1.c: Likewise.
* gcc.target/arc/rotldi3-1.c: Likewise.
* gcc.target/arc/rotrdi3-1.c: Likewise.

gcc/config/arc/arc.md
gcc/testsuite/gcc.target/arc/ashrdi3-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/arc/lshrdi3-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/arc/rotldi3-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/arc/rotrdi3-1.c [new file with mode: 0644]

index 8c121cddd6e6c5392dfcadd170236b074c7c6f06..4ae3a67c0ea0ba2247047b4742ede997b6da6a43 100644 (file)
   UNSPEC_ARC_VMAC2HU
   UNSPEC_ARC_VMPY2H
   UNSPEC_ARC_VMPY2HU
+  UNSPEC_ARC_CC_NEZ
 
   VUNSPEC_ARC_RTIE
   VUNSPEC_ARC_SYNC
@@ -2789,6 +2790,31 @@ archs4x, archs4xd"
    (set_attr "type" "cc_arith")
    (set_attr "length" "4,4,4,4,8,8")])
 
+(define_insn "adc_f"
+  [(set (reg:CC_C CC_REG)
+       (compare:CC_C
+         (zero_extend:DI
+           (plus:SI
+             (plus:SI
+               (ltu:SI (reg:CC_C CC_REG) (const_int 0))
+               (match_operand:SI 1 "register_operand" "%r"))
+             (match_operand:SI 2 "register_operand" "r")))
+         (plus:DI
+           (ltu:DI (reg:CC_C CC_REG) (const_int 0))
+           (zero_extend:DI (match_dup 1)))))
+   (set (match_operand:SI 0 "register_operand" "=r")
+       (plus:SI
+         (plus:SI
+           (ltu:SI (reg:CC_C CC_REG) (const_int 0))
+           (match_dup 1))
+         (match_dup 2)))]
+  ""
+  "adc.f\\t%0,%1,%2"
+  [(set_attr "cond" "set")
+   (set_attr "predicable" "no")
+   (set_attr "type" "cc_arith")
+   (set_attr "length" "4")])
+
 ; combiner-splitter cmp / scc -> cmp / adc
 (define_split
   [(set (match_operand:SI 0 "dest_reg_operand" "")
@@ -3529,6 +3555,68 @@ archs4x, archs4xd"
   ""
   [(set_attr "length" "8")])
 
+(define_expand "ashrdi3"
+  [(parallel
+      [(set (match_operand:DI 0 "register_operand")
+           (ashiftrt:DI (match_operand:DI 1 "register_operand")
+                        (match_operand:QI 2 "const_int_operand")))
+       (clobber (reg:CC CC_REG))])]
+  ""
+{
+  if (operands[2] != const1_rtx)
+    FAIL;
+})
+
+;; Split into asr.f hi; rrc lo
+(define_insn_and_split "*ashrdi3_cnt1"
+  [(set (match_operand:DI 0 "register_operand")
+       (ashiftrt:DI (match_operand:DI 1 "register_operand")
+                    (const_int 1)))
+   (clobber (reg:CC CC_REG))]
+  "arc_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_ashrsi3_cnt1_carry (gen_highpart (SImode, operands[0]),
+                                    gen_highpart (SImode, operands[1])));
+  emit_insn (gen_rrcsi2 (gen_lowpart (SImode, operands[0]),
+                        gen_lowpart (SImode, operands[1])));
+  DONE;
+}
+  [(set_attr "length" "8")])
+
+(define_expand "lshrdi3"
+  [(parallel
+      [(set (match_operand:DI 0 "register_operand")
+           (lshiftrt:DI (match_operand:DI 1 "register_operand")
+                        (match_operand:QI 2 "const_int_operand")))
+       (clobber (reg:CC CC_REG))])]
+  ""
+{
+  if (operands[2] != const1_rtx)
+    FAIL;
+})
+
+;; Split into lsr.f hi; rrc lo
+(define_insn_and_split "*lshrdi3_cnt1"
+  [(set (match_operand:DI 0 "register_operand")
+       (lshiftrt:DI (match_operand:DI 1 "register_operand")
+                    (const_int 1)))
+   (clobber (reg:CC CC_REG))]
+  "arc_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_lshrsi3_cnt1_carry (gen_highpart (SImode, operands[0]),
+                                    gen_highpart (SImode, operands[1])));
+  emit_insn (gen_rrcsi2 (gen_lowpart (SImode, operands[0]),
+                        gen_lowpart (SImode, operands[1])));
+  DONE;
+}
+  [(set_attr "length" "8")])
+
 ;; Rotate instructions.
 
 (define_insn "rotrsi3_insn"
@@ -3570,6 +3658,103 @@ archs4x, archs4xd"
     }
 })
 
+;; Rotate through carry flag
+
+(define_insn "rrcsi2"
+  [(set (match_operand:SI 0 "dest_reg_operand" "=r")
+       (plus:SI
+         (lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
+                      (const_int 1))
+         (ashift:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0))
+                    (const_int 31))))]
+  ""
+  "rrc\\t%0,%1"
+  [(set_attr "type" "shift")
+   (set_attr "predicable" "no")
+   (set_attr "length" "4")])
+
+(define_insn "rrcsi2_carry"
+  [(set (reg:CC_C CC_REG)
+       (unspec:CC_C [(and:SI (match_operand:SI 1 "register_operand" "r")
+                             (const_int 1))] UNSPEC_ARC_CC_NEZ))
+   (set (match_operand:SI 0 "dest_reg_operand" "=r")
+       (plus:SI
+         (lshiftrt:SI (match_dup 1) (const_int 1))
+         (ashift:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0))
+                    (const_int 31))))]
+  ""
+  "rrc.f\\t%0,%1"
+  [(set_attr "type" "shift")
+   (set_attr "predicable" "no")
+   (set_attr "length" "4")])
+
+;; DImode Rotate instructions
+
+(define_expand "rotldi3"
+  [(parallel
+      [(set (match_operand:DI 0 "register_operand")
+           (rotate:DI (match_operand:DI 1 "register_operand")
+                      (match_operand:QI 2 "const_int_operand")))
+       (clobber (reg:CC CC_REG))])]
+  ""
+{
+  if (operands[2] != const1_rtx)
+    FAIL;
+})
+
+;; split into add.f lo; adc.f hi; adc lo
+(define_insn_and_split "*rotldi3_cnt1"
+  [(set (match_operand:DI 0 "register_operand")
+       (rotate:DI (match_operand:DI 1 "register_operand")
+                  (const_int 1)))
+   (clobber (reg:CC CC_REG))]
+  "arc_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx lo0 = gen_lowpart (SImode, operands[0]);
+  rtx lo1 = gen_lowpart (SImode, operands[1]);
+  rtx hi1 = gen_highpart (SImode, operands[1]);
+  emit_insn (gen_add_f (lo0, lo1, lo1));
+  emit_insn (gen_adc_f (gen_highpart (SImode, operands[0]), hi1, hi1));
+  emit_insn (gen_adc (lo0, lo0, const0_rtx));
+  DONE;
+}
+  [(set_attr "length" "12")])
+
+(define_expand "rotrdi3"
+  [(parallel
+      [(set (match_operand:DI 0 "register_operand")
+           (rotatert:DI (match_operand:DI 1 "register_operand")
+                        (match_operand:QI 2 "const_int_operand")))
+       (clobber (reg:CC CC_REG))])]
+  ""
+{
+  if (operands[2] != const1_rtx)
+    FAIL;
+})
+
+;; split into asr.f lo; rrc.f hi; rrc lo
+(define_insn_and_split "*rotrdi3_cnt1"
+  [(set (match_operand:DI 0 "register_operand")
+       (rotatert:DI (match_operand:DI 1 "register_operand")
+                    (const_int 1)))
+   (clobber (reg:CC CC_REG))]
+  "arc_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx lo = gen_lowpart (SImode, operands[1]);
+  emit_insn (gen_btst_0_carry (lo));
+  emit_insn (gen_rrcsi2_carry (gen_highpart (SImode, operands[0]),
+                              gen_highpart (SImode, operands[1])));
+  emit_insn (gen_rrcsi2 (gen_lowpart (SImode, operands[0]), lo));
+  DONE;
+}
+  [(set_attr "length" "12")])
+
 ;; Compare / branch instructions.
 
 (define_expand "cbranchsi4"
@@ -6009,6 +6194,18 @@ archs4x, archs4xd"
    (set_attr "iscompact" "maybe,false")
    (set_attr "predicable" "no,no")])
 
+(define_insn "lshrsi3_cnt1_carry"
+  [(set (reg:CC_C CC_REG)
+       (unspec:CC_C [(and:SI (match_operand:SI 1 "register_operand" "r")
+                             (const_int 1))] UNSPEC_ARC_CC_NEZ))
+   (set (match_operand:SI 0 "dest_reg_operand" "=r")
+       (lshiftrt:SI (match_dup 1) (const_int 1)))]
+  ""
+  "lsr.f\\t%0,%1"
+  [(set_attr "type" "unary")
+   (set_attr "length" "4")
+   (set_attr "predicable" "no")])
+
 (define_insn "ashrsi3_cnt1"
   [(set (match_operand:SI 0 "dest_reg_operand"             "=q,w")
        (ashiftrt:SI (match_operand:SI 1 "register_operand" "q,c")
@@ -6019,6 +6216,28 @@ archs4x, archs4xd"
    (set_attr "iscompact" "maybe,false")
    (set_attr "predicable" "no,no")])
 
+(define_insn "ashrsi3_cnt1_carry"
+  [(set (reg:CC_C CC_REG)
+       (unspec:CC_C [(and:SI (match_operand:SI 1 "register_operand" "r")
+                             (const_int 1))] UNSPEC_ARC_CC_NEZ))
+   (set (match_operand:SI 0 "dest_reg_operand" "=r")
+       (ashiftrt:SI (match_dup 1) (const_int 1)))]
+  ""
+  "asr.f\\t%0,%1"
+  [(set_attr "type" "unary")
+   (set_attr "length" "4")
+   (set_attr "predicable" "no")])
+
+(define_insn "btst_0_carry"
+  [(set (reg:CC_C CC_REG)
+       (unspec:CC_C [(and:SI (match_operand:SI 0 "register_operand" "r")
+                             (const_int 1))] UNSPEC_ARC_CC_NEZ))]
+  ""
+  "asr.f\\t0,%0"
+  [(set_attr "type" "unary")
+   (set_attr "length" "4")
+   (set_attr "predicable" "no")])
+
 (define_peephole2
   [(set (match_operand:SI 0 "register_operand" "")
        (zero_extract:SI (match_dup 0)
diff --git a/gcc/testsuite/gcc.target/arc/ashrdi3-1.c b/gcc/testsuite/gcc.target/arc/ashrdi3-1.c
new file mode 100644 (file)
index 0000000..d990bfd
--- /dev/null
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+long long foo(long long x)
+{
+  return x >> 1;
+}
+
+/* { dg-final { scan-assembler "asr.f\\s+r1,r1" } } */
+/* { dg-final { scan-assembler "rrc\\s+r0,r0" } } */
diff --git a/gcc/testsuite/gcc.target/arc/lshrdi3-1.c b/gcc/testsuite/gcc.target/arc/lshrdi3-1.c
new file mode 100644 (file)
index 0000000..6542ffd
--- /dev/null
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned long long foo(unsigned long long x)
+{
+  return x >> 1;
+}
+
+/* { dg-final { scan-assembler "lsr.f\\s+r1,r1" } } */
+/* { dg-final { scan-assembler "rrc\\s+r0,r0" } } */
diff --git a/gcc/testsuite/gcc.target/arc/rotldi3-1.c b/gcc/testsuite/gcc.target/arc/rotldi3-1.c
new file mode 100644 (file)
index 0000000..325996e
--- /dev/null
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned long long foo(unsigned long long x)
+{
+  return (x << 1) | (x >> 63);
+}
+
+/* { dg-final { scan-assembler "add.f\\s+r0,r0,r0" } } */
+/* { dg-final { scan-assembler "adc.f\\s+r1,r1,r1" } } */
+/* { dg-final { scan-assembler "add.cs\\s+r0,r0,1" } } */
diff --git a/gcc/testsuite/gcc.target/arc/rotrdi3-1.c b/gcc/testsuite/gcc.target/arc/rotrdi3-1.c
new file mode 100644 (file)
index 0000000..cd8e0de
--- /dev/null
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned long long foo(unsigned long long x)
+{
+  return (x >> 1) | (x << 63);
+}
+
+/* { dg-final { scan-assembler "asr.f\\s+0,r0" } } */
+/* { dg-final { scan-assembler "rrc.f\\s+r1,r1" } } */
+/* { dg-final { scan-assembler "rrc\\s+r0,r0" } } */