git.ipfire.org Git - thirdparty/glibc.git/commitdiff
[4/5] AArch64: Improve A64FX memset by removing unroll32
author Wilco Dijkstra <wdijkstr@arm.com>
Tue, 10 Aug 2021 12:44:27 +0000 (13:44 +0100)
committer Wilco Dijkstra <wdijkstr@arm.com>
Tue, 10 Aug 2021 12:44:27 +0000 (13:44 +0100)
Remove unroll32 code since it doesn't improve performance.

Reviewed-by: Naohiro Tamura <naohirot@fujitsu.com>
sysdeps/aarch64/multiarch/memset_a64fx.S

index 337c86be6f5dd2ab02540754b0fbebd819c1bff3..ef0315658a676a8692c3f18cc9f914bd6e145991 100644 (file)
@@ -102,22 +102,6 @@ L(vl_agnostic): // VL Agnostic
        ccmp    vector_length, tmp1, 0, cs
        b.eq    L(L1_prefetch)
 
-L(unroll32):
-       lsl     tmp1, vector_length, 3  // vector_length * 8
-       lsl     tmp2, vector_length, 5  // vector_length * 32
-       .p2align 3
-1:     cmp     rest, tmp2
-       b.cc    L(unroll8)
-       st1b_unroll
-       add     dst, dst, tmp1
-       st1b_unroll
-       add     dst, dst, tmp1
-       st1b_unroll
-       add     dst, dst, tmp1
-       st1b_unroll
-       add     dst, dst, tmp1
-       sub     rest, rest, tmp2
-       b       1b
 
 L(unroll8):
        lsl     tmp1, vector_length, 3
@@ -155,7 +139,7 @@ L(L1_prefetch): // if rest >= L1_SIZE
        sub     rest, rest, CACHE_LINE_SIZE * 2
        cmp     rest, L1_SIZE
        b.ge    1b
-       cbnz    rest, L(unroll32)
+       cbnz    rest, L(unroll8)
        ret
 
        // count >= L2_SIZE