]>
Commit | Line | Data |
---|---|---|
525de033 | 1 | /* Optimized memset for Huawei Kunpeng processor. |
2b778ceb | 2 | Copyright (C) 2012-2021 Free Software Foundation, Inc. |
525de033 XZ |
3 | |
4 | This file is part of the GNU C Library. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
17 | License along with the GNU C Library. If not, see | |
18 | <https://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #include <sysdep.h> | |
21 | #include <sysdeps/aarch64/memset-reg.h> | |
22 | ||
23 | #if IS_IN (libc) | |
24 | # define MEMSET __memset_kunpeng | |
25 | ||
26 | /* Assumptions: | |
27 | * | |
28 | * ARMv8-a, AArch64, unaligned accesses | |
29 | * | |
30 | */ | |
31 | ||
32 | ENTRY_ALIGN (MEMSET, 6) | |
33 | /* Fill count bytes starting at dstin with the low byte of valw. Every size class below uses stores anchored at dstin and at dstend that may overlap, so no byte-by-byte loop is needed. dstin is never written, so it is unchanged at every ret. */ |
45b1e17e SN |
34 | PTR_ARG (0) |
35 | SIZE_ARG (2) | |
525de033 XZ |
36 | /* NOTE(review): dstin, valw, count, dst and dstend are register aliases not defined in this file - presumably from the included memset-reg.h; PTR_ARG and SIZE_ARG presumably come from sysdep.h. Confirm there. */ |
37 | dup v0.16B, valw /* Replicate the low byte of valw into all 16 byte lanes of v0. */ |
38 | add dstend, dstin, count /* dstend = one past the last byte to set. */ |
39 | ||
40 | cmp count, 128 |
41 | b.hs L(set_long) /* 128 bytes or more (unsigned compare). */ |
42 | ||
43 | cmp count, 16 |
44 | b.lo L(less16) /* Fewer than 16 bytes. */ |
45 | ||
46 | /* Set 16..127 bytes. */ |
47 | str q0, [dstin] /* First 16 bytes. */ |
48 | tbnz count, 6, L(set127) /* Bit 6 set: count is 64..127. */ |
49 | str q0, [dstend, -16] /* Last 16 bytes; may overlap the first store. */ |
50 | tbz count, 5, 1f /* Bit 5 clear: count is 16..31, already fully covered. */ |
51 | str q0, [dstin, 16] /* count is 32..63: bytes 16..31 from the start... */ |
52 | str q0, [dstend, -32] /* ...and the 16 bytes preceding the last 16. */ |
53 | 1: ret |
54 | ||
55 | .p2align 4 |
56 | /* Set 64..127 bytes. Write 64 bytes from the start and | |
57 | 64 bytes from the end. */ | |
58 | L(set127): | |
59 | stp q0, q0, [dstin, 16] /* Bytes 16..47; bytes 0..15 were stored before the branch here. */ |
60 | str q0, [dstin, 48] /* Bytes 48..63. */ |
61 | stp q0, q0, [dstend, -64] /* Last 64 bytes in two pairs; these overlap the head stores unless count is exactly 127... */ |
62 | stp q0, q0, [dstend, -32] |
63 | ret |
64 | ||
65 | .p2align 4 |
66 | /* Set 0..15 bytes. */ |
67 | L(less16): | |
68 | tbz count, 3, L(less8) /* Bit 3 clear: count is 0..7. */ |
69 | str d0, [dstin] /* count is 8..15: first 8 bytes... */ |
70 | str d0, [dstend, -8] /* ...and last 8 bytes, possibly overlapping. */ |
71 | ret |
72 | L(less8): | |
73 | tbz count, 2, 2f /* Bit 2 clear: count is 0..3. */ |
74 | str s0, [dstin] /* count is 4..7: first 4 bytes... */ |
75 | str s0, [dstend, -4] /* ...and last 4 bytes, possibly overlapping. */ |
76 | ret |
77 | 2: cbz count, 3f /* count is 0: store nothing. */ |
78 | str b0, [dstin] /* count is 1..3: first byte. */ |
79 | tbz count, 1, 3f /* Bit 1 clear: count is 1, done. */ |
80 | str h0, [dstend, -2] /* count is 2..3: last two bytes. */ |
81 | 3: ret |
82 | ||
83 | .p2align 4 |
84 | L(set_long): | |
85 | bic dst, dstin, 15 /* dst = dstin rounded down to a 16-byte boundary. */ |
86 | str q0, [dstin] /* First 16 bytes, at the original unrounded address. */ |
87 | sub count, dstend, dst /* Count is 16 too large. */ |
88 | sub dst, dst, 16 /* Dst is biased by -32. */ |
89 | sub count, count, 64 + 16 + 1 /* Adjust count and bias for loop. */ |
90 | 1: stp q0, q0, [dst, 32] /* Loop body, unrolled four times: store 64 bytes at 16-byte-aligned addresses... */ |
91 | stp q0, q0, [dst, 64]! /* ...the second pair uses pre-index writeback, advancing dst by 64. */ |
92 | subs count, count, 64 |
93 | b.lo 1f /* Subtraction borrowed: leave the loop for the tail stores. */ |
94 | stp q0, q0, [dst, 32] |
95 | stp q0, q0, [dst, 64]! |
96 | subs count, count, 64 |
97 | b.lo 1f |
98 | stp q0, q0, [dst, 32] |
99 | stp q0, q0, [dst, 64]! |
100 | subs count, count, 64 |
101 | b.lo 1f |
102 | stp q0, q0, [dst, 32] |
103 | stp q0, q0, [dst, 64]! |
104 | subs count, count, 64 |
105 | b.hs 1b /* No borrow: run another four 64-byte blocks. */ |
106 | ||
107 | 1: stp q0, q0, [dstend, -64] /* Tail: the last 64 bytes, addressed from dstend, overlapping loop stores as needed. */ |
108 | stp q0, q0, [dstend, -32] |
109 | ret |
110 | ||
111 | END (MEMSET) | |
112 | libc_hidden_builtin_def (MEMSET) | |
113 | #endif |