/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 ARM Ltd.
 * Copyright (C) 2013 Linaro.
 *
 * This code is based on glibc cortex strings work originally authored by
 * Linaro and can be found @
 *
 * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
 * files/head:/src/aarch64/
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

/*
 * Move a buffer from src to dst (alignment handled by the hardware).
 * If dest < src, or if the buffers do not overlap, call __memcpy;
 * otherwise copy in reverse order, starting from the end.
 *
 * Parameters:
 * x0 - dest
 * x1 - src
 * x2 - n
 * Returns:
 * x0 - dest
 */
dstin   .req    x0
src     .req    x1
count   .req    x2
tmp1    .req    x3
tmp1w   .req    w3
tmp2    .req    x4
tmp2w   .req    w4
tmp3    .req    x5
tmp3w   .req    w5
dst     .req    x6

A_l     .req    x7
A_h     .req    x8
B_l     .req    x9
B_h     .req    x10
C_l     .req    x11
C_h     .req    x12
D_l     .req    x13
D_h     .req    x14

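/*
 * memmove is weak, presumably so that an instrumented implementation
 * (e.g. KASAN's) can override it, while __memmove stays available for
 * callers that must bypass instrumentation.
 */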
        .weak memmove
ENTRY(__memmove)
ENTRY(memmove)
        cmp     dstin, src
        b.lo    __memcpy
        add     tmp1, src, count
        cmp     dstin, tmp1
        b.hs    __memcpy                /* No overlap. */

        add     dst, dstin, count
        add     src, src, count
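        /*
         * src and dst now point one byte past the end of their buffers;
         * everything below copies backwards using negative pre-indexed
         * addressing.
         */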
        cmp     count, #16
        b.lo    .Ltail15        /* Small copy: probably unaligned accesses. */

        ands    tmp2, src, #15  /* Bytes to reach alignment. */
        b.eq    .LSrcAligned
        sub     count, count, tmp2
        /*
         * Copy the leading unaligned bytes first so that src becomes
         * aligned. The cost of these few extra instructions is acceptable,
         * and it means the subsequent accesses use aligned addresses.
         */
        tbz     tmp2, #0, 1f
        ldrb    tmp1w, [src, #-1]!
        strb    tmp1w, [dst, #-1]!
1:
        tbz     tmp2, #1, 2f
        ldrh    tmp1w, [src, #-2]!
        strh    tmp1w, [dst, #-2]!
2:
        tbz     tmp2, #2, 3f
        ldr     tmp1w, [src, #-4]!
        str     tmp1w, [dst, #-4]!
3:
        tbz     tmp2, #3, .LSrcAligned
        ldr     tmp1, [src, #-8]!
        str     tmp1, [dst, #-8]!

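        /*
         * src is now 16-byte aligned. dst may still be unaligned, but as
         * noted above the hardware handles unaligned accesses.
         */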
.LSrcAligned:
        cmp     count, #64
        b.ge    .Lcpy_over64

        /*
         * Deal with small copies quickly by dropping straight into the
         * exit block.
         */
.Ltail63:
        /*
         * Copy up to 48 bytes of data. At this point we only need the
         * bottom 6 bits of count to be accurate.
         */
        ands    tmp1, count, #0x30
        b.eq    .Ltail15
        cmp     tmp1w, #0x20
        b.eq    1f
        b.lt    2f
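        /*
         * Falling through copies 48 bytes (all three pairs below);
         * branching to 1f or 2f copies 32 or 16 bytes respectively.
         */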
        ldp     A_l, A_h, [src, #-16]!
        stp     A_l, A_h, [dst, #-16]!
1:
        ldp     A_l, A_h, [src, #-16]!
        stp     A_l, A_h, [dst, #-16]!
2:
        ldp     A_l, A_h, [src, #-16]!
        stp     A_l, A_h, [dst, #-16]!

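        /*
         * Copy the final 0-15 bytes: each tbz below tests one bit of
         * count and copies the corresponding power-of-two sized chunk.
         */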
.Ltail15:
        tbz     count, #3, 1f
        ldr     tmp1, [src, #-8]!
        str     tmp1, [dst, #-8]!
1:
        tbz     count, #2, 2f
        ldr     tmp1w, [src, #-4]!
        str     tmp1w, [dst, #-4]!
2:
        tbz     count, #1, 3f
        ldrh    tmp1w, [src, #-2]!
        strh    tmp1w, [dst, #-2]!
3:
        tbz     count, #0, .Lexitfunc
        ldrb    tmp1w, [src, #-1]
        strb    tmp1w, [dst, #-1]

.Lexitfunc:
        ret

.Lcpy_over64:
        subs    count, count, #128
        b.ge    .Lcpy_body_large
        /*
         * Less than 128 bytes to copy, so handle 64 bytes here and then jump
         * to the tail.
         */
        ldp     A_l, A_h, [src, #-16]
        stp     A_l, A_h, [dst, #-16]
        ldp     B_l, B_h, [src, #-32]
        ldp     C_l, C_h, [src, #-48]
        stp     B_l, B_h, [dst, #-32]
        stp     C_l, C_h, [dst, #-48]
        ldp     D_l, D_h, [src, #-64]!
        stp     D_l, D_h, [dst, #-64]!

        tst     count, #0x3f
        b.ne    .Ltail63
        ret

        /*
         * Critical loop. Start at a new cache line boundary. Assuming
         * 64 bytes per line this ensures the entire loop is in one line.
         */
        .p2align L1_CACHE_SHIFT
.Lcpy_body_large:
        /* Pre-load 64 bytes of data. */
        ldp     A_l, A_h, [src, #-16]
        ldp     B_l, B_h, [src, #-32]
        ldp     C_l, C_h, [src, #-48]
        ldp     D_l, D_h, [src, #-64]!
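        /*
         * The 128 subtracted from count above accounts for this 64-byte
         * pre-load and for the final 64-byte store block after the loop,
         * so the subs/b.ge check below never lets the loop read past the
         * remaining data.
         */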
1:
        /*
         * Interleave the load of the next 64-byte block with the store of
         * the previously loaded 64 bytes.
         */
        stp     A_l, A_h, [dst, #-16]
        ldp     A_l, A_h, [src, #-16]
        stp     B_l, B_h, [dst, #-32]
        ldp     B_l, B_h, [src, #-32]
        stp     C_l, C_h, [dst, #-48]
        ldp     C_l, C_h, [src, #-48]
        stp     D_l, D_h, [dst, #-64]!
        ldp     D_l, D_h, [src, #-64]!
        subs    count, count, #64
        b.ge    1b
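        /* Store the last 64 bytes loaded by the final loop iteration. */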
        stp     A_l, A_h, [dst, #-16]
        stp     B_l, B_h, [dst, #-32]
        stp     C_l, C_h, [dst, #-48]
        stp     D_l, D_h, [dst, #-64]!

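        /* Copy any remaining 0-63 bytes via the shared tail code. */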
        tst     count, #0x3f
        b.ne    .Ltail63
        ret
ENDPIPROC(memmove)
EXPORT_SYMBOL(memmove)
ENDPROC(__memmove)
EXPORT_SYMBOL(__memmove)