]>
Commit | Line | Data |
---|---|---|
d8834a13 MW |
1 | /* |
2 | * linux/arch/arm/lib/memset.S | |
3 | * | |
4 | * Copyright (C) 1995-2000 Russell King | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 as | |
8 | * published by the Free Software Foundation. | |
9 | * | |
10 | * ASM optimised string functions | |
11 | */ | |
75d7a0d7 | 12 | #include <linux/linkage.h> |
d8834a13 MW |
13 | #include <asm/assembler.h> |
14 | ||
/*
 * void *memset(void *s, int c, size_t n)
 *
 * Fill n bytes starting at s with the byte value (unsigned char)c.
 *
 * In:    r0 = s   destination pointer (any alignment)
 *        r1 = c   fill value; only the low 8 bits are used
 *        r2 = n   number of bytes to write
 * Out:   r0 = s   (r0 is never written; ip is the working pointer)
 * Clobb: r1, r2, r3, ip, flags.
 *        r8 and lr (plus r4-r7 in the CALGN variant) are borrowed as
 *        additional store registers and are saved/restored on the stack.
 *
 * Numeric labels:
 *   1: pointer is word aligned - build the fill word, pick bulk or tail
 *   2:/3: bulk loop, 64 bytes per iteration
 *   4: 8- and 4-byte tail
 *   5: final 0-3 bytes (pointer may be unaligned here)
 *   6: unaligned entry - byte stores up to the next word boundary
 */
	.text
	.align	5

	.syntax unified
#if CONFIG_IS_ENABLED(SYS_THUMB_BUILD) && !defined(MEMSET_NO_THUMB_BUILD)
	.thumb
	.thumb_func
#endif
ENTRY(memset)
	/*
	 * C semantics: c is converted to unsigned char.  Without this mask
	 * any bits above bit 7 would be smeared into the replicated fill
	 * word built at 1: and corrupt every word-sized store.
	 * (Plain AND, not ANDS: flags are set by the next instruction.)
	 */
	and	r1, r1, #255		@ cast c to unsigned char
	ands	r3, r0, #3		@ 1 unaligned?  r3 = s & 3
	mov	ip, r0			@ preserve r0 as return value
	bne	6f			@ 1
/*
 * we know that the pointer in ip is aligned to a word boundary.
 */
1:	orr	r1, r1, r1, lsl #8	@ r1 = 0x0000cccc
	orr	r1, r1, r1, lsl #16	@ r1 = 0xcccccccc
	mov	r3, r1			@ second copy for 8-byte stores
	cmp	r2, #16
	blt	4f			@ < 16 bytes: tail only, no stack use

#if ! CALGN(1)+0

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
	stmfd	sp!, {r8, lr}
	mov	r8, r1
	mov	lr, r1

2:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmfdeq	sp!, {r8, pc}		@ Now <64 bytes to go.
					@ (count hit exactly 0: return)
/*
 * No need to correct the count; we're only testing bits from now on
 * (r2 is negative here, but subtracting 64 left bits 0-5 unchanged).
 */
	tst	r2, #32
	stmiane	ip!, {r1, r3, r8, lr}
	stmiane	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmiane	ip!, {r1, r3, r8, lr}
	ldmfd	sp!, {r8, lr}		@ restore; lr needed for "ret lr"

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

	stmfd	sp!, {r4-r8, lr}
	mov	r4, r1
	mov	r5, r1
	mov	r6, r1
	mov	r7, r1
	mov	r8, r1
	mov	lr, r1

	cmp	r2, #96			@ only bother aligning big fills
	tstgt	ip, #31			@ ... that are not 32-byte aligned
	ble	3f

	and	r8, ip, #31
	rsb	r8, r8, #32		@ r8 = bytes up to next 32-byte boundary
	sub	r2, r2, r8
	movs	r8, r8, lsl #(32 - 4)	@ C = bit 4 (16), N = bit 3 (8)
	stmiacs	ip!, {r4, r5, r6, r7}	@ 16 bytes
	stmiami	ip!, {r4, r5}		@ 8 bytes
	tst	r8, #(1 << 30)		@ original bit 2 (4)
	mov	r8, r1			@ r8 is a fill register again
	strne	r1, [ip], #4		@ 4 bytes

3:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3-r8, lr}	@ 2 x 32 bytes per iteration
	stmiage	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmfdeq	sp!, {r4-r8, pc}	@ count hit exactly 0: return

	tst	r2, #32
	stmiane	ip!, {r1, r3-r8, lr}
	tst	r2, #16
	stmiane	ip!, {r4-r7}
	ldmfd	sp!, {r4-r8, lr}

#endif

4:	tst	r2, #8
	stmiane	ip!, {r1, r3}
	tst	r2, #4
	strne	r1, [ip], #4
/*
 * When we get here, we've got less than 4 bytes left to set.  We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
	strbne	r1, [ip], #1
	strbne	r1, [ip], #1
	tst	r2, #1
	strbne	r1, [ip], #1
	ret	lr

/*
 * Unaligned start: r3 = s & 3 (1, 2 or 3).  Store 4 - r3 bytes so that
 * ip becomes word aligned, then rejoin the aligned path at 1:.
 */
6:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5b			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
	strblt	r1, [ip], #1		@ 1 r3 == 1 only
	strble	r1, [ip], #1		@ 1 r3 == 1 or 2
	strb	r1, [ip], #1		@ 1 always
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
	b	1b
ENDPROC(memset)