]>
Commit | Line | Data |
---|---|---|
2b778ceb | 1 | /* Copyright (C) 2011-2021 Free Software Foundation, Inc. |
b21cb02f DAG |
2 | This file is part of the GNU C Library. |
3 | Code contributed by Dave Gilbert <david.gilbert@linaro.org> | |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
ab84e3ff | 16 | License along with the GNU C Library. If not, see |
5a82c748 | 17 | <https://www.gnu.org/licenses/>. */ |
b21cb02f DAG |
18 | |
19 | #include <sysdep.h> | |
20 | ||
21 | @ This memchr routine is optimised on a Cortex-A9 and should work on all ARMv7 | |
22 | @ and ARMv6T2 processors. It has a fast path for short sizes, and has an | |
23 | @ optimised path for large data sets; the worst case is finding the match early | |
24 | @ in a large data set. | |
25 | @ Note: The use of cbz/cbnz means it's Thumb only | |
26 | ||
27 | @ 2011-07-15 david.gilbert@linaro.org | |
28 | @ Copy from Cortex strings release 21 and change license | |
29 | @ http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/view/head:/src/linaro-a9/memchr.S | |
30 | @ Change function declarations/entry/exit | |
31 | @ 2011-12-01 david.gilbert@linaro.org | |
32 | @ Add some fixes from comments received (including use of ldrd instead of ldm) | |
33 | @ 2011-12-07 david.gilbert@linaro.org | |
34 | @ Removed cbz from align loop - can't be taken | |
35 | ||
/* CHARTSTMASK(c) is a single-bit mask that lies inside byte number c
   (c = 0 is the byte at the lowest address) of a word loaded from memory.
   The scan below builds words whose bytes are either 00 or ff, so testing
   one bit per byte is enough, and picking the bit per endianness lets the
   same code work for both byte orders.
   Both the argument and the whole expansion are parenthesised so that the
   macro stays correct when given an expression or used inside one.  */
#ifdef __ARMEB__
#define CHARTSTMASK(c) (1<<(31-((c)*8)))
#else
#define CHARTSTMASK(c) (1<<((c)*8))
#endif
42 | .syntax unified | |
43 | ||
44 | .text | |
45 | .thumb | |
b21cb02f DAG |
46 | .thumb_func |
47 | .global memchr | |
48 | .type memchr,%function | |
49 | ENTRY(memchr) | |
50 | @ r0 = start of memory to scan | |
51 | @ r1 = character to look for | |
52 | @ r2 = length | |
53 | @ returns r0 = pointer to character or NULL if not found | |
54 | and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char | |
55 | ||
56 | cmp r2,#16 @ If it's short don't bother with anything clever | |
57 | blt 20f | |
58 | ||
59 | tst r0, #7 @ If it's already aligned skip the next bit | |
60 | beq 10f | |
61 | ||
62 | @ Work up to an aligned point | |
63 | 5: | |
81cb7a0b | 64 | ldrb r3, [r0],#1 |
b21cb02f DAG |
65 | subs r2, r2, #1 |
66 | cmp r3, r1 | |
67 | beq 50f @ If it matches exit found | |
68 | tst r0, #7 | |
69 | bne 5b @ If not aligned yet then do next byte | |
70 | ||
71 | 10: | |
72 | @ At this point, we are aligned, we know we have at least 8 bytes to work with | |
73 | push {r4,r5,r6,r7} | |
74 | cfi_adjust_cfa_offset (16) | |
75 | cfi_rel_offset (r4, 0) | |
76 | cfi_rel_offset (r5, 4) | |
77 | cfi_rel_offset (r6, 8) | |
78 | cfi_rel_offset (r7, 12) | |
79 | ||
80 | cfi_remember_state | |
81 | ||
82 | orr r1, r1, r1, lsl #8 @ expand the match word across to all bytes | |
83 | orr r1, r1, r1, lsl #16 | |
47c71d93 | 84 | bic r6, r2, #7 @ Number of double words to work with * 8 |
b21cb02f DAG |
85 | mvns r7, #0 @ all F's |
86 | movs r3, #0 | |
87 | ||
88 | 15: | |
81cb7a0b | 89 | ldrd r4,r5, [r0],#8 |
47c71d93 RM |
90 | subs r6, r6, #8 |
91 | eor r4,r4, r1 @ Get it so that r4,r5 have 00's where the bytes match the target | |
92 | eor r5,r5, r1 | |
93 | uadd8 r4, r4, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 | |
94 | sel r4, r3, r7 @ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION | |
b21cb02f | 95 | uadd8 r5, r5, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 |
47c71d93 RM |
96 | sel r5, r4, r7 @ chained....bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION |
97 | cbnz r5, 60f | |
b21cb02f DAG |
98 | bne 15b @ (Flags from the subs above) If not run out of bytes then go around again |
99 | ||
100 | pop {r4,r5,r6,r7} | |
101 | cfi_adjust_cfa_offset (-16) | |
102 | cfi_restore (r4) | |
103 | cfi_restore (r5) | |
104 | cfi_restore (r6) | |
105 | cfi_restore (r7) | |
106 | ||
107 | and r1,r1,#0xff @ Get r1 back to a single character from the expansion above | |
108 | and r2,r2,#7 @ Leave the count remaining as the number after the double words have been done | |
109 | ||
110 | 20: | |
111 | cbz r2, 40f @ 0 length or hit the end already then not found | |
112 | ||
113 | 21: @ Post aligned section, or just a short call | |
81cb7a0b | 114 | ldrb r3,[r0],#1 |
b21cb02f DAG |
115 | subs r2,r2,#1 |
116 | eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub | |
117 | cbz r3, 50f | |
118 | bne 21b @ on r2 flags | |
119 | ||
120 | 40: | |
121 | movs r0,#0 @ not found | |
122 | DO_RET(lr) | |
123 | ||
124 | 50: | |
125 | subs r0,r0,#1 @ found | |
126 | DO_RET(lr) | |
127 | ||
128 | 60: @ We're here because the fast path found a hit - now we have to track down exactly which word it was | |
129 | @ r0 points to the start of the double word after the one that was tested | |
47c71d93 | 130 | @ r4 has the 00/ff pattern for the first word, r5 has the chained value |
b21cb02f | 131 | cfi_restore_state |
47c71d93 | 132 | cmp r4, #0 |
b21cb02f | 133 | itte eq |
47c71d93 | 134 | moveq r4, r5 @ the end is in the 2nd word |
b21cb02f DAG |
135 | subeq r0,r0,#3 @ Points to 2nd byte of 2nd word |
136 | subne r0,r0,#7 @ or 2nd byte of 1st word | |
137 | ||
138 | @ r0 currently points to the 2nd byte of the word containing the hit | |
47c71d93 | 139 | tst r4, # CHARTSTMASK(0) @ 1st character |
b21cb02f DAG |
140 | bne 61f |
141 | adds r0,r0,#1 | |
47c71d93 | 142 | tst r4, # CHARTSTMASK(1) @ 2nd character |
b21cb02f DAG |
143 | ittt eq |
144 | addeq r0,r0,#1 | |
47c71d93 | 145 | tsteq r4, # (3<<15) @ 2nd & 3rd character |
b21cb02f DAG |
146 | @ If not the 3rd must be the last one |
147 | addeq r0,r0,#1 | |
148 | ||
149 | 61: | |
150 | pop {r4,r5,r6,r7} | |
151 | cfi_adjust_cfa_offset (-16) | |
152 | cfi_restore (r4) | |
153 | cfi_restore (r5) | |
154 | cfi_restore (r6) | |
155 | cfi_restore (r7) | |
156 | ||
157 | subs r0,r0,#1 | |
158 | DO_RET(lr) | |
159 | ||
160 | END(memchr) | |
161 | libc_hidden_builtin_def (memchr) |