/* Copyright (C) 2011 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Code contributed by Dave Gilbert <david.gilbert@linaro.org>

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>

@ This memchr routine is optimised on a Cortex-A9 and should work on all ARMv7
@ and ARMv6T2 processors.  It has a fast path for short sizes, and has an
@ optimised path for large data sets; the worst case is finding the match early
@ in a large data set.
@ Note: The use of cbz/cbnz means it's Thumb only

@ 2011-07-15 david.gilbert@linaro.org
@    Copy from Cortex strings release 21 and change license
@    http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/view/head:/src/linaro-a9/memchr.S
@    Change function declarations/entry/exit
@ 2011-12-01 david.gilbert@linaro.org
@    Add some fixes from comments received (including use of ldrd instead ldm)
@ 2011-12-07 david.gilbert@linaro.org
@    Removed cbz from align loop - can't be taken
@ This lets us check a flag in a 00/ff byte easily in either endianness.
@ CHARTSTMASK(c) selects the low bit of byte lane c of a 32-bit word
@ (lane 0 = the byte at the lowest address).  The expansion is fully
@ parenthesized so it is safe no matter what operators surround its use.
#ifdef __ARMEB__
#define CHARTSTMASK(c) (1<<(31-(c*8)))
#else
#define CHARTSTMASK(c) (1<<(c*8))
#endif
	.syntax unified

	.text
	.thumb

@ ---------------------------------------------------------------------------
	.thumb_func
	.global memchr
	.type memchr,%function
ENTRY(memchr)
	@ void *memchr (const void *s, int c, size_t n)
	@ r0 = start of memory to scan
	@ r1 = character to look for
	@ r2 = length
	@ returns r0 = pointer to character or NULL if not found
	and	r1,r1,#0xff	@ Don't think we can trust the caller to actually pass a char

	cmp	r2,#16		@ If it's short don't bother with anything clever
	blt	20f

	tst	r0, #7		@ If it's already aligned skip the next bit
	beq	10f

	@ Work up to an aligned point, one byte at a time.  Since r2 >= 16
	@ here, this loop (at most 7 iterations) cannot exhaust the count.
5:
	ldrb	r3, [r0],#1
	subs	r2, r2, #1
	cmp	r3, r1
	beq	50f		@ If it matches exit found
	tst	r0, #7
	bne	5b		@ If not aligned yet then do next byte

10:
	@ At this point, we are aligned, we know we have at least 8 bytes to work with
	push	{r4,r5,r6,r7}
	cfi_adjust_cfa_offset (16)
	cfi_rel_offset (r4, 0)
	cfi_rel_offset (r5, 4)
	cfi_rel_offset (r6, 8)
	cfi_rel_offset (r7, 12)

	cfi_remember_state

	orr	r1, r1, r1, lsl #8	@ expand the match word across to all bytes
	orr	r1, r1, r1, lsl #16
	bic	r4, r2, #7	@ Number of double words to work with * 8
	mvns	r7, #0		@ all F's
	movs	r3, #0

	@ Main loop: scan 8 bytes per iteration.  XOR makes matching bytes
	@ zero; uadd8 0xff then sets the per-byte GE flags only for non-zero
	@ bytes, so sel builds a word that is ff in each matching byte lane.
15:
	ldrd	r5,r6, [r0],#8
	subs	r4, r4, #8
	eor	r5,r5, r1	@ Get it so that r5,r6 have 00's where the bytes match the target
	eor	r6,r6, r1
	uadd8	r5, r5, r7	@ Parallel add 0xff - sets the GE bits for anything that wasn't 0
	sel	r5, r3, r7	@ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
	uadd8	r6, r6, r7	@ Parallel add 0xff - sets the GE bits for anything that wasn't 0
	sel	r6, r5, r7	@ chained....bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
	cbnz	r6, 60f		@ cbz/cbnz do not update flags, so the subs result survives
	bne	15b		@ (Flags from the subs above) If not run out of bytes then go around again

	pop	{r4,r5,r6,r7}
	cfi_adjust_cfa_offset (-16)
	cfi_restore (r4)
	cfi_restore (r5)
	cfi_restore (r6)
	cfi_restore (r7)

	and	r1,r1,#0xff	@ Get r1 back to a single character from the expansion above
	and	r2,r2,#7	@ Leave the count remaining as the number after the double words have been done

20:
	cbz	r2, 40f		@ 0 length or hit the end already then not found

21:  @ Post aligned section, or just a short call
	ldrb	r3,[r0],#1
	subs	r2,r2,#1
	eor	r3,r3,r1	@ r3 = 0 if match - doesn't break flags from sub
	cbz	r3, 50f
	bne	21b		@ on r2 flags

40:
	movs	r0,#0		@ not found
	DO_RET(lr)

50:
	subs	r0,r0,#1	@ found; r0 was post-incremented past the match, so step back
	DO_RET(lr)

60:  @ We're here because the fast path found a hit - now we have to track down exactly which word it was
	@ r0 points to the start of the double word after the one that was tested
	@ r5 has the 00/ff pattern for the first word, r6 has the chained value
	cfi_restore_state
	cmp	r5, #0
	itte	eq
	moveq	r5, r6		@ the end is in the 2nd word
	subeq	r0,r0,#3	@ Points to 2nd byte of 2nd word
	subne	r0,r0,#7	@ or 2nd byte of 1st word

	@ r0 currently points to the 2nd byte of the word containing the hit
	tst	r5, # CHARTSTMASK(0)	@ 1st character
	bne	61f
	adds	r0,r0,#1
	tst	r5, # CHARTSTMASK(1)	@ 2nd character
	ittt	eq
	addeq	r0,r0,#1
	tsteq	r5, # (3<<15)		@ 2nd & 3rd character
	@ If not the 3rd must be the last one
	@ NOTE(review): 3<<15 hard-codes little-endian lane bits (top bit of
	@ byte 1, low bit of byte 2), unlike CHARTSTMASK which adapts to
	@ __ARMEB__ — verify this path if building big-endian.
	addeq	r0,r0,#1

61:
	pop	{r4,r5,r6,r7}
	cfi_adjust_cfa_offset (-16)
	cfi_restore (r4)
	cfi_restore (r5)
	cfi_restore (r6)
	cfi_restore (r7)

	subs	r0,r0,#1	@ convert "one past the match" to the match address
	DO_RET(lr)

END(memchr)
libc_hidden_builtin_def (memchr)