]>
Commit | Line | Data |
---|---|---|
d5efd131 MF |
1 | /* Optimized version of the standard strlen() function. |
2 | This file is part of the GNU C Library. | |
04277e02 | 3 | Copyright (C) 2000-2019 Free Software Foundation, Inc. |
d5efd131 MF |
4 | Contributed by Dan Pop <Dan.Pop@cern.ch>. |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
75efb018 | 17 | License along with the GNU C Library; if not, see |
5a82c748 | 18 | <https://www.gnu.org/licenses/>. */ |
d5efd131 MF |
19 | |
20 | /* Return: the length of the input string | |
21 | ||
22 | Input: | |
23 | in0: str | |
24 | ||
25 | Look for the null character byte by byte, until we reach a word aligned | |
26 | address, then search word by word, using the czx instruction. We're | |
27 | also doing one word of read ahead, which could cause problems if the | |
28 | null character is on the last word of a page and the next page is not | |
29 | mapped in the process address space. Hence the use of the speculative | |
30 | load. | |
31 | ||
32 | This implementation assumes little endian mode. For big endian mode, | |
33 | the instruction czx1.r should be replaced by czx1.l. */ | |
34 | ||
35 | #include <sysdep.h> | |
36 | #undef ret | |
37 | ||
/* Symbolic names for the registers used by strlen below. */
/*   saved_lc  copy of ar.lc taken on entry and written back before returning */
/*   str       running pointer into the string, advanced by post-increment loads */
/*   pos0      result of czx1.r on val1, the value 8 is treated as no zero byte */
/*   val1      8-byte word currently being examined */
/*   val2      byte read by the byte loop, or the 8-byte word read ahead of val1 */
/*   origadd   value of str when the word-by-word search starts */
/*   tmp       scratch: str % 8 on entry, later str - origadd */
/*   loopcnt   iteration count that is loaded into ar.lc for the byte loop */
/*   len       the return register ret0, accumulates the length */
38 | #define saved_lc r18 | |
39 | #define str r19 | |
40 | #define pos0 r20 | |
41 | #define val1 r21 | |
42 | #define val2 r22 | |
43 | #define origadd r23 | |
44 | #define tmp r24 | |
45 | #define loopcnt r30 | |
46 | #define len ret0 | |
47 | ||
/* strlen: in0 = address of the string, result in len (ret0). */
/* Phase 1 (.l1) counts byte by byte until str is 8-byte aligned. */
/* Phase 2 (.l2) scans 8 bytes at a time with czx1.r, reading one word ahead. */
/* ar.lc is saved on entry and restored on the single exit path. */
48 | ENTRY(strlen) | |
49 | .prologue | |
50 | alloc r2 = ar.pfs, 1, 0, 0, 0 | |
51 | .save ar.lc, saved_lc | |
52 | mov saved_lc = ar.lc // save the loop counter | |
53 | .body | |
0347518d | 54 | mov str = in0 |
d5efd131 MF |
55 | mov len = r0 // len = 0 |
56 | and tmp = 7, in0 // tmp = str % 8 | |
57 | ;; | |
/* tmp == 0 means str is already 8-byte aligned, so the byte loop is skipped. */
/* Otherwise 8 - tmp bytes remain before the next aligned address. */
58 | sub loopcnt = 8, tmp // loopcnt = 8 - tmp | |
59 | cmp.eq p6, p0 = tmp, r0 | |
60 | (p6) br.cond.sptk .str_aligned;; | |
/* ar.lc gets the count minus one because br.cloop runs the body ar.lc + 1 times. */
61 | adds loopcnt = -1, loopcnt;; | |
62 | mov ar.lc = loopcnt | |
/* Byte loop: load one byte and advance str. A zero byte ends the search */
/* with len already holding the answer, otherwise len is incremented. */
63 | .l1: | |
64 | ld1 val2 = [str], 1 | |
65 | ;; | |
66 | cmp.eq p6, p0 = val2, r0 | |
67 | (p6) br.cond.spnt .restore_and_exit | |
68 | adds len = 1, len | |
69 | br.cloop.dptk .l1 | |
/* Word search setup: remember where it starts and load the first word. */
/* After this load str points 8 bytes past the word held in val1. */
70 | .str_aligned: | |
71 | mov origadd = str // origadd = orig | |
72 | ld8 val1 = [str], 8;; | |
/* NOTE(review): the two nop.b look like bundle padding ahead of the loop label - confirm. */
73 | nop.b 0 | |
74 | nop.b 0 | |
/* Word loop: read the next word ahead with a speculative load while czx1.r */
/* inspects val1. After the load str is 16 bytes past the word in val1. */
/* pos0 != 8 means val1 contains a zero byte and the search is over. */
/* chk.s is only reached when val1 had no zero byte, and it diverts to */
/* .recovery if the speculative load of val2 did not succeed. */
75 | .l2: ld8.s val2 = [str], 8 // don't bomb out here | |
0347518d | 76 | czx1.r pos0 = val1 |
d5efd131 MF |
77 | ;; |
78 | cmp.ne p6, p0 = 8, pos0 | |
79 | (p6) br.cond.spnt .foundit | |
80 | chk.s val2, .recovery | |
81 | .back: | |
0347518d | 82 | mov val1 = val2 |
d5efd131 MF |
83 | br.cond.dptk .l2 |
/* Result: bytes counted by the byte loop, plus the distance covered by the */
/* word search, plus the index of the zero byte inside val1. The -16 removes */
/* the two post-increments of 8 that moved str past the word held in val1. */
84 | .foundit: | |
85 | sub tmp = str, origadd // tmp = crt address - orig | |
86 | add len = len, pos0;; | |
87 | add len = len, tmp;; | |
88 | adds len = -16, len | |
89 | .restore_and_exit: | |
90 | mov ar.lc = saved_lc // restore the loop counter | |
91 | br.ret.sptk.many b0 | |
/* Recovery for the speculative load: step str back by 8 to undo its */
/* post-increment, repeat the load non-speculatively, then rejoin at .back. */
92 | .recovery: | |
93 | adds str = -8, str;; | |
94 | ld8 val2 = [str], 8 // bomb out here | |
95 | br.cond.sptk .back | |
96 | END(strlen) | |
97 | libc_hidden_builtin_def (strlen) |