]>
Commit | Line | Data |
---|---|---|
e23d3d26 | 1 | /* Optimized strcmp implementation for Power7 using 'cmpb' instruction |
688903eb | 2 | Copyright (C) 2014-2018 Free Software Foundation, Inc. |
e23d3d26 VR |
3 | This file is part of the GNU C Library. |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
16 | License along with the GNU C Library; if not, see | |
17 | <http://www.gnu.org/licenses/>. */ | |
18 | ||
19 | /* The optimization is achieved here through cmpb instruction. | |
20 | 8-byte aligned strings are processed with doubleword comparisons | |
21 | and unaligned strings are handled with a byte-by-byte loop | |
22 | that is unrolled four times. */ | |
23 | ||
24 | #include <sysdep.h> | |
25 | ||
3bc426e1 WSM |
26 | #ifndef STRCMP /* keep a STRCMP name that was defined before this point; otherwise the entry point is named strcmp */ |
27 | # define STRCMP strcmp | |
28 | #endif | |
29 | ||
e23d3d26 VR |
30 | /* int [r3] strcmp (const char *s1 [r3], const char *s2 [r4])

   Walks s1 and s2 in parallel and returns in r3 the difference between
   the first pair of bytes that differ, each byte taken as an unsigned
   value, or 0 if the strings match through the terminating NUL of s1.

   When both pointers are 8-byte aligned the strings are compared one
   doubleword at a time, with cmpb against zero locating the NUL;
   otherwise they are compared one byte at a time for the whole string.
   Both loops are unrolled four times.  */ |
31 | ||
72607db0 | 32 | .machine power7 |
d5b41185 | 33 | ENTRY_TOCLESS (STRCMP, 4) |
e23d3d26 VR |
34 | CALL_MCOUNT 2 |
35 | ||
36 | or r9, r3, r4 /* merge both pointers so a single test covers s1 and s2 */ | |
37 | rldicl. r10, r9, 0, 61 /* r10 = low 3 bits of r9; nonzero means s1 or s2 is not 8-byte aligned */ | |
38 | bne cr0, L(process_unaligned_bytes) /* at least one pointer is unaligned: compare byte by byte */ | |
72607db0 | 39 | li r5, 0 /* r5 = 0, the byte value cmpb looks for below */ |
e23d3d26 | 40 | |
72607db0 | 41 | .align 4 |
e23d3d26 | 42 | /* Both pointers are doubleword aligned: compare 8 bytes per step, four steps per loop iteration.  */ |
e23d3d26 | 43 | L(unrollDword): |
72607db0 RS |
44 | ld r8,0(r3) /* r8 = next 8 bytes of s1 */ |
45 | ld r10,0(r4) /* r10 = next 8 bytes of s2 */ | |
46 | cmpb r7,r8,r5 /* r7 = 0xff in every byte position where r8 holds a zero byte */ | |
47 | cmpdi cr7,r7,0 /* any NUL in this doubleword of s1? */ | |
48 | mr r9,r7 /* L(null_found) expects the NUL mask in r9 */ | |
49 | bne cr7,L(null_found) | |
50 | cmpld cr7,r8,r10 /* no NUL in s1: do the 8 bytes match? */ | |
51 | bne cr7,L(different) | |
52 | ||
53 | ld r8,8(r3) /* second doubleword: same sequence at offset 8 */ | |
54 | ld r10,8(r4) | |
55 | cmpb r7,r8,r5 | |
56 | cmpdi cr7,r7,0 | |
57 | mr r9,r7 | |
58 | bne cr7,L(null_found) | |
59 | cmpld cr7,r8,r10 | |
60 | bne cr7,L(different) | |
61 | ||
62 | ld r8,16(r3) /* third doubleword, offset 16 */ | |
63 | ld r10,16(r4) | |
64 | cmpb r7,r8,r5 | |
65 | cmpdi cr7,r7,0 | |
66 | mr r9,r7 | |
67 | bne cr7,L(null_found) | |
68 | cmpld cr7,r8,r10 | |
69 | bne cr7,L(different) | |
70 | ||
71 | ld r8,24(r3) /* fourth doubleword, offset 24 */ | |
72 | ld r10,24(r4) | |
73 | cmpb r7,r8,r5 | |
74 | cmpdi cr7,r7,0 | |
75 | mr r9,r7 | |
76 | bne cr7,L(null_found) | |
77 | cmpld cr7,r8,r10 | |
78 | bne cr7,L(different) | |
79 | ||
80 | addi r3, r3, 32 /* advance s1 past the 32 bytes just compared */ | |
81 | addi r4, r4, 32 /* advance s2 likewise */ | |
82 | beq cr7, L(unrollDword) /* cr7 still holds "equal" from the last cmpld, so this always loops */ | |
83 | ||
84 | .align 4 | |
85 | L(null_found): /* in: r8/r10 = current doublewords of s1/s2, r9 = NUL mask for r8 */ | |
86 | #ifdef __LITTLE_ENDIAN__ | |
87 | neg r7,r9 | |
88 | and r9,r9,r7 /* r9 & -r9: keep only the lowest set bit, i.e. the first NUL in memory order */ | |
89 | li r7,-1 | |
90 | cntlzd r9,r9 | |
91 | subfic r9,r9,71 /* 71 - clz = bit offset just above the NUL byte */ | |
92 | sld r9,r7,r9 /* r9 = all-ones in every byte that follows the NUL */ | |
93 | #else | |
94 | cntlzd r9,r9 /* leading zeros = bit offset of the first NUL byte from the top */ | |
95 | li r7,-1 | |
96 | addi r9,r9,8 /* step past the NUL byte itself */ | |
97 | srd r9,r7,r9 /* r9 = all-ones in every byte that follows the NUL */ | |
98 | #endif | |
99 | or r8,r8,r9 /* force the bytes after the NUL to 0xff in both words ... */ | |
100 | or r10,r10,r9 /* ... so data past the end of s1 can never register as a difference */ | |
101 | ||
102 | L(different): /* in: r8/r10 = doublewords to compare; out: r3 = byte difference */ | |
103 | cmpb r9,r8,r10 /* r9 = 0xff in every byte position where r8 and r10 agree */ | |
104 | #ifdef __LITTLE_ENDIAN__ | |
105 | addi r7,r9,1 | |
106 | andc r9,r7,r9 /* (r9 + 1) & ~r9: lowest clear bit = bottom bit of the first differing byte */ | |
107 | cntlzd r9,r9 | |
108 | subfic r9,r9,63 /* r9 = right-shift count that brings that byte down to bits 0-7 */ | |
109 | #else | |
110 | not r9,r9 /* now 0xff marks the bytes that differ */ | |
111 | cntlzd r9,r9 /* leading zeros = bit offset of the first differing byte from the top */ | |
112 | subfic r9,r9,56 /* r9 = right-shift count that brings that byte down to bits 0-7 */ | |
113 | #endif | |
114 | srd r3,r8,r9 /* if no byte differs the count is 64 or more and both shifts yield 0 */ | |
115 | srd r10,r10,r9 | |
116 | rldicl r10,r10,0,56 /* keep only the low byte of the s2 value */ | |
117 | rldicl r3,r3,0,56 /* keep only the low byte of the s1 value */ | |
118 | subf r3,r10,r3 /* r3 = s1 byte - s2 byte */ | |
119 | blr | |
120 | ||
121 | .align 4 | |
e23d3d26 VR |
122 | L(process_unaligned_bytes): /* byte-at-a-time loop, four bytes per iteration */ |
123 | lbz r9, 0(r3) /* load byte from s1 (zero-extended) */ | |
124 | lbz r10, 0(r4) /* load byte from s2 (zero-extended) */ | |
125 | cmpdi cr7, r9, 0 /* is *s1 the terminating NUL? */ | |
126 | beq cr7, L(diffOfNULL) /* if *s1 is NUL, return 0 - *s2 */ | |
127 | cmplw cr7, r9, r10 /* compare *s1 and *s2 */ | |
128 | bne cr7, L(ComputeDiff) /* bytes differ: compute the difference and return */ | |
129 | ||
130 | lbz r9, 1(r3) /* load next byte from s1 */ | |
131 | lbz r10, 1(r4) /* load next byte from s2 */ | |
132 | cmpdi cr7, r9, 0 /* is *s1 the terminating NUL? */ | |
133 | beq cr7, L(diffOfNULL) /* if *s1 is NUL, return 0 - *s2 */ | |
134 | cmplw cr7, r9, r10 /* compare *s1 and *s2 */ | |
135 | bne cr7, L(ComputeDiff) /* bytes differ: compute the difference and return */ | |
136 | ||
137 | lbz r9, 2(r3) /* unroll 3rd byte here */ | |
138 | lbz r10, 2(r4) | |
139 | cmpdi cr7, r9, 0 | |
140 | beq cr7, L(diffOfNULL) | |
141 | cmplw cr7, r9, r10 | |
142 | bne 7, L(ComputeDiff) /* bare 7 is CR field 7, the same field as cr7 above */ | |
143 | ||
144 | lbz r9, 3(r3) /* unroll 4th byte now */ | |
145 | lbz r10, 3(r4) | |
146 | addi r3, r3, 4 /* increment s1 by unroll factor */ | |
147 | cmpdi cr7, r9, 0 /* NUL test goes to cr7 ... */ | |
148 | cmplw cr6, 9, r10 /* ... and the equality test to cr6 (bare 9 is register 9, i.e. r9) so both stay live */ | |
149 | beq cr7, L(diffOfNULL) | |
150 | addi r4, r4, 4 /* increment s2 by unroll factor */ | |
151 | beq cr6, L(process_unaligned_bytes) /* all four bytes matched: next iteration; otherwise fall into L(ComputeDiff) */ | |
152 | ||
72607db0 | 153 | .align 4 |
e23d3d26 VR |
154 | L(ComputeDiff): /* in: r9 = byte from s1, r10 = byte from s2, known to differ */ |
155 | extsw r9, r9 | |
156 | subf r10, r10, r9 /* r10 = r9 - r10, i.e. *s1 - *s2 */ | |
157 | extsw r3, r10 /* sign extend the result into r3 */ | |
158 | blr /* return */ | |
159 | ||
72607db0 | 160 | .align 4 |
e23d3d26 VR |
161 | L(diffOfNULL): /* in: *s1 was NUL, r10 = byte from s2 */ |
162 | li r9, 0 | |
163 | subf r10, r10, r9 /* r10 = 0 - r10, i.e. *s1 - *s2 with *s1 == NUL */ | |
164 | extsw r3, r10 /* sign extend result */ | |
165 | blr /* return */ | |
166 | ||
3bc426e1 | 167 | END (STRCMP) |
e23d3d26 | 168 | libc_hidden_builtin_def (strcmp) |