]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/powerpc/powerpc64/power4/strncmp.S
Fix POWER4/POWER7 optimized strncmp to not read past differing bytes
[thirdparty/glibc.git] / sysdeps / powerpc / powerpc64 / power4 / strncmp.S
1 /* Optimized strncmp implementation for PowerPC64.
2 Copyright (C) 2003, 2006, 2011 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
18 02110-1301 USA. */
19
20 #include <sysdep.h>
21 #include <bp-sym.h>
22 #include <bp-asm.h>
23
24 /* See strlen.s for comments on how the end-of-string testing works. */
25
26 /* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */
27
/* strncmp: compare at most rN bytes of the strings at rSTR1 and rSTR2.

   Result (r3): 0 when no difference is found before a null byte in s1
   or before rN bytes have been examined; otherwise a negative or
   positive value giving the order of the first differing bytes,
   compared as unsigned values.

   Strategy:
     - If both pointers are doubleword aligned and rN >= 8, compare
       rN / 8 doublewords in the L(g0)/L(g1) loop, then hand the
       remaining rN % 8 bytes to the byte loop.
     - Otherwise (a misaligned pointer, or rN < 8) compare everything
       in the byte loop at L(unaligned).

   The doubleword paths treat the most significant byte of each loaded
   doubleword as the first string byte (big-endian layout).

   Registers written: r0, r3-r11, CTR, CR0 and CR1.  */

EALIGN (BP_SYM(strncmp), 4, 0)
	CALL_MCOUNT 3

#define rTMP	r0
#define rRTN	r3
#define rSTR1	r3	/* first string arg */
#define rSTR2	r4	/* second string arg */
#define rN	r5	/* max string length */
/* Note:  The Bounded pointer support in this code is broken.  This code
   was inherited from PPC32 and that support was never completed.
   Current PPC gcc does not support -fbounds-check or -fbounded-pointers.  */
#define rWORD1	r6	/* current word in s1 */
#define rWORD2	r7	/* current word in s2 */
#define rWORD3	r10
#define rWORD4	r11
#define rFEFE	r8	/* constant 0xfefefefefefefeff (-0x0101010101010101) */
#define r7F7F	r9	/* constant 0x7f7f7f7f7f7f7f7f */
#define rNEG	r10	/* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
#define rBITDIF	r11	/* bits that differ in s1 & s2 words */
/* rWORD3/rWORD4 share r10/r11 with rNEG/rBITDIF: the former are only
   used in the byte loop, the latter only in the doubleword paths.  */

	dcbt	0,rSTR1
	or	rTMP, rSTR2, rSTR1
	lis	r7F7F, 0x7f7f
	dcbt	0,rSTR2
	clrldi.	rTMP, rTMP, 61	/* cr0: low 3 bits of (s1 | s2).  */
	cmpldi	cr1, rN, 0	/* cr1: n == 0?  Tested at L(unaligned).  */
	lis	rFEFE, -0x101
	bne	L(unaligned)	/* Either pointer misaligned: byte loop.  */
/* We are doubleword aligned so set up for two loops.  First a doubleword
   loop, then fall into the byte loop if any residual.  */
	srdi.	rTMP, rN, 3	/* rTMP = number of whole doublewords.  */
	clrldi	rN, rN, 61	/* rN = residual byte count (n % 8).  */
	addi	rFEFE, rFEFE, -0x101
	addi	r7F7F, r7F7F, 0x7f7f
	cmpldi	cr1, rN, 0	/* cr1: residual == 0?  */
	beq	L(unaligned)	/* n < 8: no doubleword to compare.  */

	mtctr	rTMP	/* Power4 wants mtctr 1st in dispatch group.  */
	ld	rWORD1, 0(rSTR1)
	ld	rWORD2, 0(rSTR2)
	/* Replicate the 32-bit constants into both halves of the
	   64-bit registers.  */
	sldi	rTMP, rFEFE, 32
	insrdi	r7F7F, r7F7F, 32, 0
	add	rFEFE, rFEFE, rTMP
	b	L(g1)

/* Doubleword loop.  On entry to L(g0), cr1 holds the comparison of the
   doublewords just examined and rWORD1 is known to contain no null byte.
   The difference test must come before the next load: the doubleword
   following a difference lies beyond the bytes strncmp is allowed to
   examine and need not be readable.  */
L(g0):
	bne-	cr1, L(different)
	ldu	rWORD1, 8(rSTR1)
	ldu	rWORD2, 8(rSTR2)
L(g1):	add	rTMP, rFEFE, rWORD1
	nor	rNEG, r7F7F, rWORD1
	bdz	L(tail)		/* Last whole doubleword: finish in L(tail).  */
	and.	rTMP, rTMP, rNEG	/* cr0 ne: end of string in rWORD1.  */
	cmpd	cr1, rWORD1, rWORD2
	beq+	L(g0)

/* OK. We've hit the end of the string. We need to be careful that
   we don't compare two strings as different because of gunk beyond
   the end of the strings...

   Here cr1 holds rWORD1 vs rWORD2 and rNEG holds ~(rWORD1 | 0x7f7f...).  */

L(endstring):
	and	rTMP, r7F7F, rWORD1
	beq	cr1, L(equal)	/* Whole doublewords identical.  */
	add	rTMP, rTMP, r7F7F
	xor.	rBITDIF, rWORD1, rWORD2

	andc	rNEG, rNEG, rTMP	/* 0x80 in each byte of rWORD1 that is zero.  */
	blt-	L(highbit)	/* Top bits differ: compare first bytes.  */
	cntlzd	rBITDIF, rBITDIF	/* Bit index of the first difference.  */
	cntlzd	rNEG, rNEG
	addi	rNEG, rNEG, 7	/* Bit index of the last bit of the first null.  */
	cmpd	cr1, rNEG, rBITDIF
	sub	rRTN, rWORD1, rWORD2
	blt-	cr1, L(equal)	/* First difference lies after the null.  */
	sradi	rRTN, rRTN, 63	/* 0 or -1 ...  */
	ori	rRTN, rRTN, 1	/* ... becomes 1 or -1.  */
	blr
L(equal):
	li	rRTN, 0
	blr

/* rWORD1 and rWORD2 differ and rWORD1 contains no null byte.  Both
   registers still hold the doublewords that were compared.  */
L(different):
	xor.	rBITDIF, rWORD1, rWORD2
	sub	rRTN, rWORD1, rWORD2
	blt-	L(highbit)	/* Top bits differ: sign of the sub is unusable.  */
	sradi	rRTN, rRTN, 63
	ori	rRTN, rRTN, 1
	blr
/* The most significant bits differ, so the first bytes differ: return
   the difference of the two most significant bytes.  */
L(highbit):
	srdi	rWORD2, rWORD2, 56
	srdi	rWORD1, rWORD1, 56
	sub	rRTN, rWORD1, rWORD2
	blr


/* The last whole doubleword has been loaded.  Check it, then fall into
   the byte-by-byte comparison for the residual rN bytes.  */
	.align 4
L(tail):
	and.	rTMP, rTMP, rNEG
	cmpd	cr1, rWORD1, rWORD2
	bne-	L(endstring)
	addi	rSTR1, rSTR1, 8	/* Harmless if we branch: r3 is overwritten.  */
	bne-	cr1, L(different)
	addi	rSTR2, rSTR2, 8
	cmpldi	cr1, rN, 0
/* Byte loop, also used for aligned strings when n < 8.  On entry rN is
   the number of bytes left to compare and cr1 holds rN compared with 0.  */
L(unaligned):
	mtctr   rN	/* Power4 wants mtctr 1st in dispatch group */
	ble	cr1, L(ux)	/* Nothing left to compare: return 0.  */
L(uz):
	lbz	rWORD1, 0(rSTR1)
	lbz	rWORD2, 0(rSTR2)
	.align 4
/* Unrolled four times, alternating between the rWORD1/rWORD2 and
   rWORD3/rWORD4 register pairs.  Each step leaves the loop when the byte
   budget in CTR runs out, when the s1 byte is null, or when the bytes
   differ; the next pair of bytes is loaded only after all three tests.  */
L(u1):
	cmpdi	cr1, rWORD1, 0
	bdz	L(u4)
	cmpd	rWORD1, rWORD2
	beq-	cr1, L(u4)
	bne-	L(u4)
	lbzu    rWORD3, 1(rSTR1)
	lbzu	rWORD4, 1(rSTR2)
	cmpdi	cr1, rWORD3, 0
	bdz	L(u3)
	cmpd	rWORD3, rWORD4
	beq-    cr1, L(u3)
	bne-    L(u3)
	lbzu	rWORD1, 1(rSTR1)
	lbzu	rWORD2, 1(rSTR2)
	cmpdi	cr1, rWORD1, 0
	bdz	L(u4)
	cmpd	rWORD1, rWORD2
	beq-	cr1, L(u4)
	bne-	L(u4)
	lbzu	rWORD3, 1(rSTR1)
	lbzu	rWORD4, 1(rSTR2)
	cmpdi	cr1, rWORD3, 0
	bdz	L(u3)
	cmpd	rWORD3, rWORD4
	beq-    cr1, L(u3)
	bne-    L(u3)
	lbzu	rWORD1, 1(rSTR1)
	lbzu	rWORD2, 1(rSTR2)
	b       L(u1)

/* Return the difference of the last pair of bytes examined.  */
L(u3):  sub     rRTN, rWORD3, rWORD4
	blr
L(u4):	sub	rRTN, rWORD1, rWORD2
	blr
L(ux):
	li	rRTN, 0
	blr
END (BP_SYM (strncmp))
libc_hidden_builtin_def (strncmp)
181