]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/powerpc/powerpc64/power7/strcmp.S
Prefer https to http for gnu.org and fsf.org URLs
[thirdparty/glibc.git] / sysdeps / powerpc / powerpc64 / power7 / strcmp.S
1 /* Optimized strcmp implementation for Power7 using 'cmpb' instruction
2 Copyright (C) 2014-2019 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
/* The optimization is achieved here through the cmpb instruction.
   When both strings are 8-byte aligned they are processed with
   doubleword comparisons; otherwise they are compared one byte at a
   time in an unrolled loop.  */
23
24 #include <sysdep.h>
25
26 #ifndef STRCMP
27 # define STRCMP strcmp
28 #endif
29
/* int [r3] strcmp (const char *s1 [r3], const char *s2 [r4])

   Returns in r3 the difference between the first pair of bytes that
   differ (each taken as an unsigned value), or 0 when the strings
   match up to and including the terminating NUL.

   Two code paths:
     - s1 and s2 both 8-byte aligned: compare one doubleword at a time,
       four doublewords per loop iteration, using cmpb against zero to
       spot a NUL inside the s1 doubleword.
     - otherwise: compare one byte at a time, four bytes per loop
       iteration, until the strings end or differ.

   Register roles:
     r3, r4	running s1 / s2 pointers (r3 also receives the result)
     r5		zero, the cmpb operand for NUL detection (aligned path)
     r8, r10	current s1 / s2 doubleword (aligned path)
     r9, r10	current s1 / s2 byte (byte path)
     r7, r9	scratch for masks and shift counts (aligned path)  */

	.machine power7

/* One step of the aligned loop: load the doublewords at OFF(r3) and
   OFF(r4) into r8 and r10.  If the s1 doubleword contains a NUL byte,
   leave the cmpb result in r9 and branch to L(nul_in_dword).  If the
   two doublewords differ, branch to L(dword_mismatch).  Otherwise fall
   through with cr7 set to "equal".  Expects r5 == 0.  */
	.macro	cmp_dword_step off
	ld	r8, \off(r3)
	ld	r10, \off(r4)
	cmpb	r7, r8, r5		/* 0xff in each byte where s1 is NUL */
	cmpdi	cr7, r7, 0
	mr	r9, r7			/* L(nul_in_dword) reads it from r9 */
	bne	cr7, L(nul_in_dword)
	cmpld	cr7, r8, r10
	bne	cr7, L(dword_mismatch)
	.endm

/* One step of the byte loop: load the bytes at OFF(r3) and OFF(r4)
   into r9 and r10.  Branch to L(s1_ended) when the s1 byte is NUL and
   to L(byte_mismatch) when the two bytes differ; otherwise fall
   through.  */
	.macro	cmp_byte_step off
	lbz	r9, \off(r3)
	lbz	r10, \off(r4)
	cmpdi	cr7, r9, 0
	beq	cr7, L(s1_ended)
	cmplw	cr7, r9, r10
	bne	cr7, L(byte_mismatch)
	.endm

ENTRY_TOCLESS (STRCMP, 4)
	CALL_MCOUNT 2

	/* cr0 is "not equal" when any of the low three bits of s1 | s2 is
	   set, i.e. when at least one pointer is not 8-byte aligned.  */
	or	r9, r3, r4
	rldicl.	r10, r9, 0, 61
	bne	cr0, L(byte_loop)
	li	r5, 0			/* cmpb operand: eight zero bytes */

	.align	4
	/* Both pointers are doubleword aligned: 32 bytes per iteration.  */
L(dword_loop):
	cmp_dword_step 0
	cmp_dword_step 8
	cmp_dword_step 16
	cmp_dword_step 24

	addi	r3, r3, 32
	addi	r4, r4, 32
	/* cr7 still holds "equal" from the last cmpld (addi leaves the
	   condition register alone), so this branch is always taken.  */
	beq	cr7, L(dword_loop)

	.align	4
L(nul_in_dword):
	/* r9 has 0xff in every byte position where the s1 doubleword holds
	   a NUL.  Turn it into a mask covering all bytes that follow the
	   first NUL in string order, then force those bytes to 0xff in both
	   doublewords so that only bytes up to and including the NUL can
	   still differ.  */
#ifdef __LITTLE_ENDIAN__
	neg	r7, r9
	and	r9, r9, r7		/* keep only the lowest set bit */
	li	r7, -1
	cntlzd	r9, r9
	subfic	r9, r9, 71		/* 8 * (index of first NUL + 1) */
	sld	r9, r7, r9
#else
	cntlzd	r9, r9			/* 8 * index of first NUL */
	li	r7, -1
	addi	r9, r9, 8
	srd	r9, r7, r9
#endif
	or	r8, r8, r9
	or	r10, r10, r9

L(dword_mismatch):
	/* Bring the first differing byte, in string order, down to the low
	   byte of each doubleword and subtract.  cmpb yields 0xff for each
	   equal byte pair and 0x00 for each unequal pair.  When control
	   arrives from L(nul_in_dword) and no unequal pair remains, the low
	   seven bits of the computed shift count are 64 or more, for which
	   srd produces 0 in both operands, so the result is 0.  */
	cmpb	r9, r8, r10
#ifdef __LITTLE_ENDIAN__
	addi	r7, r9, 1
	andc	r9, r7, r9		/* isolate the lowest clear bit */
	cntlzd	r9, r9
	subfic	r9, r9, 63		/* bit offset of the differing byte */
#else
	not	r9, r9			/* 0xff now marks unequal bytes */
	cntlzd	r9, r9
	subfic	r9, r9, 56		/* bit offset of the differing byte */
#endif
	srd	r3, r8, r9
	srd	r10, r10, r9
	rldicl	r10, r10, 0, 56		/* keep the low 8 bits */
	rldicl	r3, r3, 0, 56
	subf	r3, r10, r3		/* r3 = s1 byte - s2 byte */
	blr

	.align	4
	/* At least one pointer is unaligned: 4 bytes per iteration.  */
L(byte_loop):
	cmp_byte_step 0
	cmp_byte_step 1
	cmp_byte_step 2

	/* Fourth byte: same checks, with the pointer updates interleaved.
	   Neither exit path reads r3 or r4, so it does not matter that
	   only r3 has been advanced when L(s1_ended) is taken.  */
	lbz	r9, 3(r3)
	lbz	r10, 3(r4)
	addi	r3, r3, 4
	cmpdi	cr7, r9, 0
	cmplw	cr6, r9, r10
	beq	cr7, L(s1_ended)
	addi	r4, r4, 4
	beq	cr6, L(byte_loop)
	/* Bytes differ: fall through to L(byte_mismatch).  */

	.align	4
L(byte_mismatch):
	extsw	r9, r9
	subf	r10, r10, r9		/* r10 = s1 byte - s2 byte */
	extsw	r3, r10
	blr

	.align	4
L(s1_ended):
	li	r9, 0
	subf	r10, r10, r9		/* r10 = 0 - s2 byte */
	extsw	r3, r10
	blr

END (STRCMP)
libc_hidden_builtin_def (strcmp)