]>
Commit | Line | Data |
---|---|---|
693918b6 | 1 | /* Optimized strcmp implementation using basic LoongArch instructions. |
dff8da6b | 2 | Copyright (C) 2023-2024 Free Software Foundation, Inc. |
82d9426e | 3 | |
4 | This file is part of the GNU C Library. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
17 | License along with the GNU C Library. If not, see | |
18 | <https://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #include <sysdep.h> | |
21 | #include <sys/regdef.h> | |
22 | #include <sys/asm.h> | |
23 | ||
24 | #if IS_IN (libc) /* Inside libc the routine is emitted under the internal name __strcmp_aligned (presumably one of several variants picked by a selector elsewhere -- confirm). */ | |
25 | # define STRCMP_NAME __strcmp_aligned | |
26 | #else /* Any other build of this file defines the plain strcmp symbol. */ | |
27 | # define STRCMP_NAME strcmp | |
28 | #endif | |
29 | ||
30 | LEAF(STRCMP_NAME, 6) /* strcmp: compares the NUL-terminated strings at a0 and a1 one aligned 8-byte word at a time; a0 returns 0 when equal, otherwise the difference of the first mismatching bytes taken as unsigned values. The 6 is presumably the log2 entry alignment -- confirm against LEAF in sysdep.h. */ | |
31 | lu12i.w a4, 0x01010 /* a4 = 0x01010000, first step of building the 0x01 byte pattern */ | |
32 | andi a2, a0, 0x7 /* a2 = byte offset of a0 inside its 8-byte word */ | |
33 | ori a4, a4, 0x101 /* a4 = 0x01010101 */ | |
34 | andi a3, a1, 0x7 /* a3 = byte offset of a1 inside its 8-byte word */ | |
35 | ||
36 | bstrins.d a4, a4, 63, 32 /* copy the low 32 bits into bits 63..32: a4 = 0x0101010101010101 */ | |
37 | li.d t7, -1 /* t7 = all ones, the source for every byte mask built below */ | |
38 | li.d t8, 8 /* t8 = index for the ldx.d loads in the loops; 8 selects the second word */ | |
39 | slli.d a5, a4, 7 /* a5 = 0x8080808080808080, the top bit of every byte */ | |
40 | ||
41 | bne a2, a3, L(unaligned) /* different offsets: the words do not line up, use the shift-and-merge path */ | |
42 | bstrins.d a0, zero, 2, 0 /* clear bits 2..0: round a0 down to an 8-byte boundary */ | |
43 | bstrins.d a1, zero, 2, 0 /* same for a1 */ | |
44 | ld.d t0, a0, 0 /* t0 = aligned word that contains the first byte of the a0 string */ | |
45 | ||
46 | ld.d t1, a1, 0 /* t1 = aligned word that contains the first byte of the a1 string */ | |
47 | slli.d t3, a2, 3 /* t3 = shared start offset converted from bytes to bits */ | |
48 | sll.d t2, t7, t3 /* t2 = ones from the first string byte upwards, zeros over the bytes that precede it */ | |
49 | orn t0, t0, t2 /* t0 = t0 OR NOT t2: bytes before the string become 0xff, so they are neither NUL nor a mismatch */ | |
50 | ||
51 | ||
52 | orn t1, t1, t2 /* same forcing applied to t1 */ | |
53 | sub.d t2, t0, a4 /* zero-byte test, step 1: t0 - 0x01..01 */ | |
54 | andn t3, a5, t0 /* step 2: 0x80..80 AND NOT t0 */ | |
55 | and t2, t2, t3 /* step 3: t2 is nonzero exactly when t0 contains a zero byte */ | |
56 | ||
57 | bne t0, t1, L(al_end) /* first words already differ: go pick the deciding byte */ | |
58 | L(al_loop): /* loop invariant on entry: t0 == t1 and t2 is the zero-byte test of t0 */ | |
59 | bnez t2, L(ret0) /* equal words that contain a NUL: the strings are equal */ | |
60 | ldx.d t0, a0, t8 /* t0 = next word of the a0 string */ | |
61 | ldx.d t1, a1, t8 /* t1 = next word of the a1 string */ | |
62 | ||
63 | addi.d t8, t8, 8 /* advance the shared index by one word */ | |
64 | sub.d t2, t0, a4 /* zero-byte test on the new t0 (same three steps as above) */ | |
65 | andn t3, a5, t0 | |
66 | and t2, t2, t3 | |
67 | ||
68 | beq t0, t1, L(al_loop) /* keep going while the two words are identical */ | |
69 | L(al_end): /* t0 != t1 here; t2 still holds the zero-byte markers of t0 */ | |
70 | xor t3, t0, t1 /* t3 = bits in which the two words differ */ | |
71 | or t2, t2, t3 /* merge NUL markers and difference bits: the lowest set bit lies in the deciding byte */ | |
72 | ctz.d t3, t2 /* t3 = bit index of that lowest set bit */ | |
73 | ||
74 | ||
75 | bstrins.d t3, zero, 2, 0 /* clear bits 2..0: round the index down to the start of its byte */ | |
76 | srl.d t0, t0, t3 /* move the deciding byte of t0 to bits 7..0 */ | |
77 | srl.d t1, t1, t3 /* move the deciding byte of t1 to bits 7..0 */ | |
78 | andi t0, t0, 0xff /* keep only that byte */ | |
79 | ||
80 | andi t1, t1, 0xff /* keep only that byte */ | |
81 | sub.d a0, t0, t1 /* result = a0-string byte minus a1-string byte, both zero-extended */ | |
82 | jr ra /* return */ | |
83 | nop /* padding; presumably keeps the following label aligned -- confirm */ | |
84 | ||
85 | L(ret0): /* shared exit for equal strings */ | |
86 | move a0, zero /* result = 0 */ | |
87 | jr ra /* return */ | |
88 | nop /* padding; presumably for alignment of L(unaligned) -- confirm */ | |
89 | nop /* padding, see previous line */ | |
90 | ||
91 | L(unaligned): /* the two pointers have different offsets inside their 8-byte words */ | |
92 | slt a6, a3, a2 /* a6 = 1 when a1 has the smaller offset, else 0 */ | |
93 | xor t0, a0, a1 /* t0 = a0 XOR a1, the value needed for an XOR swap */ | |
94 | maskeqz t0, t0, a6 /* t0 = 0 when a6 is 0, so the two XORs below become no-ops */ | |
95 | xor a0, a0, t0 /* when a6 is 1 this turns a0 into the old a1 */ | |
96 | ||
97 | ||
98 | xor a1, a1, t0 /* and a1 into the old a0: a0 now always has the smaller offset; a6 remembers the swap */ | |
99 | andi a2, a0, 0x7 /* recompute the offset of (possibly swapped) a0 */ | |
100 | andi a3, a1, 0x7 /* recompute the offset of (possibly swapped) a1; a3 > a2 from here on */ | |
101 | bstrins.d a0, zero, 2, 0 /* round a0 down to an 8-byte boundary */ | |
102 | ||
103 | bstrins.d a1, zero, 2, 0 /* round a1 down to an 8-byte boundary */ | |
104 | ld.d t4, a0, 0 /* t4 = first aligned word of the a0 string, kept unshifted for later merging */ | |
105 | ld.d t1, a1, 0 /* t1 = first aligned word of the a1 string */ | |
106 | slli.d a2, a2, 3 /* a2 = offset of a0 in bits */ | |
107 | ||
108 | slli.d a3, a3, 3 /* a3 = offset of a1 in bits */ | |
109 | srl.d t0, t4, a2 /* t0 = a0 word with its first string byte moved to bits 7..0 */ | |
110 | srl.d t1, t1, a3 /* t1 = a1 word with its first string byte moved to bits 7..0 */ | |
111 | srl.d t5, t7, a3 /* t5 = ones over the low 64 - a3 bits, i.e. the bytes of t1 that are real string data */ | |
112 | ||
113 | orn t0, t0, t5 /* force the upper a3 bits of t0 to ones: only the bytes valid in both words are compared */ | |
114 | orn t1, t1, t5 /* same for t1 */ | |
115 | bne t0, t1, L(not_equal) /* the leading bytes already differ */ | |
116 | sll.d t5, t7, a2 /* t5 = ones from the first a0 string byte upwards (mask for the unshifted t4) */ | |
117 | ||
118 | ||
119 | sub.d a3, a2, a3 /* a3 = a2 - a3, negative; srl.d uses only the low 6 bits of the count, so it acts as 64 - (offset distance) */ | |
120 | orn t4, t4, t5 /* bytes of t4 that precede the string become 0xff so they cannot look like a NUL */ | |
121 | sub.d a2, zero, a3 /* a2 = distance between the two offsets in bits (positive) */ | |
122 | sub.d t2, t4, a4 /* zero-byte test on the whole first a0 word */ | |
123 | ||
124 | andn t3, a5, t4 | |
125 | and t2, t2, t3 /* t2 nonzero exactly when t4 contains a zero byte */ | |
126 | bnez t2, L(find_zero) /* the a0 string ends inside its first word */ | |
127 | L(un_loop): /* on entry t4 is the current a0 word and holds no NUL */ | |
128 | srl.d t5, t4, a3 /* t5 = upper bytes of t4 that were not compared yet, moved down to the low end */ | |
129 | ||
130 | ldx.d t4, a0, t8 /* t4 = next aligned word of the a0 string */ | |
131 | ldx.d t1, a1, t8 /* t1 = next aligned word of the a1 string */ | |
132 | addi.d t8, t8, 8 /* advance the shared index by one word */ | |
133 | sll.d t0, t4, a2 /* lower bytes of the new a0 word, moved up above the carried bytes */ | |
134 | ||
135 | or t0, t0, t5 /* t0 = eight a0-string bytes lined up with the a1 word in t1 */ | |
136 | bne t0, t1, L(not_equal) /* mismatch somewhere in this word */ | |
137 | sub.d t2, t4, a4 /* zero-byte test on the raw a0 word t4 */ | |
138 | andn t3, a5, t4 | |
139 | ||
140 | ||
141 | and t2, t2, t3 /* t2 nonzero exactly when t4 contains a zero byte */ | |
142 | beqz t2, L(un_loop) /* no NUL in the a0 word: continue */ | |
143 | L(find_zero): /* reached with t0 == t1 and a zero byte somewhere in t4 */ | |
144 | sub.d t2, t0, a4 /* zero-byte test on t0, the bytes that were just compared equal */ | |
145 | andn t3, a5, t0 | |
146 | ||
147 | and t2, t2, t3 | |
148 | bnez t2, L(ret0) /* the NUL is inside the bytes that matched: strings are equal */ | |
149 | ldx.d t1, a1, t8 /* otherwise the NUL is in the not-yet-compared tail of t4: fetch the a1 word that faces it */ | |
150 | srl.d t0, t4, a3 /* t0 = that tail of t4 at the low end; the shift fills the upper bits with zeros */ | |
151 | ||
152 | L(not_equal): /* t0 and t1 hold the deciding word pair */ | |
153 | sub.d t2, t0, a4 /* zero-byte markers of t0 */ | |
154 | andn t3, a5, t0 | |
155 | and t2, t2, t3 | |
156 | xor t3, t0, t1 /* bits in which the two words differ */ | |
157 | ||
158 | or t2, t2, t3 /* lowest set bit now lies in the first byte that is a NUL or a mismatch */ | |
159 | L(un_end): /* no branch to this label appears in this listing */ | |
160 | ctz.d t3, t2 /* t3 = bit index of the lowest set bit */ | |
161 | bstrins.d t3, zero, 2, 0 /* round the index down to the start of its byte */ | |
162 | srl.d t0, t0, t3 /* move the deciding byte of t0 to bits 7..0 */ | |
163 | ||
164 | ||
165 | srl.d t1, t1, t3 /* move the deciding byte of t1 to bits 7..0 */ | |
166 | andi t0, t0, 0xff /* keep only that byte */ | |
167 | andi t1, t1, 0xff /* keep only that byte */ | |
168 | sub.d t2, t0, t1 /* t2 = difference in the current (possibly swapped) argument order */ | |
169 | ||
170 | ||
171 | sub.d t3, t1, t0 /* t3 = difference in the opposite order */ | |
172 | masknez t0, t2, a6 /* t0 = t2 when no swap happened (a6 == 0), else 0 */ | |
173 | maskeqz t1, t3, a6 /* t1 = t3 when the pointers were swapped (a6 != 0), else 0 */ | |
174 | or a0, t0, t1 /* result expressed in the caller's original argument order */ | |
175 | ||
176 | jr ra /* return */ | |
177 | END(STRCMP_NAME) | |
178 | ||
179 | libc_hidden_builtin_def (STRCMP_NAME) /* Macro defined outside this file; presumably emits the hidden internal alias for the symbol defined above -- confirm against the libc symbol-visibility headers. */ |