]>
Commit | Line | Data |
---|---|---|
6f47401b | 1 | /* Vector optimized 32/64 bit S/390 version of strstr. |
d614a753 | 2 | Copyright (C) 2019-2020 Free Software Foundation, Inc. |
6f47401b SL |
3 | This file is part of the GNU C Library. |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
16 | License along with the GNU C Library; if not, see | |
5a82c748 | 17 | <https://www.gnu.org/licenses/>. */ |
6f47401b SL |
18 | |
19 | #include <ifunc-strstr.h> | |
20 | #if HAVE_STRSTR_ARCH13 | |
21 | # include "sysdep.h" | |
22 | # include "asm-syntax.h" | |
23 | .text | |
24 | ||
/* char *strstr (const char *haystack=r2, const char *needle=r3)
   Locate a substring.

   Returns in r2:
   - haystack itself if needle is the empty string,
   - a pointer to the start of the match if needle was found,
   - NULL if the end of haystack was reached without a match.
   Needles longer than 9 bytes are handed over to STRSTR_Z13.

   Register usage:
   r1:  number of bytes loadable up to the next block boundary (lcbb),
	turned into the highest index for vll.
   r2:  current position in haystack / return value.
   r3:  pointer to needle.
   r4:  length of needle; later the index of the full match.
   r5:  min-skip-partial-match-index = 17 - length of needle.
   r14: return address.
   v16: current part of haystack.
   v17: index of the first zero in a partially loaded haystack.
   v18: needle.
   v19: byte 7 contains the length of needle.
   v20: result of vstrs; byte 7 contains the match index.
   v21: byte 7 contains the highest index loaded by vll.  */
ENTRY(STRSTR_ARCH13)
	.machine "arch13"
	.machinemode "zarch_nohighgprs"
	lcbb	%r1,0(%r3),6
	jo	.Lneedle_on_bb	/* Needle on block-boundary?  */
	/* The needle has no guaranteed alignment, thus load it without an
	   alignment hint - just like every other vl in this file.  */
	vl	%v18,0(%r3)	/* Load needle.  */
	vfenezb %v19,%v18,%v18	/* v19[7] contains the length of needle.  */
.Lneedle_loaded:
	vlgvb	%r4,%v19,7	/* Get index of zero or 16 if not found.  */
	lghi	%r5,17		/* See below: min-skip-partial-match-index.  */
	/* An empty needle matches at the start: r2 is still haystack.  */
	cgibe	%r4,0,0(%r14)	/* Test if needle is zero and return.  */

	/* The vstrs instruction is able to handle needles up to a length of 16,
	   but then we may have to load the next part of haystack with a
	   small offset.  This will be slow - see examples:
	   haystack =mmmmmmmmmmmmmmmm mmmmmmmmmmmmmmmmmm...mmmmmmmmmmmmmmmmmmma
	   needle   =  mmmmmmmmmmmmmma0
	   => needle_len=15; vstrs reports a partial match; haystack+=2
	   haystack =mmmmmmmmmmmmmmmm mmmmmmmmmmmmmmmmmm...mmmmmmmmmmmmmmmmmmma
	   needle   =        mmmmmmmma0000000
	   => needle_len=9; vstrs reports a partial match; haystack+=8  */
# if ! HAVE_STRSTR_Z13
#  error The arch13 variant of strstr needs the z13 variant of strstr!
# endif
	/* r4 is also 16 if no zero was found within the first 16 bytes of
	   needle, thus such needles are passed to the z13 variant, too.
	   r2 and r3 are still unchanged at this point.  */
	clgfi	%r4,9
	jh	STRSTR_Z13

	/* In case of a partial match, the vstrs instruction returns the index
	   of the partial match in a vector-register.  Then we have to
	   reload the string at the "current-position plus this index" and run
	   vstrs again in order to determine if it was a full match or no match.
	   Transferring this index from vr to gr, compute the haystack-address
	   and loading with vl is quite slow as all instructions have data
	   dependencies.  Thus we assume, that a partial match is always at the
	   first possible index and just load the next part of haystack from
	   there instead of waiting until the correct index is computed:
	   min-skip-partial-match-index = (16 - n_len) + 1  */
	sgr	%r5,%r4		/* r5 = 17 - n_len.  */

	/* Main loop: process 4 x 16 bytes of haystack per iteration.  Before
	   each load, lcbb checks whether 16 bytes can be loaded without
	   crossing the block boundary.  */
.Lloop:
	lcbb	%r1,0(%r2),6
	jo	.Lloop_haystack_on_bb	/* Haystack on block-boundary?  */
	vl	%v16,0(%r2)		/* Load next part of haystack.  */
.Lloop_haystack_loaded:
	/* Vector string search with zero search (cc=0 => no match).  */
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc
	lcbb	%r1,16(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb16
	vl	%v16,16(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc16
	lcbb	%r1,32(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb32
	vl	%v16,32(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc32
	lcbb	%r1,48(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb48
	vl	%v16,48(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc48
	la	%r2,64(%r2)
	j	.Lloop

	/* The following labels fall through into each other in order to
	   adjust r2 to the 16-byte part which produced the nonzero cc.
	   The cc set by vstrs is still evaluated afterwards.  */
.Lloop_vstrs_nonzero_cc48:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc32:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc16:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc:
	jh	.Lend_match_found /* cc == 2 (full match)  */
	jl	.Lend_no_match	  /* cc == 1 (no match, end of string)  */
	/* cc == 3 (partial match) See above: min-skip-partial-match-index!  */
	lcbb	%r1,0(%r5,%r2),6
	la	%r2,0(%r5,%r2)
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	/* Partial-match loop: as long as vstrs reports a partial match
	   (cc == 3), advance by r5 and search again.  On cc == 0 advance by
	   r5 and return to the main loop.  The cc of vstrs is evaluated by
	   the je after the la, thus the la in between must keep it.  */
.Lloop_vstrs_nonzero_cc_loop:
	jh	.Lend_match_found
	jl	.Lend_no_match
	la	%r2,0(%r5,%r2)
	je	.Lloop
	lcbb	%r1,0(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jh	.Lend_match_found
	jl	.Lend_no_match
	la	%r2,0(%r5,%r2)
	je	.Lloop
	lcbb	%r1,0(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jh	.Lend_match_found
	jl	.Lend_no_match
	la	%r2,0(%r5,%r2)
	je	.Lloop
	lcbb	%r1,0(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	j	.Lloop_vstrs_nonzero_cc_loop

.Lend_no_match:
	lghi	%r2,0		/* Return NULL.  */
	br	%r14
.Lend_match_found:
	vlgvb	%r4,%v20,7	/* Index of the match within v16.  */
	la	%r2,0(%r4,%r2)	/* Return pointer to the match.  */
	br	%r14

	/* The following labels fall through into each other in order to
	   adjust r2 to the 16-byte part which is located on the boundary.  */
.Lloop_haystack_on_bb48:
	la	%r2,16(%r2)
.Lloop_haystack_on_bb32:
	la	%r2,16(%r2)
.Lloop_haystack_on_bb16:
	la	%r2,16(%r2)
.Lloop_haystack_on_bb:
	/* Haystack located on page-boundary.  */
	ahi	%r1,-1	      /* vll needs highest index instead of count.  */
	vll	%v16,%r1,0(%r2)
	vlvgb	%v21,%r1,7
	vfenezb	%v17,%v16,%v16	/* Search zero in loaded haystack bytes.  */
	veclb	%v17,%v21		/* Zero index <= loaded byte index?  */
	jle	.Lloop_haystack_loaded	/* -> v16 contains full haystack.  */
	/* No zero within the loaded bytes: haystack continues behind the
	   boundary, thus the full 16 bytes are loaded.  */
	vl	%v16,0(%r2)	/* Load haystack beyond page boundary.  */
	j	.Lloop_haystack_loaded

.Lneedle_on_bb:
	/* Needle located on page-boundary.  */
	ahi	%r1,-1	      /* vll needs highest index instead of count.  */
	vll	%v18,%r1,0(%r3)
	vlvgb	%v21,%r1,7
	vfenezb	%v19,%v18,%v18	/* Search zero in loaded needle bytes.  */
	veclb	%v19,%v21		/* Zero index <= max loaded byte index?  */
	jle	.Lneedle_loaded		/* -> v18 contains full needle.  */
	/* No zero within the loaded bytes: needle continues behind the
	   boundary, thus load the full 16 bytes and recompute its length.  */
	vl	%v18,0(%r3)	/* Load needle beyond page boundary.  */
	vfenezb	%v19,%v18,%v18
	j	.Lneedle_loaded
END(STRSTR_ARCH13)

/* Export this variant directly as strstr if HAVE_STRSTR_IFUNC is not
   set.  */
# if ! HAVE_STRSTR_IFUNC
strong_alias (STRSTR_ARCH13, strstr)
# endif

/* Provide the __GI_strstr alias from this variant under the condition
   below.  */
# if STRSTR_Z13_ONLY_USED_AS_FALLBACK && defined SHARED && IS_IN (libc)
strong_alias (STRSTR_ARCH13, __GI_strstr)
# endif
179 | #endif |