]>
Commit | Line | Data |
---|---|---|
421749d6 | 1 | /* Vector optimized 32/64 bit S/390 version of memmem. |
d614a753 | 2 | Copyright (C) 2019-2020 Free Software Foundation, Inc. |
421749d6 SL |
3 | This file is part of the GNU C Library. |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
16 | License along with the GNU C Library; if not, see | |
5a82c748 | 17 | <https://www.gnu.org/licenses/>. */ |
421749d6 SL |
18 | |
19 | #include <ifunc-memmem.h> | |
20 | #if HAVE_MEMMEM_ARCH13 | |
21 | # include "sysdep.h" | |
22 | # include "asm-syntax.h" | |
23 | .text | |
24 | ||
25 | /* void *memmem(const void *haystack=r2, size_t haystacklen=r3, | |
26 | const void *needle=r4, size_t needlelen=r5); | |
27 | Locate a substring: r2 returns the address of the match, 0 if there is no match, or haystack itself if needlelen == 0. Needles longer than 9 bytes are handed over to MEMMEM_Z13. */ | |
28 | ENTRY(MEMMEM_ARCH13) | |
29 | .machine "arch13" | |
30 | .machinemode "zarch_nohighgprs" | |
31 | # if ! defined __s390x__ | |
32 | llgfr %r3,%r3 /* Zero-extend the 32-bit arguments to 64 bit. */ | |
33 | llgfr %r5,%r5 | |
34 | llgfr %r4,%r4 | |
35 | llgfr %r2,%r2 | |
36 | # endif /* ! defined __s390x__ */ | |
37 | clgrjl %r3,%r5,.Lend_no_match /* Haystack-len < needle-len? */ | |
38 | ||
39 | /* Jump to fallback if needle-len > 9. See also strstr-arch13.S. */ | |
40 | # if ! HAVE_MEMMEM_Z13 | |
41 | # error The arch13 variant of memmem needs the z13 variant of memmem! | |
42 | # endif /* ! HAVE_MEMMEM_Z13 */ | |
43 | clgfi %r5,9 | |
44 | jh MEMMEM_Z13 /* Needle-len > 9: continue in the z13 variant. */ | |
45 | ||
46 | aghik %r0,%r5,-1 /* vll needs highest index. */ | |
47 | bc 4,0(%r14) /* cc==1: needle-len == 0, return haystack (r2 unchanged). */ | |
48 | vll %v18,%r0,0(%r4) /* Load needle. */ | |
49 | vlvgb %v19,%r5,7 /* v19[7] contains length of needle. */ | |
50 | ||
51 | clgijh %r3,16,.Lhaystack_larger_16 /* Haystack-len > 16? */ | |
52 | .Lhaystack_smaller_16_on_bb: | |
53 | aghik %r0,%r3,-1 /* vll needs highest index. */ | |
54 | vll %v16,%r0,0(%r2) /* Load haystack. */ | |
55 | .Lhaystack_smaller_16: | |
56 | sgr %r3,%r5 /* r3 = largest valid match-index. */ | |
57 | jl .Lend_no_match /* Haystack-len < needle-len? */ | |
58 | vstrs %v20,%v16,%v18,%v19,0,0 | |
59 | /* Vector string search without zero search where v20 will contain | |
60 | the index of a partial/full match or 16 (index is named k). | |
61 | cc=0 (no match; k=16): .Lend_no_match | |
62 | cc=1 (only available with zero-search): Ignore | |
63 | cc=2 (full match; k<16): Needle found, but could be beyond haystack! | |
64 | cc=3 (partial match; k<16): Always at end of v16 and thus beyond! */ | |
65 | brc 9,.Lend_no_match /* Jump away if cc == 0 || cc == 3. */ | |
66 | vlgvb %r1,%v20,7 /* r1 = match-index k. */ | |
67 | /* Verify that the full-match (cc=2) is valid! */ | |
68 | clgrjh %r1,%r3,.Lend_no_match /* Jump away if match is beyond. */ | |
69 | la %r2,0(%r1,%r2) /* Return address of the match: r2 + k. */ | |
70 | br %r14 | |
71 | .Lend_no_match: | |
72 | lghi %r2,0 /* Return NULL. */ | |
73 | br %r14 | |
74 | ||
75 | .Lhaystack_larger_16: | |
76 | vl %v16,0(%r2) /* Load first 16 bytes of haystack. */ | |
77 | lghi %r1,17 | |
78 | lay %r4,-16(%r3,%r2) /* Boundary for loading with vl. */ | |
79 | lay %r0,-64(%r3,%r2) /* Boundary for loading with 4xvl. */ | |
80 | /* See also strstr-arch13.S: | |
81 | min-skip-partial-match-index = (16 - n_len) + 1 */ | |
82 | sgr %r1,%r5 /* r1 = 17 - needle-len. */ | |
83 | clgfi %r3,64 /* Set Boundary to zero ... */ | |
84 | la %r3,0(%r3,%r2) /* r3 = end of haystack. */ | |
85 | locghil %r0,0 /* ... if haystack < 64bytes. */ | |
86 | jh .Lloop64 /* Haystack-len > 64 (cc of clgfi). */ | |
87 | .Lloop: | |
88 | la %r2,16(%r2) | |
89 | /* Vector string search without zero search. cc=0 => no match. */ | |
90 | vstrs %v20,%v16,%v18,%v19,0,0 | |
91 | jne .Lloop_vstrs_nonzero_cc | |
92 | clgrjh %r2,%r4,.Lhaystack_too_small /* Less than 16 bytes left? */ | |
93 | .Lloop16: | |
94 | vl %v16,0(%r2) | |
95 | la %r2,16(%r2) | |
96 | vstrs %v20,%v16,%v18,%v19,0,0 | |
97 | jne .Lloop_vstrs_nonzero_cc | |
98 | clgrjle %r2,%r4,.Lloop16 | |
99 | .Lhaystack_too_small: | |
100 | sgr %r3,%r2 /* r3 = (haystack + len) - curr_pos */ | |
101 | je .Lend_no_match /* Remaining haystack is empty. */ | |
102 | lcbb %r0,0(%r2),6 /* Count of bytes up to the next block boundary. */ | |
103 | jo .Lhaystack_smaller_16_on_bb /* cc==3: less than 16 bytes to the boundary, load with vll. */ | |
104 | vl %v16,0(%r2) /* Load haystack (full 16 bytes). */ | |
105 | j .Lhaystack_smaller_16 | |
106 | ||
107 | .Lend_match_found: | |
108 | vlgvb %r4,%v20,7 /* r4 = match-index k. */ | |
109 | sgr %r2,%r1 /* r2 = address of the searched 16-byte part. */ | |
110 | la %r2,0(%r4,%r2) /* Return address of the match. */ | |
111 | br %r14 | |
112 | ||
113 | .Lloop_vstrs_nonzero_cc32: | |
114 | la %r2,16(%r2) /* Entries from .Lloop64 fall through: r2 = searched part + 16. */ | |
115 | .Lloop_vstrs_nonzero_cc16: | |
116 | la %r2,16(%r2) | |
117 | .Lloop_vstrs_nonzero_cc0: | |
118 | la %r2,16(%r2) | |
119 | .Lloop_vstrs_nonzero_cc: | |
120 | lay %r2,-16(%r1,%r2) /* Compute next load address. */ | |
121 | jh .Lend_match_found /* cc == 2 (full match) */ | |
122 | clgrjh %r2,%r4,.Lhaystack_too_small /* Less than 16 bytes left? */ | |
123 | vl %v16,0(%r2) | |
124 | .Lloop_vstrs_nonzero_cc_loop: | |
125 | la %r2,0(%r1,%r2) | |
126 | vstrs %v20,%v16,%v18,%v19,0,0 | |
127 | jh .Lend_match_found /* cc == 2 (full match) */ | |
128 | clgrjh %r2,%r4,.Lhaystack_too_small | |
129 | vl %v16,0(%r2) /* Next part of haystack. */ | |
130 | jo .Lloop_vstrs_nonzero_cc_loop /* cc == 3 (partial match) */ | |
131 | /* Case: no-match. */ | |
132 | clgrjh %r2,%r0,.Lloop /* Jump away if haystack has less than 64 bytes left. */ | |
133 | .Lloop64: | |
134 | vstrs %v20,%v16,%v18,%v19,0,0 | |
135 | jne .Lloop_vstrs_nonzero_cc0 | |
136 | vl %v16,16(%r2) /* Next part of haystack. */ | |
137 | vstrs %v20,%v16,%v18,%v19,0,0 | |
138 | jne .Lloop_vstrs_nonzero_cc16 | |
139 | vl %v16,32(%r2) /* Next part of haystack. */ | |
140 | vstrs %v20,%v16,%v18,%v19,0,0 | |
141 | jne .Lloop_vstrs_nonzero_cc32 | |
142 | vl %v16,48(%r2) /* Next part of haystack. */ | |
143 | la %r2,64(%r2) | |
144 | vstrs %v20,%v16,%v18,%v19,0,0 | |
145 | jne .Lloop_vstrs_nonzero_cc | |
146 | clgrjh %r2,%r4,.Lhaystack_too_small | |
147 | vl %v16,0(%r2) /* Next part of haystack. */ | |
148 | clgrjle %r2,%r0,.Lloop64 | |
149 | j .Lloop /* Beyond the 4xvl boundary: continue with 16-byte steps. */ | |
150 | END(MEMMEM_ARCH13) | |
151 | ||
152 | # if ! HAVE_MEMMEM_IFUNC | |
153 | strong_alias (MEMMEM_ARCH13, __memmem) /* No ifunc: this variant provides __memmem ... */ | |
154 | weak_alias (__memmem, memmem) /* ... and, as weak alias, memmem. */ | |
155 | # endif /* ! HAVE_MEMMEM_IFUNC */ | |
156 | ||
157 | # if MEMMEM_Z13_ONLY_USED_AS_FALLBACK && defined SHARED && IS_IN (libc) | |
158 | weak_alias (MEMMEM_ARCH13, __GI_memmem) /* The __GI_ names are aliases of this variant, too. */ | |
159 | strong_alias (MEMMEM_ARCH13, __GI___memmem) | |
160 | # endif /* MEMMEM_Z13_ONLY_USED_AS_FALLBACK && defined SHARED && IS_IN (libc) */ | |
161 | #endif |