]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/s390/multiarch/wcsrchr-vx.S
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / s390 / multiarch / wcsrchr-vx.S
1 /* Vector optimized 32/64 bit S/390 version of wcsrchr.
2 Copyright (C) 2015-2018 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
18
19 #if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)
20
21 # include "sysdep.h"
22 # include "asm-syntax.h"
23
24 .text
25
26 /* wchar_t *wcsrchr (const wchar_t *s, wchar_t c)
27 Locate the last character c in string.
28
29 Register usage:
30 -r0=loaded bytes in first part of s.
31 -r1=byte offset of the part of s with the last occurrence of c or -1 if not found.
32 -r2=s
33 -r3=c
34 -r4=tmp
35 -r5=current_len
36 -v16=part of s
37 -v17=index of found element
38 -v18=replicated c
39 -v19=part of s with last occurrence of c.
40 -v20=permute pattern
41 */
42 ENTRY(__wcsrchr_vx)
43 .machine "z13"
44 .machinemode "zarch_nohighgprs"
45
46 vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */
47 lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */
48
49 tmll %r2,3 /* Test if s is 4-byte aligned? */
50 jne .Lfallback /* And use common-code variant if not. */
51
52 vlvgf %v18,%r3,0 /* Generate vector which elements are all c. */
53 vrepf %v18,%v18,0 /* Replicate element 0 (c) into all elements. */
54
55 lghi %r1,-1 /* Currently no c found. */
56 lghi %r5,0 /* current_len = 0. */
57
58 vfeezfs %v17,%v16,%v18 /* Find element equal or zero. */
59 vlgvb %r4,%v17,7 /* Load byte index of c/zero or 16. */
60 clrjl %r4,%r0,.Lfound_first_part /* Found c/zero in loaded bytes. */
61 .Lalign:
62 /* Align s to 16 byte. */
63 risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15. */
64 lghi %r5,16 /* current_len = 16. */
65 slr %r5,%r4 /* Compute bytes to 16bytes boundary. */
66
67 .Lloop:
68 vl %v16,0(%r5,%r2) /* Load s. */
69 vfeezfs %v17,%v16,%v18 /* Find element equal with zero search. */
70 jno .Lfound /* Found c/zero (cc=0|1|2). */
71 vl %v16,16(%r5,%r2) /* Same check for the next 16 bytes. */
72 vfeezfs %v17,%v16,%v18
73 jno .Lfound16
74 vl %v16,32(%r5,%r2) /* Same check at offset 32. */
75 vfeezfs %v17,%v16,%v18
76 jno .Lfound32
77 vl %v16,48(%r5,%r2) /* Same check at offset 48. */
78 vfeezfs %v17,%v16,%v18
79 jno .Lfound48
80
81 aghi %r5,64 /* current_len += 64. */
82 j .Lloop /* No character and no zero -> loop. */
83
84 .Lfound48:
85 la %r5,16(%r5) /* Use la since aghi would clobber cc. */
86 .Lfound32:
87 la %r5,16(%r5) /* Falls through; each label adds 16. */
88 .Lfound16:
89 la %r5,16(%r5) /* current_len now matches the part in v16. */
90 .Lfound:
91 je .Lzero /* Found zero, but no c before that zero. */
92 /* Save this part of s to check for further matches after reaching
93 the end of the complete string. */
94 vlr %v19,%v16 /* v19 = part of s with the last found c. */
95 lgr %r1,%r5 /* r1 = byte offset of that part within s. */
96
97 jh .Lzero /* Found a zero after the found c. */
98 aghi %r5,16 /* Start search of next part of s. */
99 j .Lloop
100
101 .Lfound_first_part:
102 /* This code is only executed if the found c/zero is within loaded
103 bytes. If no c/zero was found (cc==3) the found index = 16, thus
104 this code is not called.
105 Resulting condition code of vector find element equal:
106 cc==0: no c, found zero
107 cc==1: c found, no zero
108 cc==2: c found, found zero after c
109 cc==3: no c, no zero (this case can be ignored). */
110 je .Lzero /* Found zero, but no c before that zero. */
111
112 locgrne %r1,%r5 /* Mark c as found in first part of s. */
113 vlr %v19,%v16 /* Keep first part of s for the search in .Lzero. */
114
115 jl .Lalign /* No zero (e.g. if vr was fully loaded)
116 -> Align and loop afterwards. */
117
118 /* Found a zero in vr. If vr was not fully loaded due to block
119 boundary, the remaining bytes are filled with zero and we can't
120 rely on zero indication of condition code here! */
121
122 vfenezf %v17,%v16,%v16 /* Find zero. */
123 vlgvb %r4,%v17,7 /* Load byte index of zero or 16. */
124 clrjl %r4,%r0,.Lzero /* Zero within loaded bytes -> end. */
125 j .Lalign /* Align and loop afterwards. */
126
127 .Lend_searched_zero:
128 vlgvb %r4,%v17,7 /* Load byte index of zero. */
129 algr %r5,%r4 /* current_len += byte index of zero. */
130 la %r2,0(%r5,%r2) /* Return pointer to zero. */
131 br %r14
132
133 .Lzero:
134 /* Reached end of string. Check if one c was found before. */
135 clije %r3,0,.Lend_searched_zero /* Found zero and c is zero. */
136
137 cgfi %r1,-1 /* No c found -> return NULL. */
138 locghie %r2,0
139 ber %r14
140
141 larl %r3,.Lpermute_mask /* Load permute mask. */
142 vl %v20,0(%r3) /* v20 = permute pattern. */
143
144 /* c was found and is part of v19. */
145 vfenezf %v17,%v19,%v19 /* Find zero. */
146 vlgvb %r4,%v17,7 /* Load byte index of zero or 16. */
147 ahi %r4,3 /* Found zero index is first byte,
148 thus highest byte index is last byte of
149 wchar_t zero. */
150
151 clgfi %r5,0 /* Loaded byte count in v19 is 16, ... */
152 lochine %r0,16 /* ... if v19 is not the first part of s. */
153 ahi %r0,-1 /* Convert byte count to highest index. */
154
155 clr %r0,%r4
156 locrl %r4,%r0 /* r4 = min (zero-index, highest-index). */
157
158 /* Right-shift of v19 to mask bytes after zero. */
159 clije %r4,15,.Lzero_permute /* No shift is needed if highest index
160 in vr is 15. */
161 lhi %r0,15
162 slr %r0,%r4 /* Compute byte count for vector shift right. */
163 sll %r0,3 /* Convert to bit count. */
164 vlvgb %v17,%r0,7 /* Put shift amount into byte 7 of v17. */
165 vsrlb %v19,%v19,%v17 /* Vector shift right by byte by number of bytes
166 specified in bits 1-4 of byte 7 in v17. */
167
168 /* Reverse the order of the wchar_t elements in v19. */
169 .Lzero_permute:
170 vperm %v19,%v19,%v19,%v20 /* Permute v19 to reversed element order. */
171
172 /* Find c in reversed v19. */
173 vfeef %v19,%v19,%v18 /* Find c. */
174 la %r2,0(%r1,%r2) /* r2 = address of the part of s held in v19. */
175 vlgvb %r3,%v19,7 /* Load byte index of c. */
176
177 /* Compute index in real s and return. */
178 slgr %r4,%r3
179 lay %r2,-3(%r4,%r2) /* Return pointer to last c. -3 is needed,
180 because the found byte index is reversed in
181 vector-register. Thus point to first byte of
182 wchar_t. */
183 br %r14
184 .Lpermute_mask:
185 .byte 0x0C,0x0D,0x0E,0x0F,0x08,0x09,0x0A,0x0B /* Elements 3 and 2 ... */
186 .byte 0x04,0x05,0x06,0x07,0x00,0x01,0x02,0x03 /* ... then 1 and 0. */
187 .Lfallback:
188 jg __wcsrchr_c /* Jump to the common-code variant. */
189 END(__wcsrchr_vx)
190 #endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */