]> git.ipfire.org Git - thirdparty/glibc.git/blame - string/strxfrm_l.c
* elf/dl-debug.c (_dl_debug_initialize): Check r->r_map for 0
[thirdparty/glibc.git] / string / strxfrm_l.c
CommitLineData
bc3a45ce 1/* Copyright (C) 1995,96,97,2002, 2004, 2005 Free Software Foundation, Inc.
c84142e8 2 This file is part of the GNU C Library.
1ab62b32 3 Written by Ulrich Drepper <drepper@gnu.org>, 1995.
c84142e8
UD
4
5 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
c84142e8
UD
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 13 Lesser General Public License for more details.
c84142e8 14
41bdb6e2
AJ
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307 USA. */
c84142e8 19
ccadf7b5
UD
20#include <assert.h>
21#include <langinfo.h>
22#include <locale.h>
23#include <stddef.h>
24#include <stdint.h>
25#include <stdlib.h>
26#include <string.h>
27#include <sys/param.h>
1ab62b32 28
ccadf7b5
UD
29#ifndef STRING_TYPE
30# define STRING_TYPE char
31# define USTRING_TYPE unsigned char
32# define STRXFRM __strxfrm_l
33# define STRCMP strcmp
34# define STRLEN strlen
35# define STPNCPY __stpncpy
36# define WEIGHT_H "../locale/weight.h"
37# define SUFFIX MB
38# define L(arg) arg
39#endif
40
41#define CONCAT(a,b) CONCAT1(a,b)
42#define CONCAT1(a,b) a##b
43
44#include "../locale/localeinfo.h"
45
46
47#ifndef WIDE_CHAR_VERSION
48
49/* We need UTF-8 encoding of numbers. */
50static int
51utf8_encode (char *buf, int val)
52{
53 int retval;
54
55 if (val < 0x80)
56 {
57 *buf++ = (char) val;
58 retval = 1;
59 }
60 else
61 {
62 int step;
63
64 for (step = 2; step < 6; ++step)
65 if ((val & (~(uint32_t)0 << (5 * step + 1))) == 0)
66 break;
67 retval = step;
68
69 *buf = (unsigned char) (~0xff >> step);
70 --step;
71 do
72 {
73 buf[step] = 0x80 | (val & 0x3f);
74 val >>= 6;
75 }
76 while (--step > 0);
77 *buf |= val;
78 }
79
80 return retval;
81}
82#endif
83
84
85size_t
86STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l)
87{
88 struct locale_data *current = l->__locales[LC_COLLATE];
89 uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word;
90 /* We don't assign the following values right away since it might be
91 unnecessary in case there are no rules. */
92 const unsigned char *rulesets;
93 const int32_t *table;
94 const USTRING_TYPE *weights;
95 const USTRING_TYPE *extra;
96 const int32_t *indirect;
97 uint_fast32_t pass;
98 size_t needed;
99 const USTRING_TYPE *usrc;
100 size_t srclen = STRLEN (src);
101 int32_t *idxarr;
102 unsigned char *rulearr;
103 size_t idxmax;
104 size_t idxcnt;
105 int use_malloc;
106
107#include WEIGHT_H
108
109 if (nrules == 0)
110 {
111 if (n != 0)
112 STPNCPY (dest, src, MIN (srclen + 1, n));
113
114 return srclen;
115 }
116
117 rulesets = (const unsigned char *)
118 current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
119 table = (const int32_t *)
120 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
121 weights = (const USTRING_TYPE *)
122 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
123 extra = (const USTRING_TYPE *)
124 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
125 indirect = (const int32_t *)
126 current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
127 use_malloc = 0;
128
129 assert (((uintptr_t) table) % __alignof__ (table[0]) == 0);
130 assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0);
131 assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0);
132 assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0);
133
134 /* Handle an empty string as a special case. */
135 if (srclen == 0)
136 {
137 if (n != 0)
138 *dest = L('\0');
139 return 0;
140 }
141
142 /* We need the elements of the string as unsigned values since they
143 are used as indeces. */
144 usrc = (const USTRING_TYPE *) src;
145
146 /* Perform the first pass over the string and while doing this find
147 and store the weights for each character. Since we want this to
148 be as fast as possible we are using `alloca' to store the temporary
149 values. But since there is no limit on the length of the string
150 we have to use `malloc' if the string is too long. We should be
151 very conservative here. */
152 if (! __libc_use_alloca (srclen))
153 {
154 idxarr = (int32_t *) malloc ((srclen + 1) * (sizeof (int32_t) + 1));
155 rulearr = (unsigned char *) &idxarr[srclen];
156
157 if (idxarr == NULL)
158 /* No memory. Well, go with the stack then.
159
160 XXX Once this implementation is stable we will handle this
161 differently. Instead of precomputing the indeces we will
162 do this in time. This means, though, that this happens for
163 every pass again. */
164 goto try_stack;
165 use_malloc = 1;
166 }
167 else
168 {
169 try_stack:
170 idxarr = (int32_t *) alloca (srclen * sizeof (int32_t));
171 rulearr = (unsigned char *) alloca (srclen + 1);
172 }
173
174 idxmax = 0;
175 do
176 {
177 int32_t tmp = findidx (&usrc);
178 rulearr[idxmax] = tmp >> 24;
179 idxarr[idxmax] = tmp & 0xffffff;
180
181 ++idxmax;
182 }
183 while (*usrc != L('\0'));
184
185 /* This element is only read, the value never used but to determine
186 another value which then is ignored. */
187 rulearr[idxmax] = '\0';
188
189 /* Now the passes over the weights. We now use the indeces we found
190 before. */
191 needed = 0;
192 for (pass = 0; pass < nrules; ++pass)
193 {
194 size_t backw_stop = ~0ul;
195 int rule = rulesets[rulearr[0] * nrules + pass];
196 /* We assume that if a rule has defined `position' in one section
197 this is true for all of them. */
198 int position = rule & sort_position;
199
200 if (position == 0)
201 {
202 for (idxcnt = 0; idxcnt < idxmax; ++idxcnt)
203 {
204 if ((rule & sort_forward) != 0)
205 {
206 size_t len;
207
208 if (backw_stop != ~0ul)
209 {
210 /* Handle the pushed elements now. */
211 size_t backw;
212
bc3a45ce 213 for (backw = idxcnt; backw > backw_stop; )
ccadf7b5 214 {
bc3a45ce 215 --backw;
ccadf7b5
UD
216 len = weights[idxarr[backw]++];
217
218 if (needed + len < n)
219 while (len-- > 0)
220 dest[needed++] = weights[idxarr[backw]++];
221 else
222 {
223 /* No more characters fit into the buffer. */
224 needed += len;
225 idxarr[backw] += len;
226 }
227 }
228
229 backw_stop = ~0ul;
230 }
231
232 /* Now handle the forward element. */
233 len = weights[idxarr[idxcnt]++];
234 if (needed + len < n)
235 while (len-- > 0)
236 dest[needed++] = weights[idxarr[idxcnt]++];
237 else
238 {
239 /* No more characters fit into the buffer. */
240 needed += len;
241 idxarr[idxcnt] += len;
242 }
243 }
244 else
245 {
246 /* Remember where the backwards series started. */
247 if (backw_stop == ~0ul)
248 backw_stop = idxcnt;
249 }
250
251 rule = rulesets[rulearr[idxcnt + 1] * nrules + pass];
252 }
253
254
255 if (backw_stop != ~0ul)
256 {
257 /* Handle the pushed elements now. */
258 size_t backw;
259
260 backw = idxcnt;
261 while (backw > backw_stop)
262 {
263 size_t len = weights[idxarr[--backw]++];
264
265 if (needed + len < n)
266 while (len-- > 0)
267 dest[needed++] = weights[idxarr[backw]++];
268 else
269 {
270 /* No more characters fit into the buffer. */
271 needed += len;
272 idxarr[backw] += len;
273 }
274 }
275 }
276 }
277 else
278 {
279 int val = 1;
280#ifndef WIDE_CHAR_VERSION
281 char buf[7];
282 size_t buflen;
283#endif
284 size_t i;
285
286 for (idxcnt = 0; idxcnt < idxmax; ++idxcnt)
287 {
288 if ((rule & sort_forward) != 0)
289 {
290 size_t len;
291
292 if (backw_stop != ~0ul)
293 {
294 /* Handle the pushed elements now. */
295 size_t backw;
296
bc3a45ce 297 for (backw = idxcnt; backw > backw_stop; )
ccadf7b5 298 {
bc3a45ce 299 --backw;
ccadf7b5
UD
300 len = weights[idxarr[backw]++];
301 if (len != 0)
302 {
303#ifdef WIDE_CHAR_VERSION
304 if (needed + 1 + len < n)
305 {
306 dest[needed] = val;
307 for (i = 0; i < len; ++i)
308 dest[needed + 1 + i] =
309 weights[idxarr[backw] + i];
310 }
311 needed += 1 + len;
312#else
313 buflen = utf8_encode (buf, val);
314 if (needed + buflen + len < n)
315 {
316 for (i = 0; i < buflen; ++i)
317 dest[needed + i] = buf[i];
318 for (i = 0; i < len; ++i)
319 dest[needed + buflen + i] =
320 weights[idxarr[backw] + i];
321 }
322 needed += buflen + len;
323#endif
324 idxarr[backw] += len;
325 val = 1;
326 }
327 else
328 ++val;
329 }
330
331 backw_stop = ~0ul;
332 }
333
334 /* Now handle the forward element. */
335 len = weights[idxarr[idxcnt]++];
336 if (len != 0)
337 {
338#ifdef WIDE_CHAR_VERSION
339 if (needed + 1+ len < n)
340 {
341 dest[needed] = val;
342 for (i = 0; i < len; ++i)
343 dest[needed + 1 + i] =
344 weights[idxarr[idxcnt] + i];
345 }
346 needed += 1 + len;
347#else
348 buflen = utf8_encode (buf, val);
349 if (needed + buflen + len < n)
350 {
351 for (i = 0; i < buflen; ++i)
352 dest[needed + i] = buf[i];
353 for (i = 0; i < len; ++i)
354 dest[needed + buflen + i] =
355 weights[idxarr[idxcnt] + i];
356 }
357 needed += buflen + len;
358#endif
359 idxarr[idxcnt] += len;
360 val = 1;
361 }
362 else
363 /* Note that we don't have to increment `idxarr[idxcnt]'
364 since the length is zero. */
365 ++val;
366 }
367 else
368 {
369 /* Remember where the backwards series started. */
370 if (backw_stop == ~0ul)
371 backw_stop = idxcnt;
372 }
373
374 rule = rulesets[rulearr[idxcnt + 1] * nrules + pass];
375 }
376
377 if (backw_stop != ~0ul)
378 {
379 /* Handle the pushed elements now. */
380 size_t backw;
381
382 backw = idxmax - 1;
383 while (backw > backw_stop)
384 {
385 size_t len = weights[idxarr[--backw]++];
386 if (len != 0)
387 {
388#ifdef WIDE_CHAR_VERSION
389 if (needed + 1 + len < n)
390 {
391 dest[needed] = val;
392 for (i = 0; i < len; ++i)
393 dest[needed + 1 + i] =
394 weights[idxarr[backw] + i];
395 }
396 needed += 1 + len;
397#else
398 buflen = utf8_encode (buf, val);
399 if (needed + buflen + len < n)
400 {
401 for (i = 0; i < buflen; ++i)
402 dest[needed + i] = buf[i];
403 for (i = 0; i < len; ++i)
404 dest[needed + buflen + i] =
405 weights[idxarr[backw] + i];
406 }
407 needed += buflen + len;
408#endif
409 idxarr[backw] += len;
410 val = 1;
411 }
412 else
413 ++val;
414 }
415 }
416 }
417
418 /* Finally store the byte to separate the passes or terminate
419 the string. */
420 if (needed < n)
421 dest[needed] = pass + 1 < nrules ? L('\1') : L('\0');
422 ++needed;
423 }
424
425 /* This is a little optimization: many collation specifications have
426 a `position' rule at the end and if no non-ignored character
427 is found the last \1 byte is immediately followed by a \0 byte
428 signalling this. We can avoid the \1 byte(s). */
a334319f 429 if (needed <= n && needed > 2 && dest[needed - 2] == L('\1'))
ccadf7b5
UD
430 {
431 /* Remove the \1 byte. */
a334319f
UD
432 --needed;
433 dest[needed - 1] = L('\0');
ccadf7b5
UD
434 }
435
436 /* Free the memory if needed. */
437 if (use_malloc)
438 free (idxarr);
439
440 /* Return the number of bytes/words we need, but don't count the NUL
441 byte/word at the end. */
442 return needed - 1;
443}
444libc_hidden_def (STRXFRM)
445
446#ifndef WIDE_CHAR_VERSION
1ab62b32 447weak_alias (__strxfrm_l, strxfrm_l)
ccadf7b5 448#endif