]>
Commit | Line | Data |
---|---|---|
dff8da6b | 1 | /* Copyright (C) 1995-2024 Free Software Foundation, Inc. |
c84142e8 | 2 | This file is part of the GNU C Library. |
c84142e8 UD |
3 | |
4 | The GNU C Library is free software; you can redistribute it and/or | |
41bdb6e2 AJ |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License as published by the Free Software Foundation; either | |
7 | version 2.1 of the License, or (at your option) any later version. | |
c84142e8 UD |
8 | |
9 | The GNU C Library is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 12 | Lesser General Public License for more details. |
c84142e8 | 13 | |
41bdb6e2 | 14 | You should have received a copy of the GNU Lesser General Public |
59ba27a6 | 15 | License along with the GNU C Library; if not, see |
5a82c748 | 16 | <https://www.gnu.org/licenses/>. */ |
c84142e8 | 17 | |
ccadf7b5 UD |
18 | #include <assert.h> |
19 | #include <langinfo.h> | |
20 | #include <locale.h> | |
21 | #include <stddef.h> | |
22 | #include <stdint.h> | |
23 | #include <stdlib.h> | |
24 | #include <string.h> | |
25 | #include <sys/param.h> | |
1ab62b32 | 26 | |
/* Default configuration.  Unless STRING_TYPE has already been defined
   before this point, select `char' strings and give the function the
   name __strxfrm_l.  */
#ifndef STRING_TYPE
# define STRING_TYPE char
# define USTRING_TYPE unsigned char
# define STRXFRM __strxfrm_l
# define STRLEN strlen
# define STPNCPY __stpncpy
/* Header included further down; it is expected to provide findidx.  */
# define WEIGHT_H "../locale/weight.h"
/* Pasted onto the _NL_COLLATE_* item names by STRXFRM via CONCAT.  */
# define SUFFIX MB
/* Wrapper for character literals; the identity for `char' strings.  */
# define L(arg) arg
#endif

/* Token pasting in two steps so that macro arguments such as SUFFIX are
   expanded before they are joined.  */
#define CONCAT(a,b) CONCAT1(a,b)
#define CONCAT1(a,b) a##b

/* Maximum string size that is calculated with cached indices.  Right now this
   is an arbitrary value open to optimizations.  SMALL_STR_SIZE * 4 has to be
   lower than __MAX_ALLOCA_CUTOFF.  Keep localedata/xfrm-test.c in sync.  */
#define SMALL_STR_SIZE 4095
45 | ||
ccadf7b5 | 46 | #include "../locale/localeinfo.h" |
8c0ab919 | 47 | #include WEIGHT_H |
ccadf7b5 | 48 | |
/* Group locale data for shorter parameter lists.  STRXFRM fills the
   structure from the LC_COLLATE data of the locale it is given.  */
typedef struct
{
  /* Number of passes made over the string; zero selects the plain
     copy path in STRXFRM.  */
  uint32_t nrules;
  /* Rule flags, indexed as rulesets[rule_idx * nrules + pass] and tested
     against sort_forward and sort_position.  */
  unsigned char *rulesets;
  /* Weight data: for each element a length followed by that many weights,
     repeated once per pass (see find_idx).  */
  USTRING_TYPE *weights;
  /* The three lookup tables below are handed unchanged to findidx.  */
  int32_t *table;
  USTRING_TYPE *extra;
  int32_t *indirect;
} locale_data_t;
ccadf7b5 UD |
59 | |
60 | #ifndef WIDE_CHAR_VERSION | |
61 | ||
/* We need UTF-8 encoding of numbers.

   Store VAL at BUF using the UTF-8 bit layout (extended to sequences of
   up to six bytes) and return the number of bytes written.  BUF must
   have room for at least six bytes.  Values below 0x80, negative ones
   included, are stored as a single byte.  */
static int
utf8_encode (char *buf, int val)
{
  if (val < 0x80)
    {
      buf[0] = (char) val;
      return 1;
    }

  /* A sequence of STEP bytes carries 5 * STEP + 1 payload bits.  Pick the
     shortest sequence that can hold VAL; anything too large for five
     bytes uses six.  */
  int step = 2;
  while (step < 6 && ((uint32_t) val >> (5 * step + 1)) != 0)
    ++step;

  /* Lead byte marker: STEP one bits followed by a zero bit.  The shift is
     done on an unsigned constant because right-shifting a negative int is
     implementation-defined.  */
  unsigned char lead = (unsigned char) (0xff00u >> step);

  /* Continuation bytes are filled from the last one towards the front,
     six payload bits each.  */
  for (int i = step - 1; i > 0; --i)
    {
      buf[i] = (char) (0x80 | (val & 0x3f));
      val >>= 6;
    }

  /* Whatever is left of VAL fits into the free bits of the lead byte.  */
  buf[0] = (char) (lead | val);

  return step;
}
95 | #endif | |
96 | ||
0f9e5854 LH |
97 | /* Find next weight and rule index. Inlined since called for every char. */ |
98 | static __always_inline size_t | |
99 | find_idx (const USTRING_TYPE **us, int32_t *weight_idx, | |
100 | unsigned char *rule_idx, const locale_data_t *l_data, const int pass) | |
101 | { | |
102 | int32_t tmp = findidx (l_data->table, l_data->indirect, l_data->extra, us, | |
103 | -1); | |
104 | *rule_idx = tmp >> 24; | |
105 | int32_t idx = tmp & 0xffffff; | |
106 | size_t len = l_data->weights[idx++]; | |
107 | ||
108 | /* Skip over indices of previous levels. */ | |
109 | for (int i = 0; i < pass; i++) | |
110 | { | |
111 | idx += len; | |
112 | len = l_data->weights[idx++]; | |
113 | } | |
ccadf7b5 | 114 | |
0f9e5854 LH |
115 | *weight_idx = idx; |
116 | return len; | |
117 | } | |
118 | ||
119 | static int | |
120 | find_position (const USTRING_TYPE *us, const locale_data_t *l_data, | |
121 | const int pass) | |
ccadf7b5 | 122 | { |
0f9e5854 LH |
123 | int32_t weight_idx; |
124 | unsigned char rule_idx; | |
125 | const USTRING_TYPE *usrc = us; | |
126 | ||
127 | find_idx (&usrc, &weight_idx, &rule_idx, l_data, pass); | |
128 | return l_data->rulesets[rule_idx * l_data->nrules + pass] & sort_position; | |
129 | } | |
130 | ||
131 | /* Do the transformation. */ | |
132 | static size_t | |
133 | do_xfrm (const USTRING_TYPE *usrc, STRING_TYPE *dest, size_t n, | |
134 | const locale_data_t *l_data) | |
135 | { | |
136 | int32_t weight_idx; | |
137 | unsigned char rule_idx; | |
535e935a | 138 | uint32_t pass; |
0f9e5854 | 139 | size_t needed = 0; |
2f334ad5 | 140 | size_t last_needed; |
ccadf7b5 | 141 | |
0f9e5854 LH |
142 | /* Now the passes over the weights. */ |
143 | for (pass = 0; pass < l_data->nrules; ++pass) | |
ccadf7b5 | 144 | { |
0f9e5854 LH |
145 | size_t backw_len = 0; |
146 | last_needed = needed; | |
147 | const USTRING_TYPE *cur = usrc; | |
148 | const USTRING_TYPE *backw_start = NULL; | |
ccadf7b5 | 149 | |
0f9e5854 LH |
150 | /* We assume that if a rule has defined `position' in one section |
151 | this is true for all of them. */ | |
152 | int position = find_position (cur, l_data, pass); | |
ccadf7b5 | 153 | |
0f9e5854 LH |
154 | if (position == 0) |
155 | { | |
156 | while (*cur != L('\0')) | |
157 | { | |
158 | const USTRING_TYPE *pos = cur; | |
159 | size_t len = find_idx (&cur, &weight_idx, &rule_idx, l_data, | |
160 | pass); | |
161 | int rule = l_data->rulesets[rule_idx * l_data->nrules + pass]; | |
ccadf7b5 | 162 | |
0f9e5854 LH |
163 | if ((rule & sort_forward) != 0) |
164 | { | |
165 | /* Handle the pushed backward sequence. */ | |
166 | if (backw_start != NULL) | |
167 | { | |
168 | for (size_t i = backw_len; i > 0; ) | |
169 | { | |
170 | int32_t weight_idx; | |
171 | unsigned char rule_idx; | |
172 | size_t len = find_idx (&backw_start, &weight_idx, | |
173 | &rule_idx, l_data, pass); | |
174 | if (needed + i < n) | |
175 | for (size_t j = len; j > 0; j--) | |
176 | dest[needed + i - j] = | |
177 | l_data->weights[weight_idx++]; | |
178 | ||
179 | i -= len; | |
180 | } | |
ccadf7b5 | 181 | |
0f9e5854 LH |
182 | needed += backw_len; |
183 | backw_start = NULL; | |
184 | backw_len = 0; | |
185 | } | |
ccadf7b5 | 186 | |
0f9e5854 LH |
187 | /* Now handle the forward element. */ |
188 | if (needed + len < n) | |
189 | while (len-- > 0) | |
190 | dest[needed++] = l_data->weights[weight_idx++]; | |
191 | else | |
192 | /* No more characters fit into the buffer. */ | |
193 | needed += len; | |
194 | } | |
195 | else | |
196 | { | |
197 | /* Remember start of the backward sequence & track length. */ | |
198 | if (backw_start == NULL) | |
199 | backw_start = pos; | |
200 | backw_len += len; | |
201 | } | |
202 | } | |
203 | ||
204 | ||
205 | /* Handle the pushed backward sequence. */ | |
206 | if (backw_start != NULL) | |
207 | { | |
208 | for (size_t i = backw_len; i > 0; ) | |
209 | { | |
210 | size_t len = find_idx (&backw_start, &weight_idx, &rule_idx, | |
211 | l_data, pass); | |
212 | if (needed + i < n) | |
213 | for (size_t j = len; j > 0; j--) | |
214 | dest[needed + i - j] = | |
215 | l_data->weights[weight_idx++]; | |
216 | ||
217 | i -= len; | |
218 | } | |
219 | ||
220 | needed += backw_len; | |
221 | } | |
222 | } | |
223 | else | |
224 | { | |
225 | int val = 1; | |
226 | #ifndef WIDE_CHAR_VERSION | |
227 | char buf[7]; | |
228 | size_t buflen; | |
229 | #endif | |
230 | size_t i; | |
231 | ||
232 | while (*cur != L('\0')) | |
233 | { | |
234 | const USTRING_TYPE *pos = cur; | |
235 | size_t len = find_idx (&cur, &weight_idx, &rule_idx, l_data, | |
236 | pass); | |
237 | int rule = l_data->rulesets[rule_idx * l_data->nrules + pass]; | |
238 | ||
239 | if ((rule & sort_forward) != 0) | |
240 | { | |
241 | /* Handle the pushed backward sequence. */ | |
242 | if (backw_start != NULL) | |
243 | { | |
244 | for (size_t p = backw_len; p > 0; p--) | |
245 | { | |
246 | size_t len; | |
247 | int32_t weight_idx; | |
248 | unsigned char rule_idx; | |
249 | const USTRING_TYPE *backw_cur = backw_start; | |
250 | ||
251 | /* To prevent a warning init the used vars. */ | |
252 | len = find_idx (&backw_cur, &weight_idx, | |
253 | &rule_idx, l_data, pass); | |
254 | ||
255 | for (i = 1; i < p; i++) | |
256 | len = find_idx (&backw_cur, &weight_idx, | |
257 | &rule_idx, l_data, pass); | |
258 | ||
259 | if (len != 0) | |
260 | { | |
261 | #ifdef WIDE_CHAR_VERSION | |
262 | if (needed + 1 + len < n) | |
263 | { | |
264 | dest[needed] = val; | |
265 | for (i = 0; i < len; ++i) | |
266 | dest[needed + 1 + i] = | |
267 | l_data->weights[weight_idx + i]; | |
268 | } | |
269 | needed += 1 + len; | |
270 | #else | |
271 | buflen = utf8_encode (buf, val); | |
272 | if (needed + buflen + len < n) | |
273 | { | |
274 | for (i = 0; i < buflen; ++i) | |
275 | dest[needed + i] = buf[i]; | |
276 | for (i = 0; i < len; ++i) | |
277 | dest[needed + buflen + i] = | |
278 | l_data->weights[weight_idx + i]; | |
279 | } | |
280 | needed += buflen + len; | |
281 | #endif | |
282 | val = 1; | |
283 | } | |
284 | else | |
285 | ++val; | |
286 | } | |
287 | ||
288 | backw_start = NULL; | |
289 | backw_len = 0; | |
290 | } | |
291 | ||
292 | /* Now handle the forward element. */ | |
293 | if (len != 0) | |
294 | { | |
295 | #ifdef WIDE_CHAR_VERSION | |
296 | if (needed + 1 + len < n) | |
297 | { | |
298 | dest[needed] = val; | |
299 | for (i = 0; i < len; ++i) | |
300 | dest[needed + 1 + i] = | |
301 | l_data->weights[weight_idx + i]; | |
302 | } | |
303 | needed += 1 + len; | |
304 | #else | |
305 | buflen = utf8_encode (buf, val); | |
306 | if (needed + buflen + len < n) | |
307 | { | |
308 | for (i = 0; i < buflen; ++i) | |
309 | dest[needed + i] = buf[i]; | |
310 | for (i = 0; i < len; ++i) | |
311 | dest[needed + buflen + i] = | |
312 | l_data->weights[weight_idx + i]; | |
313 | } | |
314 | needed += buflen + len; | |
315 | #endif | |
316 | val = 1; | |
317 | } | |
318 | else | |
319 | ++val; | |
320 | } | |
321 | else | |
322 | { | |
323 | /* Remember start of the backward sequence & track length. */ | |
324 | if (backw_start == NULL) | |
325 | backw_start = pos; | |
326 | backw_len++; | |
327 | } | |
328 | } | |
329 | ||
330 | /* Handle the pushed backward sequence. */ | |
331 | if (backw_start != NULL) | |
332 | { | |
333 | for (size_t p = backw_len; p > 0; p--) | |
334 | { | |
335 | size_t len; | |
336 | int32_t weight_idx; | |
337 | unsigned char rule_idx; | |
338 | const USTRING_TYPE *backw_cur = backw_start; | |
339 | ||
340 | /* To prevent a warning init the used vars. */ | |
341 | len = find_idx (&backw_cur, &weight_idx, | |
342 | &rule_idx, l_data, pass); | |
343 | ||
344 | for (i = 1; i < p; i++) | |
345 | len = find_idx (&backw_cur, &weight_idx, | |
346 | &rule_idx, l_data, pass); | |
347 | ||
348 | if (len != 0) | |
349 | { | |
350 | #ifdef WIDE_CHAR_VERSION | |
351 | if (needed + 1 + len < n) | |
352 | { | |
353 | dest[needed] = val; | |
354 | for (i = 0; i < len; ++i) | |
355 | dest[needed + 1 + i] = | |
356 | l_data->weights[weight_idx + i]; | |
357 | } | |
358 | needed += 1 + len; | |
359 | #else | |
360 | buflen = utf8_encode (buf, val); | |
361 | if (needed + buflen + len < n) | |
362 | { | |
363 | for (i = 0; i < buflen; ++i) | |
364 | dest[needed + i] = buf[i]; | |
365 | for (i = 0; i < len; ++i) | |
366 | dest[needed + buflen + i] = | |
367 | l_data->weights[weight_idx + i]; | |
368 | } | |
369 | needed += buflen + len; | |
370 | #endif | |
371 | val = 1; | |
372 | } | |
373 | else | |
374 | ++val; | |
375 | } | |
376 | } | |
377 | } | |
378 | ||
379 | /* Finally store the byte to separate the passes or terminate | |
380 | the string. */ | |
381 | if (needed < n) | |
382 | dest[needed] = pass + 1 < l_data->nrules ? L('\1') : L('\0'); | |
383 | ++needed; | |
ccadf7b5 UD |
384 | } |
385 | ||
0f9e5854 LH |
386 | /* This is a little optimization: many collation specifications have |
387 | a `position' rule at the end and if no non-ignored character | |
388 | is found the last \1 byte is immediately followed by a \0 byte | |
389 | signalling this. We can avoid the \1 byte(s). */ | |
390 | if (needed > 2 && needed == last_needed + 1) | |
ccadf7b5 | 391 | { |
0f9e5854 LH |
392 | /* Remove the \1 byte. */ |
393 | if (--needed <= n) | |
394 | dest[needed - 1] = L('\0'); | |
ccadf7b5 | 395 | } |
ccadf7b5 | 396 | |
0f9e5854 LH |
397 | /* Return the number of bytes/words we need, but don't count the NUL |
398 | byte/word at the end. */ | |
399 | return needed - 1; | |
400 | } | |
401 | ||
402 | /* Do the transformation using weight-index and rule cache. */ | |
403 | static size_t | |
404 | do_xfrm_cached (STRING_TYPE *dest, size_t n, const locale_data_t *l_data, | |
405 | size_t idxmax, int32_t *idxarr, const unsigned char *rulearr) | |
406 | { | |
535e935a | 407 | uint32_t nrules = l_data->nrules; |
0f9e5854 LH |
408 | unsigned char *rulesets = l_data->rulesets; |
409 | USTRING_TYPE *weights = l_data->weights; | |
535e935a | 410 | uint32_t pass; |
0f9e5854 LH |
411 | size_t needed = 0; |
412 | size_t last_needed; | |
413 | size_t idxcnt; | |
ccadf7b5 | 414 | |
0f9e5854 | 415 | /* Now the passes over the weights. */ |
ccadf7b5 UD |
416 | for (pass = 0; pass < nrules; ++pass) |
417 | { | |
418 | size_t backw_stop = ~0ul; | |
419 | int rule = rulesets[rulearr[0] * nrules + pass]; | |
420 | /* We assume that if a rule has defined `position' in one section | |
421 | this is true for all of them. */ | |
422 | int position = rule & sort_position; | |
423 | ||
2f334ad5 | 424 | last_needed = needed; |
ccadf7b5 UD |
425 | if (position == 0) |
426 | { | |
427 | for (idxcnt = 0; idxcnt < idxmax; ++idxcnt) | |
428 | { | |
429 | if ((rule & sort_forward) != 0) | |
430 | { | |
431 | size_t len; | |
432 | ||
433 | if (backw_stop != ~0ul) | |
434 | { | |
435 | /* Handle the pushed elements now. */ | |
436 | size_t backw; | |
437 | ||
bc3a45ce | 438 | for (backw = idxcnt; backw > backw_stop; ) |
ccadf7b5 | 439 | { |
bc3a45ce | 440 | --backw; |
ccadf7b5 UD |
441 | len = weights[idxarr[backw]++]; |
442 | ||
443 | if (needed + len < n) | |
444 | while (len-- > 0) | |
445 | dest[needed++] = weights[idxarr[backw]++]; | |
446 | else | |
447 | { | |
448 | /* No more characters fit into the buffer. */ | |
449 | needed += len; | |
450 | idxarr[backw] += len; | |
451 | } | |
452 | } | |
453 | ||
454 | backw_stop = ~0ul; | |
455 | } | |
456 | ||
457 | /* Now handle the forward element. */ | |
458 | len = weights[idxarr[idxcnt]++]; | |
459 | if (needed + len < n) | |
460 | while (len-- > 0) | |
461 | dest[needed++] = weights[idxarr[idxcnt]++]; | |
462 | else | |
463 | { | |
464 | /* No more characters fit into the buffer. */ | |
465 | needed += len; | |
466 | idxarr[idxcnt] += len; | |
467 | } | |
468 | } | |
469 | else | |
470 | { | |
471 | /* Remember where the backwards series started. */ | |
472 | if (backw_stop == ~0ul) | |
473 | backw_stop = idxcnt; | |
474 | } | |
475 | ||
476 | rule = rulesets[rulearr[idxcnt + 1] * nrules + pass]; | |
477 | } | |
478 | ||
479 | ||
480 | if (backw_stop != ~0ul) | |
481 | { | |
482 | /* Handle the pushed elements now. */ | |
483 | size_t backw; | |
484 | ||
485 | backw = idxcnt; | |
486 | while (backw > backw_stop) | |
487 | { | |
488 | size_t len = weights[idxarr[--backw]++]; | |
489 | ||
490 | if (needed + len < n) | |
491 | while (len-- > 0) | |
492 | dest[needed++] = weights[idxarr[backw]++]; | |
493 | else | |
494 | { | |
495 | /* No more characters fit into the buffer. */ | |
496 | needed += len; | |
497 | idxarr[backw] += len; | |
498 | } | |
499 | } | |
500 | } | |
501 | } | |
502 | else | |
503 | { | |
504 | int val = 1; | |
505 | #ifndef WIDE_CHAR_VERSION | |
506 | char buf[7]; | |
507 | size_t buflen; | |
508 | #endif | |
509 | size_t i; | |
510 | ||
511 | for (idxcnt = 0; idxcnt < idxmax; ++idxcnt) | |
512 | { | |
513 | if ((rule & sort_forward) != 0) | |
514 | { | |
515 | size_t len; | |
516 | ||
517 | if (backw_stop != ~0ul) | |
518 | { | |
519 | /* Handle the pushed elements now. */ | |
520 | size_t backw; | |
521 | ||
bc3a45ce | 522 | for (backw = idxcnt; backw > backw_stop; ) |
ccadf7b5 | 523 | { |
bc3a45ce | 524 | --backw; |
ccadf7b5 UD |
525 | len = weights[idxarr[backw]++]; |
526 | if (len != 0) | |
527 | { | |
528 | #ifdef WIDE_CHAR_VERSION | |
529 | if (needed + 1 + len < n) | |
530 | { | |
531 | dest[needed] = val; | |
532 | for (i = 0; i < len; ++i) | |
533 | dest[needed + 1 + i] = | |
534 | weights[idxarr[backw] + i]; | |
535 | } | |
536 | needed += 1 + len; | |
537 | #else | |
538 | buflen = utf8_encode (buf, val); | |
539 | if (needed + buflen + len < n) | |
540 | { | |
541 | for (i = 0; i < buflen; ++i) | |
542 | dest[needed + i] = buf[i]; | |
543 | for (i = 0; i < len; ++i) | |
544 | dest[needed + buflen + i] = | |
545 | weights[idxarr[backw] + i]; | |
546 | } | |
547 | needed += buflen + len; | |
548 | #endif | |
549 | idxarr[backw] += len; | |
550 | val = 1; | |
551 | } | |
552 | else | |
553 | ++val; | |
554 | } | |
555 | ||
556 | backw_stop = ~0ul; | |
557 | } | |
558 | ||
559 | /* Now handle the forward element. */ | |
560 | len = weights[idxarr[idxcnt]++]; | |
561 | if (len != 0) | |
562 | { | |
563 | #ifdef WIDE_CHAR_VERSION | |
564 | if (needed + 1+ len < n) | |
565 | { | |
566 | dest[needed] = val; | |
567 | for (i = 0; i < len; ++i) | |
568 | dest[needed + 1 + i] = | |
569 | weights[idxarr[idxcnt] + i]; | |
570 | } | |
571 | needed += 1 + len; | |
572 | #else | |
573 | buflen = utf8_encode (buf, val); | |
574 | if (needed + buflen + len < n) | |
575 | { | |
576 | for (i = 0; i < buflen; ++i) | |
577 | dest[needed + i] = buf[i]; | |
578 | for (i = 0; i < len; ++i) | |
579 | dest[needed + buflen + i] = | |
580 | weights[idxarr[idxcnt] + i]; | |
581 | } | |
582 | needed += buflen + len; | |
583 | #endif | |
584 | idxarr[idxcnt] += len; | |
585 | val = 1; | |
586 | } | |
587 | else | |
588 | /* Note that we don't have to increment `idxarr[idxcnt]' | |
589 | since the length is zero. */ | |
590 | ++val; | |
591 | } | |
592 | else | |
593 | { | |
594 | /* Remember where the backwards series started. */ | |
595 | if (backw_stop == ~0ul) | |
596 | backw_stop = idxcnt; | |
597 | } | |
598 | ||
599 | rule = rulesets[rulearr[idxcnt + 1] * nrules + pass]; | |
600 | } | |
601 | ||
602 | if (backw_stop != ~0ul) | |
603 | { | |
604 | /* Handle the pushed elements now. */ | |
605 | size_t backw; | |
606 | ||
607 | backw = idxmax - 1; | |
608 | while (backw > backw_stop) | |
609 | { | |
610 | size_t len = weights[idxarr[--backw]++]; | |
611 | if (len != 0) | |
612 | { | |
613 | #ifdef WIDE_CHAR_VERSION | |
614 | if (needed + 1 + len < n) | |
615 | { | |
616 | dest[needed] = val; | |
617 | for (i = 0; i < len; ++i) | |
618 | dest[needed + 1 + i] = | |
619 | weights[idxarr[backw] + i]; | |
620 | } | |
621 | needed += 1 + len; | |
622 | #else | |
623 | buflen = utf8_encode (buf, val); | |
624 | if (needed + buflen + len < n) | |
625 | { | |
626 | for (i = 0; i < buflen; ++i) | |
627 | dest[needed + i] = buf[i]; | |
628 | for (i = 0; i < len; ++i) | |
629 | dest[needed + buflen + i] = | |
630 | weights[idxarr[backw] + i]; | |
631 | } | |
632 | needed += buflen + len; | |
633 | #endif | |
634 | idxarr[backw] += len; | |
635 | val = 1; | |
636 | } | |
637 | else | |
638 | ++val; | |
639 | } | |
640 | } | |
641 | } | |
642 | ||
643 | /* Finally store the byte to separate the passes or terminate | |
644 | the string. */ | |
645 | if (needed < n) | |
646 | dest[needed] = pass + 1 < nrules ? L('\1') : L('\0'); | |
647 | ++needed; | |
648 | } | |
649 | ||
650 | /* This is a little optimization: many collation specifications have | |
651 | a `position' rule at the end and if no non-ignored character | |
652 | is found the last \1 byte is immediately followed by a \0 byte | |
653 | signalling this. We can avoid the \1 byte(s). */ | |
2f334ad5 | 654 | if (needed > 2 && needed == last_needed + 1) |
ccadf7b5 UD |
655 | { |
656 | /* Remove the \1 byte. */ | |
52a33795 | 657 | if (--needed <= n) |
2f334ad5 | 658 | dest[needed - 1] = L('\0'); |
ccadf7b5 UD |
659 | } |
660 | ||
ccadf7b5 UD |
661 | /* Return the number of bytes/words we need, but don't count the NUL |
662 | byte/word at the end. */ | |
663 | return needed - 1; | |
664 | } | |
0f9e5854 LH |
665 | |
666 | size_t | |
af85385f | 667 | STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, locale_t l) |
0f9e5854 LH |
668 | { |
669 | locale_data_t l_data; | |
670 | struct __locale_data *current = l->__locales[LC_COLLATE]; | |
671 | l_data.nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word; | |
672 | ||
673 | /* Handle byte comparison case. */ | |
674 | if (l_data.nrules == 0) | |
675 | { | |
676 | size_t srclen = STRLEN (src); | |
677 | ||
678 | if (n != 0) | |
679 | STPNCPY (dest, src, MIN (srclen + 1, n)); | |
680 | ||
681 | return srclen; | |
682 | } | |
683 | ||
684 | /* Handle an empty string, code hereafter relies on strlen (src) > 0. */ | |
685 | if (*src == L('\0')) | |
686 | { | |
687 | if (n != 0) | |
688 | *dest = L('\0'); | |
689 | return 0; | |
690 | } | |
691 | ||
692 | /* Get the locale data. */ | |
693 | l_data.rulesets = (unsigned char *) | |
694 | current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string; | |
695 | l_data.table = (int32_t *) | |
696 | current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string; | |
697 | l_data.weights = (USTRING_TYPE *) | |
698 | current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string; | |
699 | l_data.extra = (USTRING_TYPE *) | |
700 | current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string; | |
701 | l_data.indirect = (int32_t *) | |
702 | current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string; | |
703 | ||
704 | assert (((uintptr_t) l_data.table) % __alignof__ (l_data.table[0]) == 0); | |
705 | assert (((uintptr_t) l_data.weights) % __alignof__ (l_data.weights[0]) == 0); | |
706 | assert (((uintptr_t) l_data.extra) % __alignof__ (l_data.extra[0]) == 0); | |
707 | assert (((uintptr_t) l_data.indirect) % __alignof__ (l_data.indirect[0]) == 0); | |
708 | ||
709 | /* We need the elements of the string as unsigned values since they | |
14ef9c18 | 710 | are used as indices. */ |
0f9e5854 LH |
711 | const USTRING_TYPE *usrc = (const USTRING_TYPE *) src; |
712 | ||
713 | /* Allocate cache for small strings on the stack and fill it with weight and | |
714 | rule indices. If the cache size is not sufficient, continue with the | |
715 | uncached xfrm version. */ | |
716 | size_t idxmax = 0; | |
717 | const USTRING_TYPE *cur = usrc; | |
718 | int32_t *idxarr = alloca (SMALL_STR_SIZE * sizeof (int32_t)); | |
719 | unsigned char *rulearr = alloca (SMALL_STR_SIZE + 1); | |
720 | ||
721 | do | |
722 | { | |
723 | int32_t tmp = findidx (l_data.table, l_data.indirect, l_data.extra, &cur, | |
724 | -1); | |
725 | rulearr[idxmax] = tmp >> 24; | |
726 | idxarr[idxmax] = tmp & 0xffffff; | |
727 | ||
728 | ++idxmax; | |
729 | } | |
730 | while (*cur != L('\0') && idxmax < SMALL_STR_SIZE); | |
731 | ||
732 | /* This element is only read, the value never used but to determine | |
733 | another value which then is ignored. */ | |
734 | rulearr[idxmax] = '\0'; | |
735 | ||
736 | /* Do the transformation. */ | |
737 | if (*cur == L('\0')) | |
738 | return do_xfrm_cached (dest, n, &l_data, idxmax, idxarr, rulearr); | |
739 | else | |
740 | return do_xfrm (usrc, dest, n, &l_data); | |
741 | } | |
/* NOTE(review): libc_hidden_def is defined outside this file; presumably it
   provides the library-internal hidden alias for STRXFRM -- confirm.  */
libc_hidden_def (STRXFRM)

#ifndef WIDE_CHAR_VERSION
/* Only when WIDE_CHAR_VERSION is not defined: also make the function
   available as strxfrm_l.  NOTE(review): presumably a weak symbol alias;
   weak_alias is defined outside this file -- confirm.  */
weak_alias (__strxfrm_l, strxfrm_l)
#endif