]>
Commit | Line | Data |
---|---|---|
4b10dd6c | 1 | /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc. |
6d52618b UD |
2 | This file is part of the GNU C Library. |
3 | Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995. | |
28f540f4 | 4 | |
6d52618b UD |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Library General Public License as | |
7 | published by the Free Software Foundation; either version 2 of the | |
8 | License, or (at your option) any later version. | |
28f540f4 | 9 | |
6d52618b UD |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Library General Public License for more details. | |
28f540f4 | 14 | |
6d52618b UD |
15 | You should have received a copy of the GNU Library General Public |
16 | License along with the GNU C Library; see the file COPYING.LIB. If not, | |
17 | write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
18 | Boston, MA 02111-1307, USA. */ | |
28f540f4 | 19 | |
28f540f4 RM |
20 | #include <stddef.h> |
21 | #include <stdlib.h> | |
22 | #include <string.h> | |
19bc17a9 | 23 | |
/* Narrow-character configuration.  When WIDE_VERSION is defined none of
   these macros are defined here, so they have to come from elsewhere
   (presumably the file that includes this one -- TODO confirm).  */
#if !defined WIDE_VERSION
/* Character types and literal spelling.  */
# define L_(Ch) Ch
# define STRING_TYPE char
# define USTRING_TYPE unsigned char
/* String primitives used by the implementation.  */
# define STRLEN strlen
# define STPNCPY __stpncpy
/* Name under which the transformation function is defined.  */
# if defined USE_IN_EXTENDED_LOCALE_MODEL
#  define STRXFRM __strxfrm_l
# else
#  define STRXFRM strxfrm
# endif
#endif
36 | ||
4b10dd6c UD |
37 | #ifndef USE_IN_EXTENDED_LOCALE_MODEL |
38 | size_t | |
39 | STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n) | |
40 | #else | |
41 | size_t | |
42 | STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l) | |
43 | #endif | |
44 | { | |
45 | if (n != 0) | |
46 | STPNCPY (dest, src, n); | |
47 | ||
48 | return STRLEN (src); | |
49 | } | |
50 | ||
51 | #if 0 | |
19bc17a9 RM |
52 | /* Include the shared helper functions. `strxfrm'/`wcsxfrm' also use |
53 | these functions. */ | |
0393dfd6 | 54 | #include "../locale/weight.h" |
19bc17a9 RM |
55 | |
56 | ||
59dd8641 | 57 | #ifndef WIDE_VERSION |
19bc17a9 RM |
/* Append VALUE to DEST in a UTF-8 style multi-byte form of one to six
   bytes.  The bytes go to DEST[ACT], DEST[ACT + 1], ... but a byte is
   only stored while its index is below MAX; bytes that do not fit are
   still counted.  Returns ACT advanced by the length of the encoding.  */
static __inline size_t
print_val (u_int32_t value, char *dest, size_t max, size_t act)
{
  /* After K + 1 six-bit groups have been split off, the remainder fits
     into a lead byte if it is below FORMS[K].limit; the lead byte is
     then FORMS[K].lead plus the remainder.  */
  static const struct
  {
    u_int32_t limit;
    unsigned char lead;
  } forms[] =
    {
      { 0x20, 0xc0 },
      { 0x10, 0xe0 },
      { 0x08, 0xf0 },
      { 0x04, 0xf8 }
    };
  const size_t nforms = sizeof forms / sizeof forms[0];
  char rev[6];          /* Encoded bytes, last byte first.  */
  int len = 0;

  if (value < 0x80)
    rev[len++] = (char) value;
  else
    {
      size_t k = 0;

      for (;;)
        {
          /* Split off the next continuation byte.  */
          rev[len++] = (char) (0x80 + (value & 0x3f));
          value >>= 6;

          if (k == nforms)
            {
              /* Longest form: whatever is left goes into the lead.  */
              rev[len++] = (char) (0xfc + value);
              break;
            }
          if (value < forms[k].limit)
            {
              rev[len++] = (char) (forms[k].lead + value);
              break;
            }
          ++k;
        }
    }

  /* The bytes were produced back to front; emit them lead byte first.  */
  while (len > 0)
    {
      --len;
      if (act < max)
        dest[act] = rev[len];
      ++act;
    }

  return act;
}
59dd8641 RM |
114 | #else |
/* Append VALUE to DEST as one or more wchar_t units.  Units go to
   DEST[ACT], DEST[ACT + 1], ... but a unit is only stored while its
   index is below MAX; units that do not fit are still counted.  Returns
   ACT advanced by the number of units in the encoding.

   If wchar_t is four bytes wide VALUE is stored as a single unit.
   Otherwise values below 0x8000 take one unit, and larger values are
   split into 14-bit groups: trailing units are 0x8000 plus a group, and
   the leading unit is 0xc000 (two units) or 0xe000 (three units) plus
   the remaining high bits.  */
static __inline size_t
print_val (u_int32_t value, wchar_t *dest, size_t max, size_t act)
{
  /* We cannot really assume wchar_t is 32 bits wide.  But it is for
     GCC and so we don't do much optimization for the other case.  */
  if (sizeof (wchar_t) == 4)
    {
      if (act < max)
        dest[act] = (wchar_t) value;
      ++act;
    }
  else
    {
      wchar_t tmp[3];
      size_t idx = 0;

      if (value < 0x8000)
        /* Store the value itself; ACT is only the output position.  */
        tmp[idx++] = (wchar_t) value;
      else
        {
          tmp[idx++] = (wchar_t) (0x8000 + (value & 0x3fff));
          value >>= 14;
          if (value < 0x2000)
            tmp[idx++] = (wchar_t) (0xc000 + value);
          else
            {
              tmp[idx++] = (wchar_t) (0x8000 + (value & 0x3fff));
              value >>= 14;
              tmp[idx++] = (wchar_t) (0xe000 + value);
            }
        }

      /* The units were produced back to front; emit the leading unit
         first.  */
      while (idx-- > 0)
        {
          if (act < max)
            dest[act] = tmp[idx];
          ++act;
        }
    }
  return act;
}
155 | #endif | |
28f540f4 RM |
156 | |
157 | ||
158 | /* Transform SRC into a form such that the result of strcmp | |
159 | on two strings that have been transformed by strxfrm is | |
160 | the same as the result of strcoll on the two strings before | |
161 | their transformation. The transformed string is put in at | |
162 | most N characters of DEST and its length is returned. */ | |
c84142e8 | 163 | #ifndef USE_IN_EXTENDED_LOCALE_MODEL |
28f540f4 | 164 | size_t |
75cd5204 | 165 | STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n) |
c84142e8 UD |
166 | #else |
167 | size_t | |
168 | STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l) | |
169 | #endif | |
28f540f4 | 170 | { |
c84142e8 UD |
171 | #ifdef USE_IN_EXTENDED_LOCALE_MODEL |
172 | struct locale_data *current = l->__locales[LC_COLLATE]; | |
173 | # if BYTE_ORDER == BIG_ENDIAN | |
174 | const u_int32_t *collate_table = (const u_int32_t *) | |
175 | current->values[_NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].string; | |
176 | const u_int32_t *collate_extra = (const u_int32_t *) | |
177 | current->values[_NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].string; | |
178 | # elif BYTE_ORDER == LITTLE_ENDIAN | |
179 | const u_int32_t *collate_table = (const u_int32_t *) | |
180 | current->values[_NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].string; | |
181 | const u_int32_t *collate_extra = (const u_int32_t *) | |
182 | current->values[_NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].string; | |
183 | # else | |
184 | # error bizarre byte order | |
185 | # endif | |
186 | #endif | |
19bc17a9 RM |
187 | weight_t *forw = NULL; |
188 | weight_t *backw = NULL; | |
189 | size_t pass; | |
190 | size_t written; | |
191 | ||
192 | /* If the current locale does not specify locale data we use normal | |
193 | 8-bit string comparison. */ | |
194 | if (collate_nrules == 0) | |
195 | { | |
196 | if (n != 0) | |
197 | STPNCPY (dest, src, n); | |
198 | ||
199 | return STRLEN (src); | |
200 | } | |
201 | ||
a7ab2023 UD |
202 | /* Handle an empty string as a special case. */ |
203 | if (*src == '\0') | |
204 | { | |
205 | if (n != 0) | |
206 | *dest = '\0'; | |
207 | return 1; | |
208 | } | |
209 | ||
19bc17a9 RM |
210 | /* Get full information about the string. This means we get |
211 | information for all passes in a special data structure. */ | |
212 | get_string (src, forw, backw); | |
213 | ||
214 | /* Now we have all the information. In at most the given number of | |
215 | passes we can finally decide about the order. */ | |
216 | written = 0; | |
217 | for (pass = 0; pass < collate_nrules; ++pass) | |
218 | { | |
219 | int forward = (collate_rules[pass] & sort_forward) != 0; | |
220 | const weight_t *run = forward ? forw : backw; | |
221 | int idx = forward ? 0 : run->data[pass].number - 1; | |
222 | ||
5a97622d | 223 | while (1) |
19bc17a9 RM |
224 | { |
225 | int ignore = 0; | |
5a97622d | 226 | u_int32_t w = 0; |
19bc17a9 RM |
227 | |
228 | /* Here we have to check for IGNORE entries. If these are | |
6d52618b | 229 | found we count them and go on with he next value. */ |
5a97622d UD |
230 | while (run != NULL |
231 | && ((w = run->data[pass].value[idx]) | |
232 | == (u_int32_t) IGNORE_CHAR)) | |
19bc17a9 RM |
233 | { |
234 | ++ignore; | |
6e4c40ba UD |
235 | if (forward |
236 | ? ++idx >= run->data[pass].number | |
237 | : --idx < 0) | |
19bc17a9 RM |
238 | { |
239 | weight_t *nextp = forward ? run->next : run->prev; | |
240 | if (nextp == NULL) | |
241 | { | |
242 | w = 0; | |
5a97622d UD |
243 | /* No more non-INGOREd elements means lowest |
244 | possible value. */ | |
245 | ignore = -1; | |
19bc17a9 | 246 | } |
5a97622d UD |
247 | else |
248 | idx = forward ? 0 : nextp->data[pass].number - 1; | |
19bc17a9 | 249 | run = nextp; |
19bc17a9 RM |
250 | } |
251 | } | |
252 | ||
5a97622d UD |
253 | /* Stop if all characters are processed. */ |
254 | if (run == NULL) | |
255 | break; | |
256 | ||
19bc17a9 RM |
257 | /* Now we have information of the number of ignored weights |
258 | and the value of the next weight. We have to add 2 | |
259 | because 0 means EOS and 1 is the intermediate string end. */ | |
260 | if ((collate_rules[pass] & sort_position) != 0) | |
261 | written = print_val (ignore + 2, dest, n, written); | |
262 | ||
263 | if (w != 0) | |
264 | written = print_val (w, dest, n, written); | |
265 | ||
266 | /* We have to increment the index counters. */ | |
04795ad9 | 267 | if (forward) |
94b78bb2 | 268 | { |
04795ad9 | 269 | if (++idx >= run->data[pass].number) |
94b78bb2 UD |
270 | { |
271 | run = run->next; | |
272 | idx = 0; | |
273 | } | |
04795ad9 UD |
274 | } |
275 | else | |
276 | { | |
277 | if (--idx < 0) | |
94b78bb2 UD |
278 | { |
279 | run = run->prev; | |
280 | if (run != NULL) | |
281 | idx = run->data[pass].number - 1; | |
282 | } | |
283 | } | |
19bc17a9 | 284 | } |
19bc17a9 RM |
285 | |
286 | /* Write marker for end of word. */ | |
287 | if (pass + 1 < collate_nrules) | |
288 | written = print_val (1, dest, n, written); | |
289 | } | |
da128169 | 290 | |
19bc17a9 | 291 | /* Terminate string. */ |
59dd8641 RM |
292 | if (written < n) |
293 | dest[written] = L_('\0'); | |
294 | ||
295 | /* Return length without counting the terminating '\0'. */ | |
296 | return written; | |
28f540f4 | 297 | } |
4b10dd6c | 298 | #endif |