]> git.ipfire.org Git - thirdparty/glibc.git/blame - locale/programs/ld-ctype.c
Update.
[thirdparty/glibc.git] / locale / programs / ld-ctype.c
CommitLineData
880f421f 1/* Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
c84142e8
UD
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
19bc17a9 4
c84142e8
UD
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
19bc17a9 9
c84142e8
UD
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
19bc17a9 14
c84142e8
UD
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
19bc17a9
RM
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
a68b0d31 24#include <alloca.h>
19bc17a9
RM
25#include <endian.h>
26#include <limits.h>
27#include <string.h>
28
29#include "locales.h"
30#include "localeinfo.h"
31#include "langinfo.h"
32#include "locfile-token.h"
33#include "stringtrans.h"
34
35/* Uncomment the following line in the production version. */
36/* define NDEBUG 1 */
37#include <assert.h>
38
39
40void *xmalloc (size_t __n);
41void *xcalloc (size_t __n, size_t __s);
42void *xrealloc (void *__ptr, size_t __n);
43
44
/* Bit position and bit mask of a predefined character class.  The
   position is the distance of the class token from tok_upper.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (1 << BITPOS (class))

/* Lvalue for entry VALUE of the table COLLECTION (optionally subscripted
   with IDX) in CTYPE.  The lookup is done by find_idx, which may grow
   the table.  */
#define ELEM(ctype, collection, idx, value)				      \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
	     &ctype->collection##_act idx, value)

/* Reverse the byte order of a 32 bit value.  */
#define SWAPU32(w) \
  (((w) >> 24) | (((w) >> 8) & 0xff00) | (((w) & 0xff00) << 8) | ((w) << 24))

/* Reverse the byte order of a 16 bit value.  */
#define SWAPU16(w) \
  ((((w) & 0xff) << 8) | (((w) >> 8) & 0xff))


/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t u_int16_t
#define CHAR_CLASS_TRANS SWAPU16
#define char_class32_t u_int32_t
#define CHAR_CLASS32_TRANS SWAPU32
19bc17a9
RM
67
68
/* The real definition of the struct for the LC_CTYPE locale.  */
struct locale_ctype_t
{
  /* Character values seen so far.  The position of a value in this
     array is the index used for the class and map collections below
     (see find_idx).  The first 256 entries are the identity.  */
  unsigned int *charnames;
  size_t charnames_max;		/* Number of allocated entries.  */
  size_t charnames_act;		/* Number of used entries.  */

  /* We will allow up to 8 * sizeof(u_int32_t) - 1 character classes.  */
#define MAX_NR_CHARCLASS (8 * sizeof (u_int32_t) - 1)
  size_t nr_charclass;		/* Number of registered classes.  */
  const char *classnames[MAX_NR_CHARCLASS];
  /* Bit of the class currently being defined; zero outside a class
     definition.  */
  unsigned long int current_class_mask;
  /* Last character named in the current class definition (start of a
     possible range), or ILLEGAL_CHAR_VALUE.  */
  unsigned int last_class_char;
  /* Class bit set for every known character.  */
  u_int32_t *class_collection;
  size_t class_collection_max;
  size_t class_collection_act;
  /* Bits of the predefined classes for which a definition was started.  */
  unsigned long int class_done;

  /* If the following number ever turns out to be too small simply
     increase it.  But I doubt it will.  --drepper@gnu */
#define MAX_NR_CHARMAP 16
  const char *mapnames[MAX_NR_CHARMAP];
  u_int32_t *map_collection[MAX_NR_CHARMAP];
  size_t map_collection_max[MAX_NR_CHARMAP];
  size_t map_collection_act[MAX_NR_CHARMAP];
  size_t map_collection_nr;	/* Number of registered maps.  */
  /* Index of the map currently being defined, MAX_NR_CHARMAP if none.  */
  size_t last_map_idx;
  /* Source character of the pair currently being read, or
     ILLEGAL_CHAR_VALUE.  */
  unsigned int from_map_char;
  int toupper_done;		/* Set once a `toupper' definition started.  */
  int tolower_done;		/* Set once a `tolower' definition started.  */

  /* The arrays for the binary representation.  They are written to the
     locale file by ctype_output; presumably allocate_arrays fills them
     (not verified here).  */
  u_int32_t plane_size;
  u_int32_t plane_cnt;
  char_class_t *ctype_b;
  char_class32_t *ctype32_b;
  u_int32_t *names_el;
  u_int32_t *names_eb;
  u_int32_t **map_eb;
  u_int32_t **map_el;
  u_int32_t *class_name_ptr;
  u_int32_t *map_name_ptr;
  unsigned char *width;
  u_int32_t mb_cur_max;
  const char *codeset_name;
};
115
116
117/* Prototypes for local functions. */
118static void ctype_class_newP (struct linereader *lr,
119 struct locale_ctype_t *ctype, const char *name);
120static void ctype_map_newP (struct linereader *lr,
121 struct locale_ctype_t *ctype,
122 const char *name, struct charset_t *charset);
7a12c6bb
RM
123static u_int32_t *find_idx (struct locale_ctype_t *ctype, u_int32_t **table,
124 size_t *max, size_t *act, unsigned int idx);
19bc17a9
RM
125static void set_class_defaults (struct locale_ctype_t *ctype,
126 struct charset_t *charset);
75cd5204
RM
127static void allocate_arrays (struct locale_ctype_t *ctype,
128 struct charset_t *charset);
19bc17a9
RM
129
130
131void
132ctype_startup (struct linereader *lr, struct localedef_t *locale,
133 struct charset_t *charset)
134{
135 unsigned int cnt;
136 struct locale_ctype_t *ctype;
137
138 /* It is important that we always use UCS1 encoding for strings now. */
139 encoding_method = ENC_UCS1;
140
141 /* Allocate the needed room. */
142 locale->categories[LC_CTYPE].ctype = ctype =
143 (struct locale_ctype_t *) xmalloc (sizeof (struct locale_ctype_t));
144
145 /* We have no names seen yet. */
146 ctype->charnames_max = charset->mb_cur_max == 1 ? 256 : 512;
147 ctype->charnames =
148 (unsigned int *) xmalloc (ctype->charnames_max * sizeof (unsigned int));
149 for (cnt = 0; cnt < 256; ++cnt)
150 ctype->charnames[cnt] = cnt;
151 ctype->charnames_act = 256;
152
153 /* Fill character class information. */
154 ctype->nr_charclass = 0;
155 ctype->current_class_mask = 0;
156 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
157 /* The order of the following instructions determines the bit
158 positions! */
159 ctype_class_newP (lr, ctype, "upper");
160 ctype_class_newP (lr, ctype, "lower");
161 ctype_class_newP (lr, ctype, "alpha");
162 ctype_class_newP (lr, ctype, "digit");
163 ctype_class_newP (lr, ctype, "xdigit");
164 ctype_class_newP (lr, ctype, "space");
165 ctype_class_newP (lr, ctype, "print");
166 ctype_class_newP (lr, ctype, "graph");
167 ctype_class_newP (lr, ctype, "blank");
168 ctype_class_newP (lr, ctype, "cntrl");
169 ctype_class_newP (lr, ctype, "punct");
170 ctype_class_newP (lr, ctype, "alnum");
171
172 ctype->class_collection_max = charset->mb_cur_max == 1 ? 256 : 512;
7a12c6bb
RM
173 ctype->class_collection
174 = (u_int32_t *) xmalloc (sizeof (unsigned long int)
175 * ctype->class_collection_max);
19bc17a9
RM
176 memset (ctype->class_collection, '\0',
177 sizeof (unsigned long int) * ctype->class_collection_max);
178 ctype->class_collection_act = 256;
179
180 /* Fill character map information. */
181 ctype->map_collection_nr = 0;
182 ctype->last_map_idx = MAX_NR_CHARMAP;
183 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
184 ctype_map_newP (lr, ctype, "toupper", charset);
185 ctype_map_newP (lr, ctype, "tolower", charset);
186
187 /* Fill first 256 entries in `toupper' and `tolower' arrays. */
188 for (cnt = 0; cnt < 256; ++cnt)
189 {
190 ctype->map_collection[0][cnt] = cnt;
191 ctype->map_collection[1][cnt] = cnt;
192 }
193}
194
195
196void
197ctype_finish (struct localedef_t *locale, struct charset_t *charset)
198{
199 /* See POSIX.2, table 2-6 for the meaning of the following table. */
200#define NCLASS 12
201 static const struct
202 {
203 const char *name;
204 const char allow[NCLASS];
205 }
206 valid_table[NCLASS] =
207 {
208 /* The order is important. See token.h for more information.
209 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
210 { "upper", "--MX-XDDXXX-" },
211 { "lower", "--MX-XDDXXX-" },
212 { "alpha", "---X-XDDXXX-" },
213 { "digit", "XXX--XDDXXX-" },
214 { "xdigit", "-----XDDXXX-" },
215 { "space", "XXXXX------X" },
216 { "print", "---------X--" },
217 { "graph", "---------X--" },
218 { "blank", "XXXXXM-----X" },
219 { "cntrl", "XXXXX-XX--XX" },
220 { "punct", "XXXXX-DD-X-X" },
221 { "alnum", "-----XDDXXX-" }
222 };
223 size_t cnt;
224 int cls1, cls2;
225 unsigned int space_value;
226 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
227
228 /* Set default value for classes not specified. */
229 set_class_defaults (ctype, charset);
230
231 /* Check according to table. */
232 for (cnt = 0; cnt < ctype->class_collection_max; ++cnt)
233 {
234 unsigned long int tmp;
235
236 tmp = ctype->class_collection[cnt];
237 if (tmp == 0)
238 continue;
239
240 for (cls1 = 0; cls1 < NCLASS; ++cls1)
241 if ((tmp & (1 << cls1)) != 0)
242 for (cls2 = 0; cls2 < NCLASS; ++cls2)
243 if (valid_table[cls1].allow[cls2] != '-')
244 {
245 int eq = (tmp & (1 << cls2)) != 0;
246 switch (valid_table[cls1].allow[cls2])
247 {
248 case 'M':
249 if (!eq)
250 {
251 char buf[17];
252 char *cp = buf;
253 unsigned int value;
254
255 value = ctype->charnames[cnt];
256
257 if ((value & 0xff000000) != 0)
258 cp += sprintf (cp, "\\%o", (value >> 24) & 0xff);
259 if ((value & 0xffff0000) != 0)
260 cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
261 if ((value & 0xffffff00) != 0)
262 cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
263 sprintf (cp, "\\%o", value & 0xff);
264
c84142e8
UD
265 if (!be_quiet)
266 error (0, 0, _("\
19bc17a9 267character %s'%s' in class `%s' must be in class `%s'"), value > 256 ? "L" : "",
c84142e8
UD
268 cp, valid_table[cls1].name,
269 valid_table[cls2].name);
19bc17a9
RM
270 }
271 break;
272
273 case 'X':
274 if (eq)
275 {
276 char buf[17];
277 char *cp = buf;
278 unsigned int value;
279
280 value = ctype->charnames[cnt];
281
282 if ((value & 0xff000000) != 0)
283 cp += sprintf (cp, "\\%o", value >> 24);
284 if ((value & 0xffff0000) != 0)
285 cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
286 if ((value & 0xffffff00) != 0)
287 cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
288 sprintf (cp, "\\%o", value & 0xff);
289
c84142e8
UD
290 if (!be_quiet)
291 error (0, 0, _("\
19bc17a9 292character %s'%s' in class `%s' must not be in class `%s'"),
c84142e8
UD
293 value > 256 ? "L" : "", cp,
294 valid_table[cls1].name,
295 valid_table[cls2].name);
19bc17a9
RM
296 }
297 break;
298
299 case 'D':
300 ctype->class_collection[cnt] |= 1 << cls2;
301 break;
302
303 default:
304 error (5, 0, _("internal error in %s, line %u"),
305 __FUNCTION__, __LINE__);
306 }
307 }
308 }
309
310 /* ... and now test <SP> as a special case. */
69f155d4 311 space_value = charset_find_value (&charset->char_table, "SP", 2);
880f421f
UD
312 if ((wchar_t) space_value == ILLEGAL_CHAR_VALUE)
313 {
314 if (!be_quiet)
315 error (0, 0, _("character <SP> not defined in character map"));
316 }
c84142e8
UD
317 else if (((cnt = BITPOS (tok_space),
318 (ELEM (ctype, class_collection, , space_value)
319 & BIT (tok_space)) == 0)
320 || (cnt = BITPOS (tok_blank),
321 (ELEM (ctype, class_collection, , space_value)
880f421f
UD
322 & BIT (tok_blank)) == 0)))
323 {
324 if (!be_quiet)
325 error (0, 0, _("<SP> character not in class `%s'"),
326 valid_table[cnt].name);
327 }
c84142e8
UD
328 else if (((cnt = BITPOS (tok_punct),
329 (ELEM (ctype, class_collection, , space_value)
330 & BIT (tok_punct)) != 0)
331 || (cnt = BITPOS (tok_graph),
332 (ELEM (ctype, class_collection, , space_value)
333 & BIT (tok_graph))
880f421f
UD
334 != 0)))
335 {
336 if (!be_quiet)
337 error (0, 0, _("<SP> character must not be in class `%s'"),
338 valid_table[cnt].name);
339 }
19bc17a9
RM
340 else
341 ELEM (ctype, class_collection, , space_value) |= BIT (tok_print);
75cd5204
RM
342
343 /* Now that the tests are done make sure the name array contains all
344 characters which are handled in the WIDTH section of the
345 character set definition file. */
346 if (charset->width_rules != NULL)
347 for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
348 {
349 size_t inner;
350 for (inner = charset->width_rules[cnt].from;
351 inner <= charset->width_rules[cnt].to; ++inner)
352 (void) find_idx (ctype, NULL, NULL, NULL, inner);
353 }
19bc17a9
RM
354}
355
356
357void
75cd5204
RM
358ctype_output (struct localedef_t *locale, struct charset_t *charset,
359 const char *output_path)
19bc17a9
RM
360{
361 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
362 const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
363 + 2 * (ctype->map_collection_nr - 2));
75cd5204
RM
364 struct iovec iov[2 + nelems + ctype->nr_charclass
365 + ctype->map_collection_nr];
19bc17a9 366 struct locale_file data;
7a12c6bb 367 u_int32_t idx[nelems];
75cd5204 368 size_t elem, cnt, offset, total;
19bc17a9
RM
369
370
371 if ((locale->binary & (1 << LC_CTYPE)) != 0)
372 {
373 iov[0].iov_base = ctype;
374 iov[0].iov_len = locale->len[LC_CTYPE];
375
376 write_locale_data (output_path, "LC_CTYPE", 1, iov);
377
378 return;
379 }
380
381
382 /* Now prepare the output: Find the sizes of the table we can use. */
75cd5204 383 allocate_arrays (ctype, charset);
19bc17a9
RM
384
385 data.magic = LIMAGIC (LC_CTYPE);
386 data.n = nelems;
387 iov[0].iov_base = (void *) &data;
388 iov[0].iov_len = sizeof (data);
389
390 iov[1].iov_base = (void *) idx;
391 iov[1].iov_len = sizeof (idx);
392
393 idx[0] = iov[0].iov_len + iov[1].iov_len;
394 offset = 0;
395
396 for (elem = 0; elem < nelems; ++elem)
397 {
398 if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
399 switch (elem)
400 {
401#define CTYPE_DATA(name, base, len) \
402 case _NL_ITEM_INDEX (name): \
ce7a5ef4
RM
403 iov[2 + elem + offset].iov_base = (base); \
404 iov[2 + elem + offset].iov_len = (len); \
75cd5204
RM
405 if (elem + 1 < nelems) \
406 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
19bc17a9
RM
407 break
408
409 CTYPE_DATA (_NL_CTYPE_CLASS,
410 ctype->ctype_b,
411 (256 + 128) * sizeof (char_class_t));
412
413 CTYPE_DATA (_NL_CTYPE_TOUPPER_EB,
414 ctype->map_eb[0],
415 (ctype->plane_size * ctype->plane_cnt + 128)
7a12c6bb 416 * sizeof (u_int32_t));
19bc17a9
RM
417 CTYPE_DATA (_NL_CTYPE_TOLOWER_EB,
418 ctype->map_eb[1],
419 (ctype->plane_size * ctype->plane_cnt + 128)
7a12c6bb 420 * sizeof (u_int32_t));
19bc17a9
RM
421
422 CTYPE_DATA (_NL_CTYPE_TOUPPER_EL,
423 ctype->map_el[0],
424 (ctype->plane_size * ctype->plane_cnt + 128)
7a12c6bb 425 * sizeof (u_int32_t));
19bc17a9
RM
426 CTYPE_DATA (_NL_CTYPE_TOLOWER_EL,
427 ctype->map_el[1],
428 (ctype->plane_size * ctype->plane_cnt + 128)
7a12c6bb 429 * sizeof (u_int32_t));
19bc17a9
RM
430
431 CTYPE_DATA (_NL_CTYPE_CLASS32,
432 ctype->ctype32_b,
433 (ctype->plane_size * ctype->plane_cnt
434 * sizeof (char_class32_t)));
435
436 CTYPE_DATA (_NL_CTYPE_NAMES_EB,
7a12c6bb
RM
437 ctype->names_eb, (ctype->plane_size * ctype->plane_cnt
438 * sizeof (u_int32_t)));
19bc17a9 439 CTYPE_DATA (_NL_CTYPE_NAMES_EL,
7a12c6bb
RM
440 ctype->names_el, (ctype->plane_size * ctype->plane_cnt
441 * sizeof (u_int32_t)));
19bc17a9
RM
442
443 CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
7a12c6bb 444 &ctype->plane_size, sizeof (u_int32_t));
19bc17a9 445 CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
7a12c6bb 446 &ctype->plane_cnt, sizeof (u_int32_t));
19bc17a9 447
75cd5204
RM
448 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
449 /* The class name array. */
450 total = 0;
451 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
452 {
453 iov[2 + elem + offset].iov_base
454 = (void *) ctype->classnames[cnt];
455 iov[2 + elem + offset].iov_len
456 = strlen (ctype->classnames[cnt]) + 1;
457 total += iov[2 + elem + offset].iov_len;
458 }
ce7a5ef4
RM
459 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
460 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
461 total += 1 + (4 - ((total + 1) % 4));
75cd5204
RM
462
463 if (elem + 1 < nelems)
464 idx[elem + 1] = idx[elem] + total;
465 break;
466
467 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
468 /* The class name array. */
469 total = 0;
470 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
471 {
472 iov[2 + elem + offset].iov_base
473 = (void *) ctype->mapnames[cnt];
474 iov[2 + elem + offset].iov_len
475 = strlen (ctype->mapnames[cnt]) + 1;
476 total += iov[2 + elem + offset].iov_len;
477 }
ce7a5ef4
RM
478 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
479 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
480 total += 1 + (4 - ((total + 1) % 4));
75cd5204
RM
481
482 if (elem + 1 < nelems)
483 idx[elem + 1] = idx[elem] + total;
484 break;
19bc17a9
RM
485
486 CTYPE_DATA (_NL_CTYPE_WIDTH,
75cd5204 487 ctype->width, ctype->plane_size * ctype->plane_cnt);
19bc17a9 488
0200214b
RM
489 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
490 &ctype->mb_cur_max, sizeof (u_int32_t));
491
ce7a5ef4
RM
492 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
493 total = strlen (ctype->codeset_name) + 1;
494 if (total % 4 == 0)
495 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
496 else
497 {
498 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
9756dfe1
UD
499 memset (mempcpy (iov[2 + elem + offset].iov_base,
500 ctype->codeset_name, total),
501 '\0', 4 - (total & 3));
ce7a5ef4
RM
502 total = (total + 3) & ~3;
503 }
504 iov[2 + elem + offset].iov_len = total;
505 if (elem + 1 < nelems)
506 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
507 break;
6990326c 508
19bc17a9
RM
509 default:
510 assert (! "unknown CTYPE element");
511 }
512 else
513 {
514 /* Handle extra maps. */
515 size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) >> 1;
516
517 if (((elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) & 1) == 0)
75cd5204 518 iov[2 + elem + offset].iov_base = ctype->map_eb[nr];
19bc17a9 519 else
75cd5204 520 iov[2 + elem + offset].iov_base = ctype->map_el[nr];
19bc17a9 521
75cd5204
RM
522 iov[2 + elem + offset].iov_len = ((ctype->plane_size
523 * ctype->plane_cnt + 128)
7a12c6bb 524 * sizeof (u_int32_t));
19bc17a9 525
75cd5204
RM
526 if (elem + 1 < nelems)
527 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
528 }
19bc17a9 529 }
19bc17a9 530
75cd5204
RM
531 assert (2 + elem + offset == (nelems + ctype->nr_charclass
532 + ctype->map_collection_nr + 2));
19bc17a9 533
75cd5204 534 write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
19bc17a9
RM
535}
536
537
538/* Character class handling. */
539void
540ctype_class_new (struct linereader *lr, struct localedef_t *locale,
541 enum token_t tok, struct token *code,
542 struct charset_t *charset)
543{
544 ctype_class_newP (lr, locale->categories[LC_CTYPE].ctype,
545 code->val.str.start);
546}
547
548
549int
550ctype_is_charclass (struct linereader *lr, struct localedef_t *locale,
551 const char *name)
552{
ba1ffaa1 553 size_t cnt;
19bc17a9
RM
554
555 for (cnt = 0; cnt < locale->categories[LC_CTYPE].ctype->nr_charclass; ++cnt)
556 if (strcmp (name, locale->categories[LC_CTYPE].ctype->classnames[cnt])
557 == 0)
558 return 1;
559
560 return 0;
561}
562
563
564void
565ctype_class_start (struct linereader *lr, struct localedef_t *locale,
566 enum token_t tok, const char *str,
567 struct charset_t *charset)
568{
569 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
ba1ffaa1 570 size_t cnt;
19bc17a9
RM
571
572 switch (tok)
573 {
574 case tok_upper:
575 str = "upper";
576 break;
577 case tok_lower:
578 str = "lower";
579 break;
580 case tok_alpha:
581 str = "alpha";
582 break;
583 case tok_digit:
584 str = "digit";
585 break;
586 case tok_xdigit:
587 str = "xdigit";
588 break;
589 case tok_space:
590 str = "space";
591 break;
592 case tok_print:
593 str = "print";
594 break;
595 case tok_graph:
596 str = "graph";
597 break;
598 case tok_blank:
599 str = "blank";
600 break;
601 case tok_cntrl:
602 str = "cntrl";
603 break;
604 case tok_punct:
605 str = "punct";
606 break;
607 case tok_alnum:
608 str = "alnum";
609 break;
610 case tok_ident:
611 break;
612 default:
613 assert (! "illegal token as class name: should not happen");
614 }
615
616 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
617 if (strcmp (str, ctype->classnames[cnt]) == 0)
618 break;
619
620 if (cnt >= ctype->nr_charclass)
621 assert (! "unknown class in class definition: should not happen");
622
623 ctype->class_done |= BIT (tok);
624
625 ctype->current_class_mask = 1 << cnt;
626 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
627}
628
629
630void
631ctype_class_from (struct linereader *lr, struct localedef_t *locale,
632 struct token *code, struct charset_t *charset)
633{
634 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
635 unsigned int value;
636
69f155d4
UD
637 value = charset_find_value (&charset->char_table, code->val.str.start,
638 code->val.str.len);
19bc17a9
RM
639
640 ctype->last_class_char = value;
641
ba1ffaa1 642 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
19bc17a9
RM
643 /* In the LC_CTYPE category it is no error when a character is
644 not found. This has to be ignored silently. */
645 return;
646
647 *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
648 &ctype->class_collection_act, value)
649 |= ctype->current_class_mask;
650}
651
652
653void
654ctype_class_to (struct linereader *lr, struct localedef_t *locale,
655 struct token *code, struct charset_t *charset)
656{
657 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
658 unsigned int value, cnt;
659
69f155d4
UD
660 value = charset_find_value (&charset->char_table, code->val.str.start,
661 code->val.str.len);
19bc17a9 662
880f421f
UD
663 /* In the LC_CTYPE category it is no error when a character is
664 not found. This has to be ignored silently. */
665 if ((wchar_t) ctype->last_class_char != ILLEGAL_CHAR_VALUE
666 && (wchar_t) value != ILLEGAL_CHAR_VALUE)
667 for (cnt = ctype->last_class_char + 1; cnt <= value; ++cnt)
668 *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
669 &ctype->class_collection_act, cnt)
670 |= ctype->current_class_mask;
19bc17a9
RM
671
672 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
673}
674
675
676void
677ctype_class_end (struct linereader *lr, struct localedef_t *locale)
678{
679 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
680
681 /* We have no special actions to perform here. */
682 ctype->current_class_mask = 0;
683 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
684}
685
686
687/* Character map handling. */
688void
689ctype_map_new (struct linereader *lr, struct localedef_t *locale,
690 enum token_t tok, struct token *code,
691 struct charset_t *charset)
692{
693 ctype_map_newP (lr, locale->categories[LC_CTYPE].ctype,
694 code->val.str.start, charset);
695}
696
697
698int
503054c0
RM
699ctype_is_charconv (struct linereader *lr, struct localedef_t *locale,
700 const char *name)
19bc17a9
RM
701{
702 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
703 size_t cnt;
704
705 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
706 if (strcmp (name, ctype->mapnames[cnt]) == 0)
707 return 1;
708
709 return 0;
710}
711
712
713void
714ctype_map_start (struct linereader *lr, struct localedef_t *locale,
715 enum token_t tok, const char *name, struct charset_t *charset)
716{
717 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
718 size_t cnt;
719
720 switch (tok)
721 {
722 case tok_toupper:
723 ctype->toupper_done = 1;
724 name = "toupper";
725 break;
726 case tok_tolower:
727 ctype->tolower_done = 1;
728 name = "tolower";
729 break;
730 case tok_ident:
731 break;
732 default:
733 assert (! "unknown token in category `LC_CTYPE' should not happen");
734 }
735
736 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
737 if (strcmp (name, ctype->mapnames[cnt]) == 0)
738 break;
739
740 if (cnt == ctype->map_collection_nr)
741 assert (! "unknown token in category `LC_CTYPE' should not happen");
742
743 ctype->last_map_idx = cnt;
744 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
745}
746
747
748void
749ctype_map_from (struct linereader *lr, struct localedef_t *locale,
750 struct token *code, struct charset_t *charset)
751{
752 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
753 unsigned int value;
754
69f155d4
UD
755 value = charset_find_value (&charset->char_table, code->val.str.start,
756 code->val.str.len);
19bc17a9 757
ba1ffaa1 758 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
19bc17a9
RM
759 /* In the LC_CTYPE category it is no error when a character is
760 not found. This has to be ignored silently. */
761 return;
762
763 assert (ctype->last_map_idx < ctype->map_collection_nr);
764
765 ctype->from_map_char = value;
766}
767
768
769void
770ctype_map_to (struct linereader *lr, struct localedef_t *locale,
771 struct token *code, struct charset_t *charset)
772{
773 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
774 unsigned int value;
775
69f155d4
UD
776 value = charset_find_value (&charset->char_table, code->val.str.start,
777 code->val.str.len);
19bc17a9 778
ba1ffaa1
UD
779 if ((wchar_t) ctype->from_map_char == ILLEGAL_CHAR_VALUE
780 || (wchar_t) value == ILLEGAL_CHAR_VALUE)
19bc17a9
RM
781 {
782 /* In the LC_CTYPE category it is no error when a character is
783 not found. This has to be ignored silently. */
784 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
785 return;
786 }
787
788 *find_idx (ctype, &ctype->map_collection[ctype->last_map_idx],
789 &ctype->map_collection_max[ctype->last_map_idx],
790 &ctype->map_collection_act[ctype->last_map_idx],
791 ctype->from_map_char) = value;
792
793 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
794}
795
796
797void
798ctype_map_end (struct linereader *lr, struct localedef_t *locale)
799{
800 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
801
802 ctype->last_map_idx = MAX_NR_CHARMAP;
803 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
804}
805
806
807/* Local functions. */
808static void
809ctype_class_newP (struct linereader *lr, struct locale_ctype_t *ctype,
810 const char *name)
811{
ba1ffaa1 812 size_t cnt;
19bc17a9
RM
813
814 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
815 if (strcmp (ctype->classnames[cnt], name) == 0)
816 break;
817
818 if (cnt < ctype->nr_charclass)
819 {
ba1ffaa1 820 lr_error (lr, _("character class `%s' already defined"), name);
19bc17a9
RM
821 return;
822 }
823
824 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
825 /* Exit code 2 is prescribed in P1003.2b. */
826 error (2, 0, _("\
827implementation limit: no more than %d character classes allowed"),
828 MAX_NR_CHARCLASS);
829
830 ctype->classnames[ctype->nr_charclass++] = name;
831}
832
833
834static void
835ctype_map_newP (struct linereader *lr, struct locale_ctype_t *ctype,
836 const char *name, struct charset_t *charset)
837{
838 size_t max_chars = 0;
ba1ffaa1 839 size_t cnt;
19bc17a9
RM
840
841 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
842 {
843 if (strcmp (ctype->mapnames[cnt], name) == 0)
844 break;
845
846 if (max_chars < ctype->map_collection_max[cnt])
847 max_chars = ctype->map_collection_max[cnt];
848 }
849
850 if (cnt < ctype->map_collection_nr)
851 {
ba1ffaa1 852 lr_error (lr, _("character map `%s' already defined"), name);
19bc17a9
RM
853 return;
854 }
855
856 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
857 /* Exit code 2 is prescribed in P1003.2b. */
858 error (2, 0, _("\
859implementation limit: no more than %d character maps allowed"),
860 MAX_NR_CHARMAP);
861
862 ctype->mapnames[cnt] = name;
863
864 if (max_chars == 0)
a5b7bf0e 865 ctype->map_collection_max[cnt] = charset->mb_cur_max == 1 ? 256 : 512;
19bc17a9
RM
866 else
867 ctype->map_collection_max[cnt] = max_chars;
868
7a12c6bb
RM
869 ctype->map_collection[cnt] = (u_int32_t *)
870 xmalloc (sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
19bc17a9 871 memset (ctype->map_collection[cnt], '\0',
7a12c6bb 872 sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
19bc17a9
RM
873 ctype->map_collection_act[cnt] = 256;
874
875 ++ctype->map_collection_nr;
876}
877
878
75cd5204
RM
879/* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
880 is possible if we only want ot extend the name array. */
7a12c6bb
RM
881static u_int32_t *
882find_idx (struct locale_ctype_t *ctype, u_int32_t **table, size_t *max,
19bc17a9
RM
883 size_t *act, unsigned int idx)
884{
885 size_t cnt;
886
887 if (idx < 256)
75cd5204 888 return table == NULL ? NULL : &(*table)[idx];
19bc17a9
RM
889
890 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
891 if (ctype->charnames[cnt] == idx)
892 break;
893
894 /* We have to distinguish two cases: the names is found or not. */
895 if (cnt == ctype->charnames_act)
896 {
897 /* Extend the name array. */
898 if (ctype->charnames_act == ctype->charnames_max)
899 {
900 ctype->charnames_max *= 2;
901 ctype->charnames = (unsigned int *)
902 xrealloc (ctype->charnames,
903 sizeof (unsigned int) * ctype->charnames_max);
904 }
905 ctype->charnames[ctype->charnames_act++] = idx;
906 }
907
75cd5204
RM
908 if (table == NULL)
909 /* We have done everything we are asked to do. */
910 return NULL;
911
19bc17a9
RM
912 if (cnt >= *act)
913 {
914 if (cnt >= *max)
915 {
916 size_t old_max = *max;
917 do
918 *max *= 2;
919 while (*max <= cnt);
920
921 *table =
7a12c6bb
RM
922 (u_int32_t *) xrealloc (*table, *max * sizeof (unsigned long int));
923 memset (&(*table)[old_max], '\0',
924 (*max - old_max) * sizeof (u_int32_t));
19bc17a9
RM
925 }
926
927 (*table)[cnt] = 0;
928 *act = cnt;
929 }
930
931 return &(*table)[cnt];
932}
933
934
935static void
936set_class_defaults (struct locale_ctype_t *ctype, struct charset_t *charset)
937{
938 /* These function defines the default values for the classes and conversions
939 according to POSIX.2 2.5.2.1.
940 It may seem that the order of these if-blocks is arbitrary but it is NOT.
941 Don't move them unless you know what you do! */
942
943 void set_default (int bit, int from, int to)
944 {
945 char tmp[2];
946 int ch;
947 /* Define string. */
948 strcpy (tmp, "?");
949
950 for (ch = from; ch <= to; ++ch)
951 {
952 unsigned int value;
953 tmp[0] = ch;
954
69f155d4 955 value = charset_find_value (&charset->char_table, tmp, 1);
880f421f 956 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
19bc17a9 957 {
880f421f
UD
958 if (!be_quiet)
959 error (0, 0, _("\
19bc17a9 960character `%s' not defined while needed as default value"),
880f421f 961 tmp);
19bc17a9
RM
962 continue;
963 }
964 else
965 ELEM (ctype, class_collection, , value) |= bit;
966 }
967 }
968
969 /* Set default values if keyword was not present. */
970 if ((ctype->class_done & BIT (tok_upper)) == 0)
971 /* "If this keyword [lower] is not specified, the lowercase letters
972 `A' through `Z', ..., shall automatically belong to this class,
973 with implementation defined character values." [P1003.2, 2.5.2.1] */
974 set_default (BIT (tok_upper), 'A', 'Z');
975
976 if ((ctype->class_done & BIT (tok_lower)) == 0)
977 /* "If this keyword [lower] is not specified, the lowercase letters
978 `a' through `z', ..., shall automatically belong to this class,
979 with implementation defined character values." [P1003.2, 2.5.2.1] */
980 set_default (BIT (tok_lower), 'a', 'z');
981
982 if ((ctype->class_done & BIT (tok_alpha)) == 0)
983 {
984 /* Table 2-6 in P1003.2 says that characters in class `upper' or
985 class `lower' *must* be in class `alpha'. */
986 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
987 size_t cnt;
988
989 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
990 if ((ctype->class_collection[cnt] & mask) != 0)
991 ctype->class_collection[cnt] |= BIT (tok_alpha);
992 }
993
994 if ((ctype->class_done & BIT (tok_digit)) == 0)
995 /* "If this keyword [digit] is not specified, the digits `0' through
996 `9', ..., shall automatically belong to this class, with
997 implementation-defined character values." [P1003.2, 2.5.2.1] */
998 set_default (BIT (tok_digit), '0', '9');
999
1000 /* "Only characters specified for the `alpha' and `digit' keyword
1001 shall be specified. Characters specified for the keyword `alpha'
1002 and `digit' are automatically included in this class. */
1003 {
1004 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
1005 size_t cnt;
1006
1007 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1008 if ((ctype->class_collection[cnt] & mask) != 0)
1009 ctype->class_collection[cnt] |= BIT (tok_alnum);
1010 }
1011
1012 if ((ctype->class_done & BIT (tok_space)) == 0)
1013 /* "If this keyword [space] is not specified, the characters <space>,
1014 <form-feed>, <newline>, <carriage-return>, <tab>, and
1015 <vertical-tab>, ..., shall automatically belong to this class,
1016 with implementation-defined character values." [P1003.2, 2.5.2.1] */
1017 {
1018 unsigned int value;
1019
69f155d4 1020 value = charset_find_value (&charset->char_table, "space", 5);
880f421f
UD
1021 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1022 {
1023 if (!be_quiet)
1024 error (0, 0, _("\
19bc17a9 1025character `%s' not defined while needed as default value"),
880f421f
UD
1026 "<space>");
1027 }
19bc17a9
RM
1028 else
1029 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1030
69f155d4 1031 value = charset_find_value (&charset->char_table, "form-feed", 9);
880f421f
UD
1032 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1033 {
1034 if (!be_quiet)
1035 error (0, 0, _("\
19bc17a9 1036character `%s' not defined while needed as default value"),
880f421f
UD
1037 "<form-feed>");
1038 }
19bc17a9
RM
1039 else
1040 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1041
69f155d4 1042 value = charset_find_value (&charset->char_table, "newline", 7);
880f421f
UD
1043 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1044 {
1045 if (!be_quiet)
1046 error (0, 0, _("\
19bc17a9 1047character `%s' not defined while needed as default value"),
880f421f
UD
1048 "<newline>");
1049 }
19bc17a9
RM
1050 else
1051 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1052
69f155d4 1053 value = charset_find_value (&charset->char_table, "carriage-return", 15);
880f421f
UD
1054 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1055 {
1056 if (!be_quiet)
1057 error (0, 0, _("\
19bc17a9 1058character `%s' not defined while needed as default value"),
880f421f
UD
1059 "<carriage-return>");
1060 }
19bc17a9
RM
1061 else
1062 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1063
69f155d4 1064 value = charset_find_value (&charset->char_table, "tab", 3);
880f421f
UD
1065 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1066 {
1067 if (!be_quiet)
1068 error (0, 0, _("\
19bc17a9 1069character `%s' not defined while needed as default value"),
880f421f
UD
1070 "<tab>");
1071 }
19bc17a9
RM
1072 else
1073 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1074
69f155d4 1075 value = charset_find_value (&charset->char_table, "vertical-tab", 12);
880f421f
UD
1076 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1077 {
1078 if (!be_quiet)
1079 error (0, 0, _("\
19bc17a9 1080character `%s' not defined while needed as default value"),
880f421f
UD
1081 "<vertical-tab>");
1082 }
19bc17a9
RM
1083 else
1084 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1085 }
1086
1087 if ((ctype->class_done & BIT (tok_xdigit)) == 0)
1088 /* "If this keyword is not specified, the digits `0' to `9', the
1089 uppercase letters `A' through `F', and the lowercase letters `a'
1090 through `f', ..., shall automatically belong to this class, with
1091 implementation defined character values." [P1003.2, 2.5.2.1] */
1092 {
1093 set_default (BIT (tok_xdigit), '0', '9');
1094 set_default (BIT (tok_xdigit), 'A', 'F');
1095 set_default (BIT (tok_xdigit), 'a', 'f');
1096 }
1097
1098 if ((ctype->class_done & BIT (tok_blank)) == 0)
1099 /* "If this keyword [blank] is unspecified, the characters <space> and
1100 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
1101 {
1102 unsigned int value;
1103
69f155d4 1104 value = charset_find_value (&charset->char_table, "space", 5);
880f421f
UD
1105 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1106 {
1107 if (!be_quiet)
1108 error (0, 0, _("\
19bc17a9 1109character `%s' not defined while needed as default value"),
880f421f
UD
1110 "<space>");
1111 }
19bc17a9
RM
1112 else
1113 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1114
69f155d4 1115 value = charset_find_value (&charset->char_table, "tab", 3);
880f421f
UD
1116 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1117 {
1118 if (!be_quiet)
1119 error (0, 0, _("\
19bc17a9 1120character `%s' not defined while needed as default value"),
880f421f
UD
1121 "<tab>");
1122 }
19bc17a9
RM
1123 else
1124 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1125 }
1126
1127 if ((ctype->class_done & BIT (tok_graph)) == 0)
1128 /* "If this keyword [graph] is not specified, characters specified for
1129 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
1130 shall belong to this character class." [P1003.2, 2.5.2.1] */
1131 {
1132 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1133 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1134 size_t cnt;
1135
1136 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1137 if ((ctype->class_collection[cnt] & mask) != 0)
1138 ctype->class_collection[cnt] |= BIT (tok_graph);
1139 }
1140
1141 if ((ctype->class_done & BIT (tok_print)) == 0)
1142 /* "If this keyword [print] is not provided, characters specified for
1143 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
1144 and the <space> character shall belong to this character class."
1145 [P1003.2, 2.5.2.1] */
1146 {
1147 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1148 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1149 size_t cnt;
ba1ffaa1 1150 wchar_t space;
19bc17a9
RM
1151
1152 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1153 if ((ctype->class_collection[cnt] & mask) != 0)
1154 ctype->class_collection[cnt] |= BIT (tok_print);
1155
69f155d4 1156 space = charset_find_value (&charset->char_table, "space", 5);
880f421f
UD
1157 if (space == ILLEGAL_CHAR_VALUE)
1158 {
1159 if (!be_quiet)
1160 error (0, 0, _("\
19bc17a9 1161character `%s' not defined while needed as default value"),
880f421f
UD
1162 "<space>");
1163 }
19bc17a9
RM
1164 else
1165 ELEM (ctype, class_collection, , space) |= BIT (tok_print);
1166 }
1167
1168 if (ctype->toupper_done == 0)
6d52618b 1169 /* "If this keyword [toupper] is not specified, the lowercase letters
19bc17a9
RM
1170 `a' through `z', and their corresponding uppercase letters `A' to
1171 `Z', ..., shall automatically be included, with implementation-
1172 defined character values." [P1003.2, 2.5.2.1] */
1173 {
1174 char tmp[4];
1175 int ch;
1176
1177 strcpy (tmp, "<?>");
1178
1179 for (ch = 'a'; ch <= 'z'; ++ch)
1180 {
1181 unsigned int value_from, value_to;
1182
1183 tmp[1] = (char) ch;
1184
69f155d4 1185 value_from = charset_find_value (&charset->char_table, &tmp[1], 1);
880f421f 1186 if ((wchar_t) value_from == ILLEGAL_CHAR_VALUE)
19bc17a9 1187 {
880f421f
UD
1188 if (!be_quiet)
1189 error (0, 0, _("\
ba1ffaa1 1190character `%s' not defined while needed as default value"),
880f421f 1191 tmp);
19bc17a9
RM
1192 continue;
1193 }
1194
1195 /* This conversion is implementation defined. */
1196 tmp[1] = (char) (ch + ('A' - 'a'));
69f155d4 1197 value_to = charset_find_value (&charset->char_table, &tmp[1], 1);
880f421f 1198 if ((wchar_t) value_to == ILLEGAL_CHAR_VALUE)
19bc17a9 1199 {
880f421f
UD
1200 if (!be_quiet)
1201 error (0, 0, _("\
19bc17a9 1202character `%s' not defined while needed as default value"),
880f421f 1203 tmp);
19bc17a9
RM
1204 continue;
1205 }
1206
1207 /* The index [0] is determined by the order of the
1208 `ctype_map_newP' calls in `ctype_startup'. */
1209 ELEM (ctype, map_collection, [0], value_from) = value_to;
1210 }
1211 }
1212
1213 if (ctype->tolower_done == 0)
1214 /* "If this keyword [tolower] is not specified, the mapping shall be
1215 the reverse mapping of the one specified to `toupper'." [P1003.2] */
1216 {
1217 size_t cnt;
1218
1219 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
1220 if (ctype->map_collection[0][cnt] != 0)
1221 ELEM (ctype, map_collection, [1],
1222 ctype->map_collection[0][cnt])
1223 = ctype->charnames[cnt];
1224 }
1225}
1226
1227
1228static void
75cd5204 1229allocate_arrays (struct locale_ctype_t *ctype, struct charset_t *charset)
19bc17a9
RM
1230{
1231 size_t idx;
1232
6d52618b
UD
1233 /* First we have to decide how we organize the arrays. It is easy
1234 for a one-byte character set. But multi-byte character set
1235 cannot be stored flat because the chars might be sparsely used.
1236 So we determine an optimal hashing function for the used
1237 characters.
1238
1239 We use a very trivial hashing function to store the sparse
1240 table. CH % TABSIZE is used as an index. To solve multiple hits
1241 we have N planes. This guarantees a fixed search time for a
1242 character [N / 2]. In the following code we determine the minmum
1243 value for TABSIZE * N, where TABSIZE >= 256. */
19bc17a9
RM
1244 size_t min_total = UINT_MAX;
1245 size_t act_size = 256;
1246
c84142e8
UD
1247 if (!be_quiet)
1248 fputs (_("\
19bc17a9 1249Computing table size for character classes might take a while..."),
c84142e8 1250 stderr);
19bc17a9
RM
1251
1252 while (act_size < min_total)
1253 {
1254 size_t cnt[act_size];
1255 size_t act_planes = 1;
1256
1257 memset (cnt, '\0', sizeof cnt);
1258
1259 for (idx = 0; idx < 256; ++idx)
1260 cnt[idx] = 1;
1261
1262 for (idx = 0; idx < ctype->charnames_act; ++idx)
1263 if (ctype->charnames[idx] >= 256)
1264 {
1265 size_t nr = ctype->charnames[idx] % act_size;
1266
1267 if (++cnt[nr] > act_planes)
1268 {
1269 act_planes = cnt[nr];
1270 if (act_size * act_planes >= min_total)
1271 break;
1272 }
1273 }
1274
1275 if (act_size * act_planes < min_total)
1276 {
1277 min_total = act_size * act_planes;
1278 ctype->plane_size = act_size;
1279 ctype->plane_cnt = act_planes;
1280 }
1281
1282 ++act_size;
1283 }
1284
c84142e8
UD
1285 if (!be_quiet)
1286 fputs (_(" done\n"), stderr);
19bc17a9 1287
75cd5204 1288
19bc17a9
RM
1289#if __BYTE_ORDER == __LITTLE_ENDIAN
1290# define NAMES_B1 ctype->names_el
1291# define NAMES_B2 ctype->names_eb
1292#else
1293# define NAMES_B1 ctype->names_eb
1294# define NAMES_B2 ctype->names_el
1295#endif
1296
7a12c6bb
RM
1297 ctype->names_eb = (u_int32_t *) xcalloc (ctype->plane_size
1298 * ctype->plane_cnt,
1299 sizeof (u_int32_t));
1300 ctype->names_el = (u_int32_t *) xcalloc (ctype->plane_size
1301 * ctype->plane_cnt,
1302 sizeof (u_int32_t));
19bc17a9
RM
1303
1304 for (idx = 1; idx < 256; ++idx)
1305 NAMES_B1[idx] = idx;
1306
1307 /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */
1308 NAMES_B1[0] = 1;
1309
1310 for (idx = 256; idx < ctype->charnames_act; ++idx)
1311 {
1312 size_t nr = (ctype->charnames[idx] % ctype->plane_size);
1313 size_t depth = 0;
1314
1315 while (NAMES_B1[nr + depth * ctype->plane_size])
1316 ++depth;
1317 assert (depth < ctype->plane_cnt);
1318
1319 NAMES_B1[nr + depth * ctype->plane_size] = ctype->charnames[idx];
1320
1321 /* Now for faster access remember the index in the NAMES_B array. */
1322 ctype->charnames[idx] = nr + depth * ctype->plane_size;
1323 }
1324 NAMES_B1[0] = 0;
1325
1326 for (idx = 0; idx < ctype->plane_size * ctype->plane_cnt; ++idx)
1327 NAMES_B2[idx] = SWAPU32 (NAMES_B1[idx]);
1328
1329
1330 /* You wonder about this amount of memory? This is only because some
1331 users do not manage to address the array with unsigned values or
1332 data types with range >= 256. '\200' would result in the array
1333 index -128. To help these poor people we duplicate the entries for
1334 128 up to 255 below the entry for \0. */
1335 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
1336 sizeof (char_class_t));
1337 ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
1338 * ctype->plane_cnt,
1339 sizeof (char_class32_t));
1340
1341 /* Fill in the character class information. */
1342#if __BYTE_ORDER == __LITTLE_ENDIAN
1343# define TRANS(w) CHAR_CLASS_TRANS (w)
1344# define TRANS32(w) CHAR_CLASS32_TRANS (w)
1345#else
1346# define TRANS(w) (w)
1347# define TRANS32(w) (w)
1348#endif
1349
1350 for (idx = 0; idx < ctype->class_collection_act; ++idx)
1351 if (ctype->charnames[idx] < 256)
1352 ctype->ctype_b[128 + ctype->charnames[idx]]
1353 = TRANS (ctype->class_collection[idx]);
1354
75cd5204
RM
1355 /* Mirror first 127 entries. We must take care that entry -1 is not
1356 mirrored because EOF == -1. */
1357 for (idx = 0; idx < 127; ++idx)
19bc17a9
RM
1358 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
1359
1360 /* The 32 bit array contains all characters. */
1361 for (idx = 0; idx < ctype->class_collection_act; ++idx)
1362 ctype->ctype32_b[ctype->charnames[idx]]
1363 = TRANS32 (ctype->class_collection[idx]);
1364
1365 /* Room for table of mappings. */
7a12c6bb
RM
1366 ctype->map_eb = (u_int32_t **) xmalloc (ctype->map_collection_nr
1367 * sizeof (u_int32_t *));
1368 ctype->map_el = (u_int32_t **) xmalloc (ctype->map_collection_nr
1369 * sizeof (u_int32_t *));
19bc17a9
RM
1370
1371 /* Fill in all mappings. */
1372 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
1373 {
1374 unsigned int idx2;
1375
1376 /* Allocate table. */
7a12c6bb
RM
1377 ctype->map_eb[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1378 * ctype->plane_cnt + 128)
1379 * sizeof (u_int32_t));
1380 ctype->map_el[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1381 * ctype->plane_cnt + 128)
1382 * sizeof (u_int32_t));
19bc17a9
RM
1383
1384#if __BYTE_ORDER == __LITTLE_ENDIAN
1385# define MAP_B1 ctype->map_el
1386# define MAP_B2 ctype->map_eb
1387#else
1388# define MAP_B1 ctype->map_eb
1389# define MAP_B2 ctype->map_el
1390#endif
1391
1392 /* Copy default value (identity mapping). */
1393 memcpy (&MAP_B1[idx][128], NAMES_B1,
7a12c6bb 1394 ctype->plane_size * ctype->plane_cnt * sizeof (u_int32_t));
19bc17a9
RM
1395
1396 /* Copy values from collection. */
1397 for (idx2 = 0; idx2 < ctype->map_collection_act[idx]; ++idx2)
1398 if (ctype->map_collection[idx][idx2] != 0)
1399 MAP_B1[idx][128 + ctype->charnames[idx2]] =
1400 ctype->map_collection[idx][idx2];
1401
75cd5204
RM
1402 /* Mirror first 127 entries. We must take care not to map entry
1403 -1 because EOF == -1. */
1404 for (idx2 = 0; idx2 < 127; ++idx2)
19bc17a9
RM
1405 MAP_B1[idx][idx2] = MAP_B1[idx][256 + idx2];
1406
75cd5204
RM
1407 /* EOF must map to EOF. */
1408 MAP_B1[idx][127] = EOF;
19bc17a9
RM
1409
1410 /* And now the other byte order. */
1411 for (idx2 = 0; idx2 < ctype->plane_size * ctype->plane_cnt + 128; ++idx2)
1412 MAP_B2[idx][idx2] = SWAPU32 (MAP_B1[idx][idx2]);
1413 }
1414
1415 /* Extra array for class and map names. */
7a12c6bb
RM
1416 ctype->class_name_ptr = (u_int32_t *) xmalloc (ctype->nr_charclass
1417 * sizeof (u_int32_t));
1418 ctype->map_name_ptr = (u_int32_t *) xmalloc (ctype->map_collection_nr
1419 * sizeof (u_int32_t));
75cd5204
RM
1420
1421 /* Array for width information. Because the expected width are very
1422 small we use only one single byte. This save space and we need
1423 not provide the information twice with both endianesses. */
1424 ctype->width = (unsigned char *) xmalloc (ctype->plane_size
1425 * ctype->plane_cnt);
1426 /* Initialize with default width value. */
1427 memset (ctype->width, charset->width_default,
1428 ctype->plane_size * ctype->plane_cnt);
1429 if (charset->width_rules != NULL)
1430 {
1431 size_t cnt;
1432
1433 for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
1434 if (charset->width_rules[cnt].width != charset->width_default)
1435 for (idx = charset->width_rules[cnt].from;
1436 idx <= charset->width_rules[cnt].to; ++idx)
1437 {
1438 size_t nr = idx % ctype->plane_size;
1439 size_t depth = 0;
1440
1441 while (NAMES_B1[nr + depth * ctype->plane_size] != nr)
1442 ++depth;
1443 assert (depth < ctype->plane_cnt);
1444
1445 ctype->width[nr + depth * ctype->plane_size]
1446 = charset->width_rules[cnt].width;
1447 }
1448 }
0200214b
RM
1449
1450 /* Compute MB_CUR_MAX. Please note the value mb_cur_max in the
1451 character set definition gives the number of bytes in the wide
1452 character representation. We compute the number of bytes used
1453 for the UTF-8 encoded form. */
1454 ctype->mb_cur_max = ((int []) { 2, 3, 5, 6 }) [charset->mb_cur_max - 1];
6990326c
RM
1455
1456 /* We need the name of the currently used 8-bit character set to
1457 make correct conversion between this 8-bit representation and the
1458 ISO 10646 character set used internally for wide characters. */
1459 ctype->codeset_name = charset->code_set_name;
19bc17a9 1460}