]> git.ipfire.org Git - thirdparty/glibc.git/blob - locale/programs/ld-ctype.c
update from main archive 961001
[thirdparty/glibc.git] / locale / programs / ld-ctype.c
1 /* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If
17 not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #include <alloca.h>
25 #include <endian.h>
26 #include <limits.h>
27 #include <string.h>
28
29 #include "locales.h"
30 #include "localeinfo.h"
31 #include "langinfo.h"
32 #include "locfile-token.h"
33 #include "stringtrans.h"
34
35 /* Uncomment the following line in the production version. */
36 /* define NDEBUG 1 */
37 #include <assert.h>
38
39
40 void *xmalloc (size_t __n);
41 void *xcalloc (size_t __n, size_t __s);
42 void *xrealloc (void *__ptr, size_t __n);
43
44
/* The bit used for representing a special class.  BITPOS turns a class
   token into its bit number, counted from tok_upper; BIT turns it into
   the corresponding mask.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (1 << BITPOS (class))

/* Expand to an lvalue for the entry of character VALUE in the table
   COLLECTION of CTYPE.  IDX is either empty or a subscript selecting one
   of several parallel tables.  The lookup itself is done by find_idx,
   which may enlarge the table.  All arguments are parenthesized so that
   arbitrary expressions can be passed.  */
#define ELEM(ctype, collection, idx, value) \
  *find_idx ((ctype), &(ctype)->collection idx, \
             &(ctype)->collection##_max idx, \
             &(ctype)->collection##_act idx, (value))

/* Reverse the byte order of the low 32 bits of W.  Every byte is masked
   before it is moved, so the result never has bits above bit 31 set,
   even when W has a type wider than 32 bits.  */
#define SWAPU32(w) \
  ((((w) & 0xffu) << 24) | (((w) & 0xff00u) << 8) \
   | (((w) >> 8) & 0xff00u) | (((w) >> 24) & 0xffu))

/* Reverse the byte order of the low 16 bits of W.  */
#define SWAPU16(w) \
  ((((w) >> 8) & 0xff) | (((w) & 0xff) << 8))


/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t u_int16_t
#define CHAR_CLASS_TRANS SWAPU16
#define char_class32_t u_int32_t
#define CHAR_CLASS32_TRANS SWAPU32
67
68
69 /* The real definition of the struct for the LC_CTYPE locale. */
70 struct locale_ctype_t
71 {
72 unsigned int *charnames;
73 size_t charnames_max;
74 size_t charnames_act;
75
76 /* We will allow up to 8 * sizeof(u_int32_t) - 1 character classes. */
77 #define MAX_NR_CHARCLASS (8 * sizeof (u_int32_t) - 1)
78 int nr_charclass;
79 const char *classnames[MAX_NR_CHARCLASS];
80 unsigned long int current_class_mask;
81 unsigned int last_class_char;
82 u_int32_t *class_collection;
83 size_t class_collection_max;
84 size_t class_collection_act;
85 unsigned long int class_done;
86
87 /* If the following number ever turns out to be too small simply
88 increase it. But I doubt it will. --drepper@gnu */
89 #define MAX_NR_CHARMAP 16
90 const char *mapnames[MAX_NR_CHARMAP];
91 u_int32_t *map_collection[MAX_NR_CHARMAP];
92 u_int32_t map_collection_max[MAX_NR_CHARMAP];
93 u_int32_t map_collection_act[MAX_NR_CHARMAP];
94 size_t map_collection_nr;
95 size_t last_map_idx;
96 unsigned int from_map_char;
97 int toupper_done;
98 int tolower_done;
99
100 /* The arrays for the binary representation. */
101 u_int32_t plane_size;
102 u_int32_t plane_cnt;
103 char_class_t *ctype_b;
104 char_class32_t *ctype32_b;
105 u_int32_t *names_el;
106 u_int32_t *names_eb;
107 u_int32_t **map_eb;
108 u_int32_t **map_el;
109 u_int32_t *class_name_ptr;
110 u_int32_t *map_name_ptr;
111 unsigned char *width;
112 u_int32_t mb_cur_max;
113 const char *codeset_name;
114 };
115
116
117 /* Prototypes for local functions. */
118 static void ctype_class_newP (struct linereader *lr,
119 struct locale_ctype_t *ctype, const char *name);
120 static void ctype_map_newP (struct linereader *lr,
121 struct locale_ctype_t *ctype,
122 const char *name, struct charset_t *charset);
123 static u_int32_t *find_idx (struct locale_ctype_t *ctype, u_int32_t **table,
124 size_t *max, size_t *act, unsigned int idx);
125 static void set_class_defaults (struct locale_ctype_t *ctype,
126 struct charset_t *charset);
127 static void allocate_arrays (struct locale_ctype_t *ctype,
128 struct charset_t *charset);
129
130
131 void
132 ctype_startup (struct linereader *lr, struct localedef_t *locale,
133 struct charset_t *charset)
134 {
135 unsigned int cnt;
136 struct locale_ctype_t *ctype;
137
138 /* It is important that we always use UCS1 encoding for strings now. */
139 encoding_method = ENC_UCS1;
140
141 /* Allocate the needed room. */
142 locale->categories[LC_CTYPE].ctype = ctype =
143 (struct locale_ctype_t *) xmalloc (sizeof (struct locale_ctype_t));
144
145 /* We have no names seen yet. */
146 ctype->charnames_max = charset->mb_cur_max == 1 ? 256 : 512;
147 ctype->charnames =
148 (unsigned int *) xmalloc (ctype->charnames_max * sizeof (unsigned int));
149 for (cnt = 0; cnt < 256; ++cnt)
150 ctype->charnames[cnt] = cnt;
151 ctype->charnames_act = 256;
152
153 /* Fill character class information. */
154 ctype->nr_charclass = 0;
155 ctype->current_class_mask = 0;
156 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
157 /* The order of the following instructions determines the bit
158 positions! */
159 ctype_class_newP (lr, ctype, "upper");
160 ctype_class_newP (lr, ctype, "lower");
161 ctype_class_newP (lr, ctype, "alpha");
162 ctype_class_newP (lr, ctype, "digit");
163 ctype_class_newP (lr, ctype, "xdigit");
164 ctype_class_newP (lr, ctype, "space");
165 ctype_class_newP (lr, ctype, "print");
166 ctype_class_newP (lr, ctype, "graph");
167 ctype_class_newP (lr, ctype, "blank");
168 ctype_class_newP (lr, ctype, "cntrl");
169 ctype_class_newP (lr, ctype, "punct");
170 ctype_class_newP (lr, ctype, "alnum");
171
172 ctype->class_collection_max = charset->mb_cur_max == 1 ? 256 : 512;
173 ctype->class_collection
174 = (u_int32_t *) xmalloc (sizeof (unsigned long int)
175 * ctype->class_collection_max);
176 memset (ctype->class_collection, '\0',
177 sizeof (unsigned long int) * ctype->class_collection_max);
178 ctype->class_collection_act = 256;
179
180 /* Fill character map information. */
181 ctype->map_collection_nr = 0;
182 ctype->last_map_idx = MAX_NR_CHARMAP;
183 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
184 ctype_map_newP (lr, ctype, "toupper", charset);
185 ctype_map_newP (lr, ctype, "tolower", charset);
186
187 /* Fill first 256 entries in `toupper' and `tolower' arrays. */
188 for (cnt = 0; cnt < 256; ++cnt)
189 {
190 ctype->map_collection[0][cnt] = cnt;
191 ctype->map_collection[1][cnt] = cnt;
192 }
193 }
194
195
196 void
197 ctype_finish (struct localedef_t *locale, struct charset_t *charset)
198 {
199 /* See POSIX.2, table 2-6 for the meaning of the following table. */
200 #define NCLASS 12
201 static const struct
202 {
203 const char *name;
204 const char allow[NCLASS];
205 }
206 valid_table[NCLASS] =
207 {
208 /* The order is important. See token.h for more information.
209 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
210 { "upper", "--MX-XDDXXX-" },
211 { "lower", "--MX-XDDXXX-" },
212 { "alpha", "---X-XDDXXX-" },
213 { "digit", "XXX--XDDXXX-" },
214 { "xdigit", "-----XDDXXX-" },
215 { "space", "XXXXX------X" },
216 { "print", "---------X--" },
217 { "graph", "---------X--" },
218 { "blank", "XXXXXM-----X" },
219 { "cntrl", "XXXXX-XX--XX" },
220 { "punct", "XXXXX-DD-X-X" },
221 { "alnum", "-----XDDXXX-" }
222 };
223 size_t cnt;
224 int cls1, cls2;
225 unsigned int space_value;
226 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
227
228 /* Set default value for classes not specified. */
229 set_class_defaults (ctype, charset);
230
231 /* Check according to table. */
232 for (cnt = 0; cnt < ctype->class_collection_max; ++cnt)
233 {
234 unsigned long int tmp;
235
236 tmp = ctype->class_collection[cnt];
237 if (tmp == 0)
238 continue;
239
240 for (cls1 = 0; cls1 < NCLASS; ++cls1)
241 if ((tmp & (1 << cls1)) != 0)
242 for (cls2 = 0; cls2 < NCLASS; ++cls2)
243 if (valid_table[cls1].allow[cls2] != '-')
244 {
245 int eq = (tmp & (1 << cls2)) != 0;
246 switch (valid_table[cls1].allow[cls2])
247 {
248 case 'M':
249 if (!eq)
250 {
251 char buf[17];
252 char *cp = buf;
253 unsigned int value;
254
255 value = ctype->charnames[cnt];
256
257 if ((value & 0xff000000) != 0)
258 cp += sprintf (cp, "\\%o", (value >> 24) & 0xff);
259 if ((value & 0xffff0000) != 0)
260 cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
261 if ((value & 0xffffff00) != 0)
262 cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
263 sprintf (cp, "\\%o", value & 0xff);
264
265 error (0, 0, _("\
266 character %s'%s' in class `%s' must be in class `%s'"), value > 256 ? "L" : "",
267 cp, valid_table[cls1].name,
268 valid_table[cls2].name);
269 }
270 break;
271
272 case 'X':
273 if (eq)
274 {
275 char buf[17];
276 char *cp = buf;
277 unsigned int value;
278
279 value = ctype->charnames[cnt];
280
281 if ((value & 0xff000000) != 0)
282 cp += sprintf (cp, "\\%o", value >> 24);
283 if ((value & 0xffff0000) != 0)
284 cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
285 if ((value & 0xffffff00) != 0)
286 cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
287 sprintf (cp, "\\%o", value & 0xff);
288
289 error (0, 0, _("\
290 character %s'%s' in class `%s' must not be in class `%s'"),
291 value > 256 ? "L" : "", cp,
292 valid_table[cls1].name, valid_table[cls2].name);
293 }
294 break;
295
296 case 'D':
297 ctype->class_collection[cnt] |= 1 << cls2;
298 break;
299
300 default:
301 error (5, 0, _("internal error in %s, line %u"),
302 __FUNCTION__, __LINE__);
303 }
304 }
305 }
306
307 /* ... and now test <SP> as a special case. */
308 space_value = charset_find_value (charset, "SP", 2);
309 if (space_value == ILLEGAL_CHAR_VALUE)
310 error (0, 0, _("character <SP> not defined in character map"));
311 else if ((cnt = BITPOS (tok_space),
312 (ELEM (ctype, class_collection, , space_value)
313 & BIT (tok_space)) == 0)
314 || (cnt = BITPOS (tok_blank),
315 (ELEM (ctype, class_collection, , space_value)
316 & BIT (tok_blank)) == 0))
317 error (0, 0, _("<SP> character not in class `%s'"),
318 valid_table[cnt].name);
319 else if ((cnt = BITPOS (tok_punct),
320 (ELEM (ctype, class_collection, , space_value)
321 & BIT (tok_punct)) != 0)
322 || (cnt = BITPOS (tok_graph),
323 (ELEM (ctype, class_collection, , space_value)
324 & BIT (tok_graph))
325 != 0))
326 error (0, 0, _("<SP> character must not be in class `%s'"),
327 valid_table[cnt].name);
328 else
329 ELEM (ctype, class_collection, , space_value) |= BIT (tok_print);
330
331 /* Now that the tests are done make sure the name array contains all
332 characters which are handled in the WIDTH section of the
333 character set definition file. */
334 if (charset->width_rules != NULL)
335 for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
336 {
337 size_t inner;
338 for (inner = charset->width_rules[cnt].from;
339 inner <= charset->width_rules[cnt].to; ++inner)
340 (void) find_idx (ctype, NULL, NULL, NULL, inner);
341 }
342 }
343
344
345 void
346 ctype_output (struct localedef_t *locale, struct charset_t *charset,
347 const char *output_path)
348 {
349 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
350 const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
351 + 2 * (ctype->map_collection_nr - 2));
352 struct iovec iov[2 + nelems + ctype->nr_charclass
353 + ctype->map_collection_nr];
354 struct locale_file data;
355 u_int32_t idx[nelems];
356 size_t elem, cnt, offset, total;
357
358
359 if ((locale->binary & (1 << LC_CTYPE)) != 0)
360 {
361 iov[0].iov_base = ctype;
362 iov[0].iov_len = locale->len[LC_CTYPE];
363
364 write_locale_data (output_path, "LC_CTYPE", 1, iov);
365
366 return;
367 }
368
369
370 /* Now prepare the output: Find the sizes of the table we can use. */
371 allocate_arrays (ctype, charset);
372
373 data.magic = LIMAGIC (LC_CTYPE);
374 data.n = nelems;
375 iov[0].iov_base = (void *) &data;
376 iov[0].iov_len = sizeof (data);
377
378 iov[1].iov_base = (void *) idx;
379 iov[1].iov_len = sizeof (idx);
380
381 idx[0] = iov[0].iov_len + iov[1].iov_len;
382 offset = 0;
383
384 for (elem = 0; elem < nelems; ++elem)
385 {
386 if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
387 switch (elem)
388 {
389 #define CTYPE_DATA(name, base, len) \
390 case _NL_ITEM_INDEX (name): \
391 iov[2 + elem + offset].iov_base = (base); \
392 iov[2 + elem + offset].iov_len = (len); \
393 if (elem + 1 < nelems) \
394 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
395 break
396
397 CTYPE_DATA (_NL_CTYPE_CLASS,
398 ctype->ctype_b,
399 (256 + 128) * sizeof (char_class_t));
400
401 CTYPE_DATA (_NL_CTYPE_TOUPPER_EB,
402 ctype->map_eb[0],
403 (ctype->plane_size * ctype->plane_cnt + 128)
404 * sizeof (u_int32_t));
405 CTYPE_DATA (_NL_CTYPE_TOLOWER_EB,
406 ctype->map_eb[1],
407 (ctype->plane_size * ctype->plane_cnt + 128)
408 * sizeof (u_int32_t));
409
410 CTYPE_DATA (_NL_CTYPE_TOUPPER_EL,
411 ctype->map_el[0],
412 (ctype->plane_size * ctype->plane_cnt + 128)
413 * sizeof (u_int32_t));
414 CTYPE_DATA (_NL_CTYPE_TOLOWER_EL,
415 ctype->map_el[1],
416 (ctype->plane_size * ctype->plane_cnt + 128)
417 * sizeof (u_int32_t));
418
419 CTYPE_DATA (_NL_CTYPE_CLASS32,
420 ctype->ctype32_b,
421 (ctype->plane_size * ctype->plane_cnt
422 * sizeof (char_class32_t)));
423
424 CTYPE_DATA (_NL_CTYPE_NAMES_EB,
425 ctype->names_eb, (ctype->plane_size * ctype->plane_cnt
426 * sizeof (u_int32_t)));
427 CTYPE_DATA (_NL_CTYPE_NAMES_EL,
428 ctype->names_el, (ctype->plane_size * ctype->plane_cnt
429 * sizeof (u_int32_t)));
430
431 CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
432 &ctype->plane_size, sizeof (u_int32_t));
433 CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
434 &ctype->plane_cnt, sizeof (u_int32_t));
435
436 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
437 /* The class name array. */
438 total = 0;
439 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
440 {
441 iov[2 + elem + offset].iov_base
442 = (void *) ctype->classnames[cnt];
443 iov[2 + elem + offset].iov_len
444 = strlen (ctype->classnames[cnt]) + 1;
445 total += iov[2 + elem + offset].iov_len;
446 }
447 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
448 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
449 total += 1 + (4 - ((total + 1) % 4));
450
451 if (elem + 1 < nelems)
452 idx[elem + 1] = idx[elem] + total;
453 break;
454
455 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
456 /* The class name array. */
457 total = 0;
458 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
459 {
460 iov[2 + elem + offset].iov_base
461 = (void *) ctype->mapnames[cnt];
462 iov[2 + elem + offset].iov_len
463 = strlen (ctype->mapnames[cnt]) + 1;
464 total += iov[2 + elem + offset].iov_len;
465 }
466 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
467 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
468 total += 1 + (4 - ((total + 1) % 4));
469
470 if (elem + 1 < nelems)
471 idx[elem + 1] = idx[elem] + total;
472 break;
473
474 CTYPE_DATA (_NL_CTYPE_WIDTH,
475 ctype->width, ctype->plane_size * ctype->plane_cnt);
476
477 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
478 &ctype->mb_cur_max, sizeof (u_int32_t));
479
480 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
481 total = strlen (ctype->codeset_name) + 1;
482 if (total % 4 == 0)
483 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
484 else
485 {
486 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
487 memcpy (iov[2 + elem + offset].iov_base, ctype->codeset_name,
488 total);
489 total = (total + 3) & ~3;
490 }
491 iov[2 + elem + offset].iov_len = total;
492 if (elem + 1 < nelems)
493 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
494 break;
495
496 default:
497 assert (! "unknown CTYPE element");
498 }
499 else
500 {
501 /* Handle extra maps. */
502 size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) >> 1;
503
504 if (((elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) & 1) == 0)
505 iov[2 + elem + offset].iov_base = ctype->map_eb[nr];
506 else
507 iov[2 + elem + offset].iov_base = ctype->map_el[nr];
508
509 iov[2 + elem + offset].iov_len = ((ctype->plane_size
510 * ctype->plane_cnt + 128)
511 * sizeof (u_int32_t));
512
513 if (elem + 1 < nelems)
514 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
515 }
516 }
517
518 assert (2 + elem + offset == (nelems + ctype->nr_charclass
519 + ctype->map_collection_nr + 2));
520
521 write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
522 }
523
524
525 /* Character class handling. */
526 void
527 ctype_class_new (struct linereader *lr, struct localedef_t *locale,
528 enum token_t tok, struct token *code,
529 struct charset_t *charset)
530 {
531 ctype_class_newP (lr, locale->categories[LC_CTYPE].ctype,
532 code->val.str.start);
533 }
534
535
536 int
537 ctype_is_charclass (struct linereader *lr, struct localedef_t *locale,
538 const char *name)
539 {
540 int cnt;
541
542 for (cnt = 0; cnt < locale->categories[LC_CTYPE].ctype->nr_charclass; ++cnt)
543 if (strcmp (name, locale->categories[LC_CTYPE].ctype->classnames[cnt])
544 == 0)
545 return 1;
546
547 return 0;
548 }
549
550
551 void
552 ctype_class_start (struct linereader *lr, struct localedef_t *locale,
553 enum token_t tok, const char *str,
554 struct charset_t *charset)
555 {
556 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
557 int cnt;
558
559 switch (tok)
560 {
561 case tok_upper:
562 str = "upper";
563 break;
564 case tok_lower:
565 str = "lower";
566 break;
567 case tok_alpha:
568 str = "alpha";
569 break;
570 case tok_digit:
571 str = "digit";
572 break;
573 case tok_xdigit:
574 str = "xdigit";
575 break;
576 case tok_space:
577 str = "space";
578 break;
579 case tok_print:
580 str = "print";
581 break;
582 case tok_graph:
583 str = "graph";
584 break;
585 case tok_blank:
586 str = "blank";
587 break;
588 case tok_cntrl:
589 str = "cntrl";
590 break;
591 case tok_punct:
592 str = "punct";
593 break;
594 case tok_alnum:
595 str = "alnum";
596 break;
597 case tok_ident:
598 break;
599 default:
600 assert (! "illegal token as class name: should not happen");
601 }
602
603 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
604 if (strcmp (str, ctype->classnames[cnt]) == 0)
605 break;
606
607 if (cnt >= ctype->nr_charclass)
608 assert (! "unknown class in class definition: should not happen");
609
610 ctype->class_done |= BIT (tok);
611
612 ctype->current_class_mask = 1 << cnt;
613 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
614 }
615
616
617 void
618 ctype_class_from (struct linereader *lr, struct localedef_t *locale,
619 struct token *code, struct charset_t *charset)
620 {
621 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
622 unsigned int value;
623
624 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
625
626 ctype->last_class_char = value;
627
628 if (value == ILLEGAL_CHAR_VALUE)
629 /* In the LC_CTYPE category it is no error when a character is
630 not found. This has to be ignored silently. */
631 return;
632
633 *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
634 &ctype->class_collection_act, value)
635 |= ctype->current_class_mask;
636 }
637
638
639 void
640 ctype_class_to (struct linereader *lr, struct localedef_t *locale,
641 struct token *code, struct charset_t *charset)
642 {
643 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
644 unsigned int value, cnt;
645
646 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
647
648 assert (value >= ctype->last_class_char);
649
650 for (cnt = ctype->last_class_char + 1; cnt <= value; ++cnt)
651 *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
652 &ctype->class_collection_act, cnt)
653 |= ctype->current_class_mask;
654
655 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
656 }
657
658
659 void
660 ctype_class_end (struct linereader *lr, struct localedef_t *locale)
661 {
662 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
663
664 /* We have no special actions to perform here. */
665 ctype->current_class_mask = 0;
666 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
667 }
668
669
670 /* Character map handling. */
671 void
672 ctype_map_new (struct linereader *lr, struct localedef_t *locale,
673 enum token_t tok, struct token *code,
674 struct charset_t *charset)
675 {
676 ctype_map_newP (lr, locale->categories[LC_CTYPE].ctype,
677 code->val.str.start, charset);
678 }
679
680
681 int
682 ctype_is_charconv (struct linereader *lr, struct localedef_t *locale,
683 const char *name)
684 {
685 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
686 size_t cnt;
687
688 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
689 if (strcmp (name, ctype->mapnames[cnt]) == 0)
690 return 1;
691
692 return 0;
693 }
694
695
696 void
697 ctype_map_start (struct linereader *lr, struct localedef_t *locale,
698 enum token_t tok, const char *name, struct charset_t *charset)
699 {
700 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
701 size_t cnt;
702
703 switch (tok)
704 {
705 case tok_toupper:
706 ctype->toupper_done = 1;
707 name = "toupper";
708 break;
709 case tok_tolower:
710 ctype->tolower_done = 1;
711 name = "tolower";
712 break;
713 case tok_ident:
714 break;
715 default:
716 assert (! "unknown token in category `LC_CTYPE' should not happen");
717 }
718
719 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
720 if (strcmp (name, ctype->mapnames[cnt]) == 0)
721 break;
722
723 if (cnt == ctype->map_collection_nr)
724 assert (! "unknown token in category `LC_CTYPE' should not happen");
725
726 ctype->last_map_idx = cnt;
727 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
728 }
729
730
731 void
732 ctype_map_from (struct linereader *lr, struct localedef_t *locale,
733 struct token *code, struct charset_t *charset)
734 {
735 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
736 unsigned int value;
737
738 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
739
740 if (value == ILLEGAL_CHAR_VALUE)
741 /* In the LC_CTYPE category it is no error when a character is
742 not found. This has to be ignored silently. */
743 return;
744
745 assert (ctype->last_map_idx < ctype->map_collection_nr);
746
747 ctype->from_map_char = value;
748 }
749
750
751 void
752 ctype_map_to (struct linereader *lr, struct localedef_t *locale,
753 struct token *code, struct charset_t *charset)
754 {
755 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
756 unsigned int value;
757
758 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
759
760 if (ctype->from_map_char == ILLEGAL_CHAR_VALUE
761 || value == ILLEGAL_CHAR_VALUE)
762 {
763 /* In the LC_CTYPE category it is no error when a character is
764 not found. This has to be ignored silently. */
765 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
766 return;
767 }
768
769 *find_idx (ctype, &ctype->map_collection[ctype->last_map_idx],
770 &ctype->map_collection_max[ctype->last_map_idx],
771 &ctype->map_collection_act[ctype->last_map_idx],
772 ctype->from_map_char) = value;
773
774 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
775 }
776
777
778 void
779 ctype_map_end (struct linereader *lr, struct localedef_t *locale)
780 {
781 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
782
783 ctype->last_map_idx = MAX_NR_CHARMAP;
784 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
785 }
786
787
788 /* Local functions. */
789 static void
790 ctype_class_newP (struct linereader *lr, struct locale_ctype_t *ctype,
791 const char *name)
792 {
793 int cnt;
794
795 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
796 if (strcmp (ctype->classnames[cnt], name) == 0)
797 break;
798
799 if (cnt < ctype->nr_charclass)
800 {
801 lr_error (lr, _("character class `%s' already defined"));
802 return;
803 }
804
805 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
806 /* Exit code 2 is prescribed in P1003.2b. */
807 error (2, 0, _("\
808 implementation limit: no more than %d character classes allowed"),
809 MAX_NR_CHARCLASS);
810
811 ctype->classnames[ctype->nr_charclass++] = name;
812 }
813
814
815 static void
816 ctype_map_newP (struct linereader *lr, struct locale_ctype_t *ctype,
817 const char *name, struct charset_t *charset)
818 {
819 size_t max_chars = 0;
820 int cnt;
821
822 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
823 {
824 if (strcmp (ctype->mapnames[cnt], name) == 0)
825 break;
826
827 if (max_chars < ctype->map_collection_max[cnt])
828 max_chars = ctype->map_collection_max[cnt];
829 }
830
831 if (cnt < ctype->map_collection_nr)
832 {
833 lr_error (lr, _("character map `%s' already defined"));
834 return;
835 }
836
837 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
838 /* Exit code 2 is prescribed in P1003.2b. */
839 error (2, 0, _("\
840 implementation limit: no more than %d character maps allowed"),
841 MAX_NR_CHARMAP);
842
843 ctype->mapnames[cnt] = name;
844
845 if (max_chars == 0)
846 ctype->map_collection_max[cnt] = charset->mb_cur_max == 1 ? 256 : 512;
847 else
848 ctype->map_collection_max[cnt] = max_chars;
849
850 ctype->map_collection[cnt] = (u_int32_t *)
851 xmalloc (sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
852 memset (ctype->map_collection[cnt], '\0',
853 sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
854 ctype->map_collection_act[cnt] = 256;
855
856 ++ctype->map_collection_nr;
857 }
858
859
860 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
861 is possible if we only want ot extend the name array. */
862 static u_int32_t *
863 find_idx (struct locale_ctype_t *ctype, u_int32_t **table, size_t *max,
864 size_t *act, unsigned int idx)
865 {
866 size_t cnt;
867
868 if (idx < 256)
869 return table == NULL ? NULL : &(*table)[idx];
870
871 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
872 if (ctype->charnames[cnt] == idx)
873 break;
874
875 /* We have to distinguish two cases: the names is found or not. */
876 if (cnt == ctype->charnames_act)
877 {
878 /* Extend the name array. */
879 if (ctype->charnames_act == ctype->charnames_max)
880 {
881 ctype->charnames_max *= 2;
882 ctype->charnames = (unsigned int *)
883 xrealloc (ctype->charnames,
884 sizeof (unsigned int) * ctype->charnames_max);
885 }
886 ctype->charnames[ctype->charnames_act++] = idx;
887 }
888
889 if (table == NULL)
890 /* We have done everything we are asked to do. */
891 return NULL;
892
893 if (cnt >= *act)
894 {
895 if (cnt >= *max)
896 {
897 size_t old_max = *max;
898 do
899 *max *= 2;
900 while (*max <= cnt);
901
902 *table =
903 (u_int32_t *) xrealloc (*table, *max * sizeof (unsigned long int));
904 memset (&(*table)[old_max], '\0',
905 (*max - old_max) * sizeof (u_int32_t));
906 }
907
908 (*table)[cnt] = 0;
909 *act = cnt;
910 }
911
912 return &(*table)[cnt];
913 }
914
915
916 static void
917 set_class_defaults (struct locale_ctype_t *ctype, struct charset_t *charset)
918 {
919 /* These function defines the default values for the classes and conversions
920 according to POSIX.2 2.5.2.1.
921 It may seem that the order of these if-blocks is arbitrary but it is NOT.
922 Don't move them unless you know what you do! */
923
924 void set_default (int bit, int from, int to)
925 {
926 char tmp[2];
927 int ch;
928 /* Define string. */
929 strcpy (tmp, "?");
930
931 for (ch = from; ch <= to; ++ch)
932 {
933 unsigned int value;
934 tmp[0] = ch;
935
936 value = charset_find_value (charset, tmp, 1);
937 if (value == ILLEGAL_CHAR_VALUE)
938 {
939 error (0, 0, _("\
940 character `%s' not defined while needed as default value"),
941 tmp);
942 continue;
943 }
944 else
945 ELEM (ctype, class_collection, , value) |= bit;
946 }
947 }
948
949 /* Set default values if keyword was not present. */
950 if ((ctype->class_done & BIT (tok_upper)) == 0)
951 /* "If this keyword [upper] is not specified, the uppercase letters
952 `A' through `Z', ..., shall automatically belong to this class,
953 with implementation defined character values." [P1003.2, 2.5.2.1] */
954 set_default (BIT (tok_upper), 'A', 'Z');
955
956 if ((ctype->class_done & BIT (tok_lower)) == 0)
957 /* "If this keyword [lower] is not specified, the lowercase letters
958 `a' through `z', ..., shall automatically belong to this class,
959 with implementation defined character values." [P1003.2, 2.5.2.1] */
960 set_default (BIT (tok_lower), 'a', 'z');
961
962 if ((ctype->class_done & BIT (tok_alpha)) == 0)
963 {
964 /* Table 2-6 in P1003.2 says that characters in class `upper' or
965 class `lower' *must* be in class `alpha'. */
966 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
967 size_t cnt;
968
969 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
970 if ((ctype->class_collection[cnt] & mask) != 0)
971 ctype->class_collection[cnt] |= BIT (tok_alpha);
972 }
973
974 if ((ctype->class_done & BIT (tok_digit)) == 0)
975 /* "If this keyword [digit] is not specified, the digits `0' through
976 `9', ..., shall automatically belong to this class, with
977 implementation-defined character values." [P1003.2, 2.5.2.1] */
978 set_default (BIT (tok_digit), '0', '9');
979
980 /* "Only characters specified for the `alpha' and `digit' keyword
981 shall be specified. Characters specified for the keyword `alpha'
982 and `digit' are automatically included in this class. */
983 {
984 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
985 size_t cnt;
986
987 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
988 if ((ctype->class_collection[cnt] & mask) != 0)
989 ctype->class_collection[cnt] |= BIT (tok_alnum);
990 }
991
992 if ((ctype->class_done & BIT (tok_space)) == 0)
993 /* "If this keyword [space] is not specified, the characters <space>,
994 <form-feed>, <newline>, <carriage-return>, <tab>, and
995 <vertical-tab>, ..., shall automatically belong to this class,
996 with implementation-defined character values." [P1003.2, 2.5.2.1] */
997 {
998 unsigned int value;
999
1000 value = charset_find_value (charset, "space", 5);
1001 if (value == ILLEGAL_CHAR_VALUE)
1002 error (0, 0, _("\
1003 character `%s' not defined while needed as default value"),
1004 "<space>");
1005 else
1006 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1007
1008 value = charset_find_value (charset, "form-feed", 9);
1009 if (value == ILLEGAL_CHAR_VALUE)
1010 error (0, 0, _("\
1011 character `%s' not defined while needed as default value"),
1012 "<form-feed>");
1013 else
1014 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1015
1016 value = charset_find_value (charset, "newline", 7);
1017 if (value == ILLEGAL_CHAR_VALUE)
1018 error (0, 0, _("\
1019 character `%s' not defined while needed as default value"),
1020 "<newline>");
1021 else
1022 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1023
1024 value = charset_find_value (charset, "carriage-return", 15);
1025 if (value == ILLEGAL_CHAR_VALUE)
1026 error (0, 0, _("\
1027 character `%s' not defined while needed as default value"),
1028 "<carriage-return>");
1029 else
1030 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1031
1032 value = charset_find_value (charset, "tab", 3);
1033 if (value == ILLEGAL_CHAR_VALUE)
1034 error (0, 0, _("\
1035 character `%s' not defined while needed as default value"),
1036 "<tab>");
1037 else
1038 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1039
1040 value = charset_find_value (charset, "vertical-tab", 12);
1041 if (value == ILLEGAL_CHAR_VALUE)
1042 error (0, 0, _("\
1043 character `%s' not defined while needed as default value"),
1044 "<vertical-tab>");
1045 else
1046 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1047 }
1048
1049 if ((ctype->class_done & BIT (tok_xdigit)) == 0)
1050 /* "If this keyword is not specified, the digits `0' to `9', the
1051 uppercase letters `A' through `F', and the lowercase letters `a'
1052 through `f', ..., shell automatically belong to this class, with
1053 implementation defined character values." [P1003.2, 2.5.2.1] */
1054 {
1055 set_default (BIT (tok_xdigit), '0', '9');
1056 set_default (BIT (tok_xdigit), 'A', 'F');
1057 set_default (BIT (tok_xdigit), 'a', 'f');
1058 }
1059
1060 if ((ctype->class_done & BIT (tok_blank)) == 0)
1061 /* "If this keyword [blank] is unspecified, the characters <space> and
1062 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
1063 {
1064 unsigned int value;
1065
1066 value = charset_find_value (charset, "space", 5);
1067 if (value == ILLEGAL_CHAR_VALUE)
1068 error (0, 0, _("\
1069 character `%s' not defined while needed as default value"),
1070 "<space>");
1071 else
1072 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1073
1074 value = charset_find_value (charset, "tab", 3);
1075 if (value == ILLEGAL_CHAR_VALUE)
1076 error (0, 0, _("\
1077 character `%s' not defined while needed as default value"),
1078 "<tab>");
1079 else
1080 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1081 }
1082
1083 if ((ctype->class_done & BIT (tok_graph)) == 0)
1084 /* "If this keyword [graph] is not specified, characters specified for
1085 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
1086 shall belong to this character class." [P1003.2, 2.5.2.1] */
1087 {
1088 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1089 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1090 size_t cnt;
1091
1092 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1093 if ((ctype->class_collection[cnt] & mask) != 0)
1094 ctype->class_collection[cnt] |= BIT (tok_graph);
1095 }
1096
1097 if ((ctype->class_done & BIT (tok_print)) == 0)
1098 /* "If this keyword [print] is not provided, characters specified for
1099 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
1100 and the <space> character shall belong to this character class."
1101 [P1003.2, 2.5.2.1] */
1102 {
1103 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1104 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1105 size_t cnt;
1106 int space;
1107
1108 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1109 if ((ctype->class_collection[cnt] & mask) != 0)
1110 ctype->class_collection[cnt] |= BIT (tok_print);
1111
1112 space = charset_find_value (charset, "space", 5);
1113 if (space == ILLEGAL_CHAR_VALUE)
1114 error (0, 0, _("\
1115 character `%s' not defined while needed as default value"),
1116 "<space>");
1117 else
1118 ELEM (ctype, class_collection, , space) |= BIT (tok_print);
1119 }
1120
1121 if (ctype->toupper_done == 0)
1122 /* "If this keyword [toupper] is not spcified, the lowercase letters
1123 `a' through `z', and their corresponding uppercase letters `A' to
1124 `Z', ..., shall automatically be included, with implementation-
1125 defined character values." [P1003.2, 2.5.2.1] */
1126 {
1127 char tmp[4];
1128 int ch;
1129
1130 strcpy (tmp, "<?>");
1131
1132 for (ch = 'a'; ch <= 'z'; ++ch)
1133 {
1134 unsigned int value_from, value_to;
1135
1136 tmp[1] = (char) ch;
1137
1138 value_from = charset_find_value (charset, &tmp[1], 1);
1139 if (value_from == ILLEGAL_CHAR_VALUE)
1140 {
1141 error (0, 0, _("\
1142 character `%c' not defined while needed as default value"),
1143 tmp);
1144 continue;
1145 }
1146
1147 /* This conversion is implementation defined. */
1148 tmp[1] = (char) (ch + ('A' - 'a'));
1149 value_to = charset_find_value (charset, &tmp[1], 1);
1150 if (value_to == -1)
1151 {
1152 error (0, 0, _("\
1153 character `%s' not defined while needed as default value"),
1154 tmp);
1155 continue;
1156 }
1157
1158 /* The index [0] is determined by the order of the
1159 `ctype_map_newP' calls in `ctype_startup'. */
1160 ELEM (ctype, map_collection, [0], value_from) = value_to;
1161 }
1162 }
1163
1164 if (ctype->tolower_done == 0)
1165 /* "If this keyword [tolower] is not specified, the mapping shall be
1166 the reverse mapping of the one specified to `toupper'." [P1003.2] */
1167 {
1168 size_t cnt;
1169
1170 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
1171 if (ctype->map_collection[0][cnt] != 0)
1172 ELEM (ctype, map_collection, [1],
1173 ctype->map_collection[0][cnt])
1174 = ctype->charnames[cnt];
1175 }
1176 }
1177
1178
1179 static void
1180 allocate_arrays (struct locale_ctype_t *ctype, struct charset_t *charset)
1181 {
1182 size_t idx;
1183
1184 /* First we have to decide how we organize the arrays. It is easy for
1185 a one-byte character set. But multi-byte character set cannot be
1186 stored flat because they might be sparsly used. So we determine an
1187 optimal hashing function for the used characters.
1188
1189 We use a very trivial hashing function to store the sparse table.
1190 CH % TABSIZE is used as an index. To solve multiple hits we have
1191 N planes. This gurantees a fixed search time for a character [N
1192 / 2]. In the following code we determine the minmum value for
1193 TABSIZE * N, where TABSIZE >= 256. */
1194 size_t min_total = UINT_MAX;
1195 size_t act_size = 256;
1196
1197 fputs (_("\
1198 Computing table size for character classes might take a while..."),
1199 stderr);
1200
1201 while (act_size < min_total)
1202 {
1203 size_t cnt[act_size];
1204 size_t act_planes = 1;
1205
1206 memset (cnt, '\0', sizeof cnt);
1207
1208 for (idx = 0; idx < 256; ++idx)
1209 cnt[idx] = 1;
1210
1211 for (idx = 0; idx < ctype->charnames_act; ++idx)
1212 if (ctype->charnames[idx] >= 256)
1213 {
1214 size_t nr = ctype->charnames[idx] % act_size;
1215
1216 if (++cnt[nr] > act_planes)
1217 {
1218 act_planes = cnt[nr];
1219 if (act_size * act_planes >= min_total)
1220 break;
1221 }
1222 }
1223
1224 if (act_size * act_planes < min_total)
1225 {
1226 min_total = act_size * act_planes;
1227 ctype->plane_size = act_size;
1228 ctype->plane_cnt = act_planes;
1229 }
1230
1231 ++act_size;
1232 }
1233
1234 fprintf (stderr, _(" done\n"));
1235
1236
1237 #if __BYTE_ORDER == __LITTLE_ENDIAN
1238 # define NAMES_B1 ctype->names_el
1239 # define NAMES_B2 ctype->names_eb
1240 #else
1241 # define NAMES_B1 ctype->names_eb
1242 # define NAMES_B2 ctype->names_el
1243 #endif
1244
1245 ctype->names_eb = (u_int32_t *) xcalloc (ctype->plane_size
1246 * ctype->plane_cnt,
1247 sizeof (u_int32_t));
1248 ctype->names_el = (u_int32_t *) xcalloc (ctype->plane_size
1249 * ctype->plane_cnt,
1250 sizeof (u_int32_t));
1251
1252 for (idx = 1; idx < 256; ++idx)
1253 NAMES_B1[idx] = idx;
1254
1255 /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */
1256 NAMES_B1[0] = 1;
1257
1258 for (idx = 256; idx < ctype->charnames_act; ++idx)
1259 {
1260 size_t nr = (ctype->charnames[idx] % ctype->plane_size);
1261 size_t depth = 0;
1262
1263 while (NAMES_B1[nr + depth * ctype->plane_size])
1264 ++depth;
1265 assert (depth < ctype->plane_cnt);
1266
1267 NAMES_B1[nr + depth * ctype->plane_size] = ctype->charnames[idx];
1268
1269 /* Now for faster access remember the index in the NAMES_B array. */
1270 ctype->charnames[idx] = nr + depth * ctype->plane_size;
1271 }
1272 NAMES_B1[0] = 0;
1273
1274 for (idx = 0; idx < ctype->plane_size * ctype->plane_cnt; ++idx)
1275 NAMES_B2[idx] = SWAPU32 (NAMES_B1[idx]);
1276
1277
1278 /* You wonder about this amount of memory? This is only because some
1279 users do not manage to address the array with unsigned values or
1280 data types with range >= 256. '\200' would result in the array
1281 index -128. To help these poor people we duplicate the entries for
1282 128 up to 255 below the entry for \0. */
1283 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
1284 sizeof (char_class_t));
1285 ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
1286 * ctype->plane_cnt,
1287 sizeof (char_class32_t));
1288
1289 /* Fill in the character class information. */
1290 #if __BYTE_ORDER == __LITTLE_ENDIAN
1291 # define TRANS(w) CHAR_CLASS_TRANS (w)
1292 # define TRANS32(w) CHAR_CLASS32_TRANS (w)
1293 #else
1294 # define TRANS(w) (w)
1295 # define TRANS32(w) (w)
1296 #endif
1297
1298 for (idx = 0; idx < ctype->class_collection_act; ++idx)
1299 if (ctype->charnames[idx] < 256)
1300 ctype->ctype_b[128 + ctype->charnames[idx]]
1301 = TRANS (ctype->class_collection[idx]);
1302
1303 /* Mirror first 127 entries. We must take care that entry -1 is not
1304 mirrored because EOF == -1. */
1305 for (idx = 0; idx < 127; ++idx)
1306 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
1307
1308 /* The 32 bit array contains all characters. */
1309 for (idx = 0; idx < ctype->class_collection_act; ++idx)
1310 ctype->ctype32_b[ctype->charnames[idx]]
1311 = TRANS32 (ctype->class_collection[idx]);
1312
1313 /* Room for table of mappings. */
1314 ctype->map_eb = (u_int32_t **) xmalloc (ctype->map_collection_nr
1315 * sizeof (u_int32_t *));
1316 ctype->map_el = (u_int32_t **) xmalloc (ctype->map_collection_nr
1317 * sizeof (u_int32_t *));
1318
1319 /* Fill in all mappings. */
1320 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
1321 {
1322 unsigned int idx2;
1323
1324 /* Allocate table. */
1325 ctype->map_eb[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1326 * ctype->plane_cnt + 128)
1327 * sizeof (u_int32_t));
1328 ctype->map_el[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1329 * ctype->plane_cnt + 128)
1330 * sizeof (u_int32_t));
1331
1332 #if __BYTE_ORDER == __LITTLE_ENDIAN
1333 # define MAP_B1 ctype->map_el
1334 # define MAP_B2 ctype->map_eb
1335 #else
1336 # define MAP_B1 ctype->map_eb
1337 # define MAP_B2 ctype->map_el
1338 #endif
1339
1340 /* Copy default value (identity mapping). */
1341 memcpy (&MAP_B1[idx][128], NAMES_B1,
1342 ctype->plane_size * ctype->plane_cnt * sizeof (u_int32_t));
1343
1344 /* Copy values from collection. */
1345 for (idx2 = 0; idx2 < ctype->map_collection_act[idx]; ++idx2)
1346 if (ctype->map_collection[idx][idx2] != 0)
1347 MAP_B1[idx][128 + ctype->charnames[idx2]] =
1348 ctype->map_collection[idx][idx2];
1349
1350 /* Mirror first 127 entries. We must take care not to map entry
1351 -1 because EOF == -1. */
1352 for (idx2 = 0; idx2 < 127; ++idx2)
1353 MAP_B1[idx][idx2] = MAP_B1[idx][256 + idx2];
1354
1355 /* EOF must map to EOF. */
1356 MAP_B1[idx][127] = EOF;
1357
1358 /* And now the other byte order. */
1359 for (idx2 = 0; idx2 < ctype->plane_size * ctype->plane_cnt + 128; ++idx2)
1360 MAP_B2[idx][idx2] = SWAPU32 (MAP_B1[idx][idx2]);
1361 }
1362
1363 /* Extra array for class and map names. */
1364 ctype->class_name_ptr = (u_int32_t *) xmalloc (ctype->nr_charclass
1365 * sizeof (u_int32_t));
1366 ctype->map_name_ptr = (u_int32_t *) xmalloc (ctype->map_collection_nr
1367 * sizeof (u_int32_t));
1368
1369 /* Array for width information. Because the expected width are very
1370 small we use only one single byte. This save space and we need
1371 not provide the information twice with both endianesses. */
1372 ctype->width = (unsigned char *) xmalloc (ctype->plane_size
1373 * ctype->plane_cnt);
1374 /* Initialize with default width value. */
1375 memset (ctype->width, charset->width_default,
1376 ctype->plane_size * ctype->plane_cnt);
1377 if (charset->width_rules != NULL)
1378 {
1379 size_t cnt;
1380
1381 for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
1382 if (charset->width_rules[cnt].width != charset->width_default)
1383 for (idx = charset->width_rules[cnt].from;
1384 idx <= charset->width_rules[cnt].to; ++idx)
1385 {
1386 size_t nr = idx % ctype->plane_size;
1387 size_t depth = 0;
1388
1389 while (NAMES_B1[nr + depth * ctype->plane_size] != nr)
1390 ++depth;
1391 assert (depth < ctype->plane_cnt);
1392
1393 ctype->width[nr + depth * ctype->plane_size]
1394 = charset->width_rules[cnt].width;
1395 }
1396 }
1397
1398 /* Compute MB_CUR_MAX. Please note the value mb_cur_max in the
1399 character set definition gives the number of bytes in the wide
1400 character representation. We compute the number of bytes used
1401 for the UTF-8 encoded form. */
1402 ctype->mb_cur_max = ((int []) { 2, 3, 5, 6 }) [charset->mb_cur_max - 1];
1403
1404 /* We need the name of the currently used 8-bit character set to
1405 make correct conversion between this 8-bit representation and the
1406 ISO 10646 character set used internally for wide characters. */
1407 ctype->codeset_name = charset->code_set_name;
1408 }