]> git.ipfire.org Git - thirdparty/glibc.git/blob - locale/programs/ld-ctype.c
Update.
[thirdparty/glibc.git] / locale / programs / ld-ctype.c
1 /* Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #include <alloca.h>
25 #include <endian.h>
26 #include <limits.h>
27 #include <string.h>
28
29 #include "locales.h"
30 #include "localeinfo.h"
31 #include "langinfo.h"
32 #include "locfile-token.h"
33 #include "stringtrans.h"
34
35 /* Uncomment the following line in the production version. */
36 /* define NDEBUG 1 */
37 #include <assert.h>
38
39
/* Allocation helpers which are defined elsewhere in the program.  */
extern void *xmalloc (size_t n);
extern void *xcalloc (size_t n, size_t s);
extern void *xrealloc (void *ptr, size_t n);
43
44
/* Bit position and bit mask of a character class.  The position is the
   distance of the class token from `tok_upper'.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (1 << BITPOS (class))

/* Yield the entry for character VALUE in the table COLLECTION of CTYPE
   (optionally subscripted by IDX).  The lookup is done by `find_idx',
   which receives the table together with its `_max' and `_act'
   bookkeeping members.  */
#define ELEM(ctype, collection, idx, value) \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \
             &ctype->collection##_act idx, value)

/* Reverse the order of the four bytes of W.  */
#define SWAPU32(w) \
  (((w) >> 24) | (((w) >> 8) & 0xff00) | (((w) & 0xff00) << 8) | ((w) << 24))

/* Exchange the two low bytes of W.  */
#define SWAPU16(w) \
  ((((w) & 0xff) << 8) | (((w) >> 8) & 0xff))

/* Exchange the two bytes inside each 16-bit half of W.  */
#define XSWAPU32(w) \
  ((((w) & 0xff00ff) << 8) | (((w) & 0xff00ff00) >> 8))


/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t u_int16_t
#define CHAR_CLASS_TRANS SWAPU16
#define char_class32_t u_int32_t
#define CHAR_CLASS32_TRANS XSWAPU32
70
71
72 /* The real definition of the struct for the LC_CTYPE locale. */
73 struct locale_ctype_t
74 {
75 unsigned int *charnames;
76 size_t charnames_max;
77 size_t charnames_act;
78
79 /* We will allow up to 8 * sizeof(u_int32_t) - 1 character classes. */
80 #define MAX_NR_CHARCLASS (8 * sizeof (u_int32_t) - 1)
81 size_t nr_charclass;
82 const char *classnames[MAX_NR_CHARCLASS];
83 unsigned long int current_class_mask;
84 unsigned int last_class_char;
85 u_int32_t *class_collection;
86 size_t class_collection_max;
87 size_t class_collection_act;
88 unsigned long int class_done;
89
90 /* If the following number ever turns out to be too small simply
91 increase it. But I doubt it will. --drepper@gnu */
92 #define MAX_NR_CHARMAP 16
93 const char *mapnames[MAX_NR_CHARMAP];
94 u_int32_t *map_collection[MAX_NR_CHARMAP];
95 size_t map_collection_max[MAX_NR_CHARMAP];
96 size_t map_collection_act[MAX_NR_CHARMAP];
97 size_t map_collection_nr;
98 size_t last_map_idx;
99 unsigned int from_map_char;
100 int toupper_done;
101 int tolower_done;
102
103 /* The arrays for the binary representation. */
104 u_int32_t plane_size;
105 u_int32_t plane_cnt;
106 char_class_t *ctype_b;
107 char_class32_t *ctype32_b;
108 u_int32_t *names_el;
109 u_int32_t *names_eb;
110 u_int32_t **map_eb;
111 u_int32_t **map_el;
112 u_int32_t *class_name_ptr;
113 u_int32_t *map_name_ptr;
114 unsigned char *width;
115 u_int32_t mb_cur_max;
116 const char *codeset_name;
117 };
118
119
/* Prototypes for local functions.  */

/* Registration of a new class resp. map name.  */
static void ctype_class_newP (struct linereader *lr,
                              struct locale_ctype_t *ctype,
                              const char *name);
static void ctype_map_newP (struct linereader *lr,
                            struct locale_ctype_t *ctype,
                            const char *name,
                            struct charset_t *charset);

/* Lookup of the table entry belonging to a character value.  */
static u_int32_t *find_idx (struct locale_ctype_t *ctype,
                            u_int32_t **table,
                            size_t *max, size_t *act,
                            unsigned int idx);

/* Helpers used while finishing and writing the category.  */
static void set_class_defaults (struct locale_ctype_t *ctype,
                                struct charset_t *charset);
static void allocate_arrays (struct locale_ctype_t *ctype,
                             struct charset_t *charset);
132
133
134 void
135 ctype_startup (struct linereader *lr, struct localedef_t *locale,
136 struct charset_t *charset)
137 {
138 unsigned int cnt;
139 struct locale_ctype_t *ctype;
140
141 /* It is important that we always use UCS1 encoding for strings now. */
142 encoding_method = ENC_UCS1;
143
144 /* Allocate the needed room. */
145 locale->categories[LC_CTYPE].ctype = ctype =
146 (struct locale_ctype_t *) xmalloc (sizeof (struct locale_ctype_t));
147
148 /* We have no names seen yet. */
149 ctype->charnames_max = charset->mb_cur_max == 1 ? 256 : 512;
150 ctype->charnames =
151 (unsigned int *) xmalloc (ctype->charnames_max * sizeof (unsigned int));
152 for (cnt = 0; cnt < 256; ++cnt)
153 ctype->charnames[cnt] = cnt;
154 ctype->charnames_act = 256;
155
156 /* Fill character class information. */
157 ctype->nr_charclass = 0;
158 ctype->current_class_mask = 0;
159 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
160 /* The order of the following instructions determines the bit
161 positions! */
162 ctype_class_newP (lr, ctype, "upper");
163 ctype_class_newP (lr, ctype, "lower");
164 ctype_class_newP (lr, ctype, "alpha");
165 ctype_class_newP (lr, ctype, "digit");
166 ctype_class_newP (lr, ctype, "xdigit");
167 ctype_class_newP (lr, ctype, "space");
168 ctype_class_newP (lr, ctype, "print");
169 ctype_class_newP (lr, ctype, "graph");
170 ctype_class_newP (lr, ctype, "blank");
171 ctype_class_newP (lr, ctype, "cntrl");
172 ctype_class_newP (lr, ctype, "punct");
173 ctype_class_newP (lr, ctype, "alnum");
174
175 ctype->class_collection_max = charset->mb_cur_max == 1 ? 256 : 512;
176 ctype->class_collection
177 = (u_int32_t *) xmalloc (sizeof (unsigned long int)
178 * ctype->class_collection_max);
179 memset (ctype->class_collection, '\0',
180 sizeof (unsigned long int) * ctype->class_collection_max);
181 ctype->class_collection_act = 256;
182
183 /* Fill character map information. */
184 ctype->map_collection_nr = 0;
185 ctype->last_map_idx = MAX_NR_CHARMAP;
186 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
187 ctype_map_newP (lr, ctype, "toupper", charset);
188 ctype_map_newP (lr, ctype, "tolower", charset);
189
190 /* Fill first 256 entries in `toupper' and `tolower' arrays. */
191 for (cnt = 0; cnt < 256; ++cnt)
192 {
193 ctype->map_collection[0][cnt] = cnt;
194 ctype->map_collection[1][cnt] = cnt;
195 }
196 }
197
198
199 void
200 ctype_finish (struct localedef_t *locale, struct charset_t *charset)
201 {
202 /* See POSIX.2, table 2-6 for the meaning of the following table. */
203 #define NCLASS 12
204 static const struct
205 {
206 const char *name;
207 const char allow[NCLASS];
208 }
209 valid_table[NCLASS] =
210 {
211 /* The order is important. See token.h for more information.
212 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
213 { "upper", "--MX-XDDXXX-" },
214 { "lower", "--MX-XDDXXX-" },
215 { "alpha", "---X-XDDXXX-" },
216 { "digit", "XXX--XDDXXX-" },
217 { "xdigit", "-----XDDXXX-" },
218 { "space", "XXXXX------X" },
219 { "print", "---------X--" },
220 { "graph", "---------X--" },
221 { "blank", "XXXXXM-----X" },
222 { "cntrl", "XXXXX-XX--XX" },
223 { "punct", "XXXXX-DD-X-X" },
224 { "alnum", "-----XDDXXX-" }
225 };
226 size_t cnt;
227 int cls1, cls2;
228 unsigned int space_value;
229 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
230
231 /* Set default value for classes not specified. */
232 set_class_defaults (ctype, charset);
233
234 /* Check according to table. */
235 for (cnt = 0; cnt < ctype->class_collection_max; ++cnt)
236 {
237 unsigned long int tmp;
238
239 tmp = ctype->class_collection[cnt];
240 if (tmp == 0)
241 continue;
242
243 for (cls1 = 0; cls1 < NCLASS; ++cls1)
244 if ((tmp & (1 << cls1)) != 0)
245 for (cls2 = 0; cls2 < NCLASS; ++cls2)
246 if (valid_table[cls1].allow[cls2] != '-')
247 {
248 int eq = (tmp & (1 << cls2)) != 0;
249 switch (valid_table[cls1].allow[cls2])
250 {
251 case 'M':
252 if (!eq)
253 {
254 char buf[17];
255 char *cp = buf;
256 unsigned int value;
257
258 value = ctype->charnames[cnt];
259
260 if ((value & 0xff000000) != 0)
261 cp += sprintf (cp, "\\%o", (value >> 24) & 0xff);
262 if ((value & 0xffff0000) != 0)
263 cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
264 if ((value & 0xffffff00) != 0)
265 cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
266 sprintf (cp, "\\%o", value & 0xff);
267
268 if (!be_quiet)
269 error (0, 0, _("\
270 character %s'%s' in class `%s' must be in class `%s'"), value > 256 ? "L" : "",
271 cp, valid_table[cls1].name,
272 valid_table[cls2].name);
273 }
274 break;
275
276 case 'X':
277 if (eq)
278 {
279 char buf[17];
280 char *cp = buf;
281 unsigned int value;
282
283 value = ctype->charnames[cnt];
284
285 if ((value & 0xff000000) != 0)
286 cp += sprintf (cp, "\\%o", value >> 24);
287 if ((value & 0xffff0000) != 0)
288 cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
289 if ((value & 0xffffff00) != 0)
290 cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
291 sprintf (cp, "\\%o", value & 0xff);
292
293 if (!be_quiet)
294 error (0, 0, _("\
295 character %s'%s' in class `%s' must not be in class `%s'"),
296 value > 256 ? "L" : "", cp,
297 valid_table[cls1].name,
298 valid_table[cls2].name);
299 }
300 break;
301
302 case 'D':
303 ctype->class_collection[cnt] |= 1 << cls2;
304 break;
305
306 default:
307 error (5, 0, _("internal error in %s, line %u"),
308 __FUNCTION__, __LINE__);
309 }
310 }
311 }
312
313 /* ... and now test <SP> as a special case. */
314 space_value = charset_find_value (charset, "SP", 2);
315 if ((wchar_t) space_value == ILLEGAL_CHAR_VALUE && !be_quiet)
316 error (0, 0, _("character <SP> not defined in character map"));
317 else if (((cnt = BITPOS (tok_space),
318 (ELEM (ctype, class_collection, , space_value)
319 & BIT (tok_space)) == 0)
320 || (cnt = BITPOS (tok_blank),
321 (ELEM (ctype, class_collection, , space_value)
322 & BIT (tok_blank)) == 0))
323 && !be_quiet)
324 error (0, 0, _("<SP> character not in class `%s'"),
325 valid_table[cnt].name);
326 else if (((cnt = BITPOS (tok_punct),
327 (ELEM (ctype, class_collection, , space_value)
328 & BIT (tok_punct)) != 0)
329 || (cnt = BITPOS (tok_graph),
330 (ELEM (ctype, class_collection, , space_value)
331 & BIT (tok_graph))
332 != 0))
333 && !be_quiet)
334 error (0, 0, _("<SP> character must not be in class `%s'"),
335 valid_table[cnt].name);
336 else
337 ELEM (ctype, class_collection, , space_value) |= BIT (tok_print);
338
339 /* Now that the tests are done make sure the name array contains all
340 characters which are handled in the WIDTH section of the
341 character set definition file. */
342 if (charset->width_rules != NULL)
343 for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
344 {
345 size_t inner;
346 for (inner = charset->width_rules[cnt].from;
347 inner <= charset->width_rules[cnt].to; ++inner)
348 (void) find_idx (ctype, NULL, NULL, NULL, inner);
349 }
350 }
351
352
353 void
354 ctype_output (struct localedef_t *locale, struct charset_t *charset,
355 const char *output_path)
356 {
357 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
358 const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
359 + 2 * (ctype->map_collection_nr - 2));
360 struct iovec iov[2 + nelems + ctype->nr_charclass
361 + ctype->map_collection_nr];
362 struct locale_file data;
363 u_int32_t idx[nelems];
364 size_t elem, cnt, offset, total;
365
366
367 if ((locale->binary & (1 << LC_CTYPE)) != 0)
368 {
369 iov[0].iov_base = ctype;
370 iov[0].iov_len = locale->len[LC_CTYPE];
371
372 write_locale_data (output_path, "LC_CTYPE", 1, iov);
373
374 return;
375 }
376
377
378 /* Now prepare the output: Find the sizes of the table we can use. */
379 allocate_arrays (ctype, charset);
380
381 data.magic = LIMAGIC (LC_CTYPE);
382 data.n = nelems;
383 iov[0].iov_base = (void *) &data;
384 iov[0].iov_len = sizeof (data);
385
386 iov[1].iov_base = (void *) idx;
387 iov[1].iov_len = sizeof (idx);
388
389 idx[0] = iov[0].iov_len + iov[1].iov_len;
390 offset = 0;
391
392 for (elem = 0; elem < nelems; ++elem)
393 {
394 if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
395 switch (elem)
396 {
397 #define CTYPE_DATA(name, base, len) \
398 case _NL_ITEM_INDEX (name): \
399 iov[2 + elem + offset].iov_base = (base); \
400 iov[2 + elem + offset].iov_len = (len); \
401 if (elem + 1 < nelems) \
402 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
403 break
404
405 CTYPE_DATA (_NL_CTYPE_CLASS,
406 ctype->ctype_b,
407 (256 + 128) * sizeof (char_class_t));
408
409 CTYPE_DATA (_NL_CTYPE_TOUPPER_EB,
410 ctype->map_eb[0],
411 (ctype->plane_size * ctype->plane_cnt + 128)
412 * sizeof (u_int32_t));
413 CTYPE_DATA (_NL_CTYPE_TOLOWER_EB,
414 ctype->map_eb[1],
415 (ctype->plane_size * ctype->plane_cnt + 128)
416 * sizeof (u_int32_t));
417
418 CTYPE_DATA (_NL_CTYPE_TOUPPER_EL,
419 ctype->map_el[0],
420 (ctype->plane_size * ctype->plane_cnt + 128)
421 * sizeof (u_int32_t));
422 CTYPE_DATA (_NL_CTYPE_TOLOWER_EL,
423 ctype->map_el[1],
424 (ctype->plane_size * ctype->plane_cnt + 128)
425 * sizeof (u_int32_t));
426
427 CTYPE_DATA (_NL_CTYPE_CLASS32,
428 ctype->ctype32_b,
429 (ctype->plane_size * ctype->plane_cnt
430 * sizeof (char_class32_t)));
431
432 CTYPE_DATA (_NL_CTYPE_NAMES_EB,
433 ctype->names_eb, (ctype->plane_size * ctype->plane_cnt
434 * sizeof (u_int32_t)));
435 CTYPE_DATA (_NL_CTYPE_NAMES_EL,
436 ctype->names_el, (ctype->plane_size * ctype->plane_cnt
437 * sizeof (u_int32_t)));
438
439 CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
440 &ctype->plane_size, sizeof (u_int32_t));
441 CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
442 &ctype->plane_cnt, sizeof (u_int32_t));
443
444 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
445 /* The class name array. */
446 total = 0;
447 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
448 {
449 iov[2 + elem + offset].iov_base
450 = (void *) ctype->classnames[cnt];
451 iov[2 + elem + offset].iov_len
452 = strlen (ctype->classnames[cnt]) + 1;
453 total += iov[2 + elem + offset].iov_len;
454 }
455 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
456 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
457 total += 1 + (4 - ((total + 1) % 4));
458
459 if (elem + 1 < nelems)
460 idx[elem + 1] = idx[elem] + total;
461 break;
462
463 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
464 /* The class name array. */
465 total = 0;
466 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
467 {
468 iov[2 + elem + offset].iov_base
469 = (void *) ctype->mapnames[cnt];
470 iov[2 + elem + offset].iov_len
471 = strlen (ctype->mapnames[cnt]) + 1;
472 total += iov[2 + elem + offset].iov_len;
473 }
474 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
475 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
476 total += 1 + (4 - ((total + 1) % 4));
477
478 if (elem + 1 < nelems)
479 idx[elem + 1] = idx[elem] + total;
480 break;
481
482 CTYPE_DATA (_NL_CTYPE_WIDTH,
483 ctype->width, ctype->plane_size * ctype->plane_cnt);
484
485 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
486 &ctype->mb_cur_max, sizeof (u_int32_t));
487
488 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
489 total = strlen (ctype->codeset_name) + 1;
490 if (total % 4 == 0)
491 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
492 else
493 {
494 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
495 memcpy (iov[2 + elem + offset].iov_base, ctype->codeset_name,
496 total);
497 total = (total + 3) & ~3;
498 }
499 iov[2 + elem + offset].iov_len = total;
500 if (elem + 1 < nelems)
501 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
502 break;
503
504 default:
505 assert (! "unknown CTYPE element");
506 }
507 else
508 {
509 /* Handle extra maps. */
510 size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) >> 1;
511
512 if (((elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) & 1) == 0)
513 iov[2 + elem + offset].iov_base = ctype->map_eb[nr];
514 else
515 iov[2 + elem + offset].iov_base = ctype->map_el[nr];
516
517 iov[2 + elem + offset].iov_len = ((ctype->plane_size
518 * ctype->plane_cnt + 128)
519 * sizeof (u_int32_t));
520
521 if (elem + 1 < nelems)
522 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
523 }
524 }
525
526 assert (2 + elem + offset == (nelems + ctype->nr_charclass
527 + ctype->map_collection_nr + 2));
528
529 write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
530 }
531
532
533 /* Character class handling. */
534 void
535 ctype_class_new (struct linereader *lr, struct localedef_t *locale,
536 enum token_t tok, struct token *code,
537 struct charset_t *charset)
538 {
539 ctype_class_newP (lr, locale->categories[LC_CTYPE].ctype,
540 code->val.str.start);
541 }
542
543
544 int
545 ctype_is_charclass (struct linereader *lr, struct localedef_t *locale,
546 const char *name)
547 {
548 size_t cnt;
549
550 for (cnt = 0; cnt < locale->categories[LC_CTYPE].ctype->nr_charclass; ++cnt)
551 if (strcmp (name, locale->categories[LC_CTYPE].ctype->classnames[cnt])
552 == 0)
553 return 1;
554
555 return 0;
556 }
557
558
559 void
560 ctype_class_start (struct linereader *lr, struct localedef_t *locale,
561 enum token_t tok, const char *str,
562 struct charset_t *charset)
563 {
564 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
565 size_t cnt;
566
567 switch (tok)
568 {
569 case tok_upper:
570 str = "upper";
571 break;
572 case tok_lower:
573 str = "lower";
574 break;
575 case tok_alpha:
576 str = "alpha";
577 break;
578 case tok_digit:
579 str = "digit";
580 break;
581 case tok_xdigit:
582 str = "xdigit";
583 break;
584 case tok_space:
585 str = "space";
586 break;
587 case tok_print:
588 str = "print";
589 break;
590 case tok_graph:
591 str = "graph";
592 break;
593 case tok_blank:
594 str = "blank";
595 break;
596 case tok_cntrl:
597 str = "cntrl";
598 break;
599 case tok_punct:
600 str = "punct";
601 break;
602 case tok_alnum:
603 str = "alnum";
604 break;
605 case tok_ident:
606 break;
607 default:
608 assert (! "illegal token as class name: should not happen");
609 }
610
611 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
612 if (strcmp (str, ctype->classnames[cnt]) == 0)
613 break;
614
615 if (cnt >= ctype->nr_charclass)
616 assert (! "unknown class in class definition: should not happen");
617
618 ctype->class_done |= BIT (tok);
619
620 ctype->current_class_mask = 1 << cnt;
621 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
622 }
623
624
625 void
626 ctype_class_from (struct linereader *lr, struct localedef_t *locale,
627 struct token *code, struct charset_t *charset)
628 {
629 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
630 unsigned int value;
631
632 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
633
634 ctype->last_class_char = value;
635
636 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
637 /* In the LC_CTYPE category it is no error when a character is
638 not found. This has to be ignored silently. */
639 return;
640
641 *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
642 &ctype->class_collection_act, value)
643 |= ctype->current_class_mask;
644 }
645
646
647 void
648 ctype_class_to (struct linereader *lr, struct localedef_t *locale,
649 struct token *code, struct charset_t *charset)
650 {
651 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
652 unsigned int value, cnt;
653
654 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
655
656 assert (value >= ctype->last_class_char);
657
658 for (cnt = ctype->last_class_char + 1; cnt <= value; ++cnt)
659 *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
660 &ctype->class_collection_act, cnt)
661 |= ctype->current_class_mask;
662
663 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
664 }
665
666
667 void
668 ctype_class_end (struct linereader *lr, struct localedef_t *locale)
669 {
670 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
671
672 /* We have no special actions to perform here. */
673 ctype->current_class_mask = 0;
674 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
675 }
676
677
678 /* Character map handling. */
679 void
680 ctype_map_new (struct linereader *lr, struct localedef_t *locale,
681 enum token_t tok, struct token *code,
682 struct charset_t *charset)
683 {
684 ctype_map_newP (lr, locale->categories[LC_CTYPE].ctype,
685 code->val.str.start, charset);
686 }
687
688
689 int
690 ctype_is_charconv (struct linereader *lr, struct localedef_t *locale,
691 const char *name)
692 {
693 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
694 size_t cnt;
695
696 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
697 if (strcmp (name, ctype->mapnames[cnt]) == 0)
698 return 1;
699
700 return 0;
701 }
702
703
704 void
705 ctype_map_start (struct linereader *lr, struct localedef_t *locale,
706 enum token_t tok, const char *name, struct charset_t *charset)
707 {
708 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
709 size_t cnt;
710
711 switch (tok)
712 {
713 case tok_toupper:
714 ctype->toupper_done = 1;
715 name = "toupper";
716 break;
717 case tok_tolower:
718 ctype->tolower_done = 1;
719 name = "tolower";
720 break;
721 case tok_ident:
722 break;
723 default:
724 assert (! "unknown token in category `LC_CTYPE' should not happen");
725 }
726
727 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
728 if (strcmp (name, ctype->mapnames[cnt]) == 0)
729 break;
730
731 if (cnt == ctype->map_collection_nr)
732 assert (! "unknown token in category `LC_CTYPE' should not happen");
733
734 ctype->last_map_idx = cnt;
735 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
736 }
737
738
739 void
740 ctype_map_from (struct linereader *lr, struct localedef_t *locale,
741 struct token *code, struct charset_t *charset)
742 {
743 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
744 unsigned int value;
745
746 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
747
748 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
749 /* In the LC_CTYPE category it is no error when a character is
750 not found. This has to be ignored silently. */
751 return;
752
753 assert (ctype->last_map_idx < ctype->map_collection_nr);
754
755 ctype->from_map_char = value;
756 }
757
758
759 void
760 ctype_map_to (struct linereader *lr, struct localedef_t *locale,
761 struct token *code, struct charset_t *charset)
762 {
763 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
764 unsigned int value;
765
766 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
767
768 if ((wchar_t) ctype->from_map_char == ILLEGAL_CHAR_VALUE
769 || (wchar_t) value == ILLEGAL_CHAR_VALUE)
770 {
771 /* In the LC_CTYPE category it is no error when a character is
772 not found. This has to be ignored silently. */
773 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
774 return;
775 }
776
777 *find_idx (ctype, &ctype->map_collection[ctype->last_map_idx],
778 &ctype->map_collection_max[ctype->last_map_idx],
779 &ctype->map_collection_act[ctype->last_map_idx],
780 ctype->from_map_char) = value;
781
782 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
783 }
784
785
786 void
787 ctype_map_end (struct linereader *lr, struct localedef_t *locale)
788 {
789 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
790
791 ctype->last_map_idx = MAX_NR_CHARMAP;
792 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
793 }
794
795
796 /* Local functions. */
797 static void
798 ctype_class_newP (struct linereader *lr, struct locale_ctype_t *ctype,
799 const char *name)
800 {
801 size_t cnt;
802
803 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
804 if (strcmp (ctype->classnames[cnt], name) == 0)
805 break;
806
807 if (cnt < ctype->nr_charclass)
808 {
809 lr_error (lr, _("character class `%s' already defined"), name);
810 return;
811 }
812
813 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
814 /* Exit code 2 is prescribed in P1003.2b. */
815 error (2, 0, _("\
816 implementation limit: no more than %d character classes allowed"),
817 MAX_NR_CHARCLASS);
818
819 ctype->classnames[ctype->nr_charclass++] = name;
820 }
821
822
823 static void
824 ctype_map_newP (struct linereader *lr, struct locale_ctype_t *ctype,
825 const char *name, struct charset_t *charset)
826 {
827 size_t max_chars = 0;
828 size_t cnt;
829
830 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
831 {
832 if (strcmp (ctype->mapnames[cnt], name) == 0)
833 break;
834
835 if (max_chars < ctype->map_collection_max[cnt])
836 max_chars = ctype->map_collection_max[cnt];
837 }
838
839 if (cnt < ctype->map_collection_nr)
840 {
841 lr_error (lr, _("character map `%s' already defined"), name);
842 return;
843 }
844
845 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
846 /* Exit code 2 is prescribed in P1003.2b. */
847 error (2, 0, _("\
848 implementation limit: no more than %d character maps allowed"),
849 MAX_NR_CHARMAP);
850
851 ctype->mapnames[cnt] = name;
852
853 if (max_chars == 0)
854 ctype->map_collection_max[cnt] = charset->mb_cur_max == 1 ? 256 : 512;
855 else
856 ctype->map_collection_max[cnt] = max_chars;
857
858 ctype->map_collection[cnt] = (u_int32_t *)
859 xmalloc (sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
860 memset (ctype->map_collection[cnt], '\0',
861 sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
862 ctype->map_collection_act[cnt] = 256;
863
864 ++ctype->map_collection_nr;
865 }
866
867
868 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
869 is possible if we only want ot extend the name array. */
870 static u_int32_t *
871 find_idx (struct locale_ctype_t *ctype, u_int32_t **table, size_t *max,
872 size_t *act, unsigned int idx)
873 {
874 size_t cnt;
875
876 if (idx < 256)
877 return table == NULL ? NULL : &(*table)[idx];
878
879 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
880 if (ctype->charnames[cnt] == idx)
881 break;
882
883 /* We have to distinguish two cases: the names is found or not. */
884 if (cnt == ctype->charnames_act)
885 {
886 /* Extend the name array. */
887 if (ctype->charnames_act == ctype->charnames_max)
888 {
889 ctype->charnames_max *= 2;
890 ctype->charnames = (unsigned int *)
891 xrealloc (ctype->charnames,
892 sizeof (unsigned int) * ctype->charnames_max);
893 }
894 ctype->charnames[ctype->charnames_act++] = idx;
895 }
896
897 if (table == NULL)
898 /* We have done everything we are asked to do. */
899 return NULL;
900
901 if (cnt >= *act)
902 {
903 if (cnt >= *max)
904 {
905 size_t old_max = *max;
906 do
907 *max *= 2;
908 while (*max <= cnt);
909
910 *table =
911 (u_int32_t *) xrealloc (*table, *max * sizeof (unsigned long int));
912 memset (&(*table)[old_max], '\0',
913 (*max - old_max) * sizeof (u_int32_t));
914 }
915
916 (*table)[cnt] = 0;
917 *act = cnt;
918 }
919
920 return &(*table)[cnt];
921 }
922
923
924 static void
925 set_class_defaults (struct locale_ctype_t *ctype, struct charset_t *charset)
926 {
927 /* This function defines the default values for the classes and conversions
928 according to POSIX.2 2.5.2.1.
929 It may seem that the order of these if-blocks is arbitrary but it is NOT.
930 Don't move them unless you know what you do! */
931
932 void set_default (int bit, int from, int to)
933 {
934 char tmp[2];
935 int ch;
936 /* Define string. */
937 strcpy (tmp, "?");
938
939 for (ch = from; ch <= to; ++ch)
940 {
941 unsigned int value;
942 tmp[0] = ch;
943
944 value = charset_find_value (charset, tmp, 1);
945 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
946 {
947 error (0, 0, _("\
948 character `%s' not defined while needed as default value"),
949 tmp);
950 continue;
951 }
952 else
953 ELEM (ctype, class_collection, , value) |= bit;
954 }
955 }
956
957 /* Set default values if keyword was not present. */
958 if ((ctype->class_done & BIT (tok_upper)) == 0)
959 /* "If this keyword [upper] is not specified, the uppercase letters
960 `A' through `Z', ..., shall automatically belong to this class,
961 with implementation defined character values." [P1003.2, 2.5.2.1] */
962 set_default (BIT (tok_upper), 'A', 'Z');
963
964 if ((ctype->class_done & BIT (tok_lower)) == 0)
965 /* "If this keyword [lower] is not specified, the lowercase letters
966 `a' through `z', ..., shall automatically belong to this class,
967 with implementation defined character values." [P1003.2, 2.5.2.1] */
968 set_default (BIT (tok_lower), 'a', 'z');
969
970 if ((ctype->class_done & BIT (tok_alpha)) == 0)
971 {
972 /* Table 2-6 in P1003.2 says that characters in class `upper' or
973 class `lower' *must* be in class `alpha'. */
974 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
975 size_t cnt;
976
977 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
978 if ((ctype->class_collection[cnt] & mask) != 0)
979 ctype->class_collection[cnt] |= BIT (tok_alpha);
980 }
981
982 if ((ctype->class_done & BIT (tok_digit)) == 0)
983 /* "If this keyword [digit] is not specified, the digits `0' through
984 `9', ..., shall automatically belong to this class, with
985 implementation-defined character values." [P1003.2, 2.5.2.1] */
986 set_default (BIT (tok_digit), '0', '9');
987
988 /* "Only characters specified for the `alpha' and `digit' keyword
989 shall be specified. Characters specified for the keyword `alpha'
990 and `digit' are automatically included in this class. */
991 {
992 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
993 size_t cnt;
994
995 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
996 if ((ctype->class_collection[cnt] & mask) != 0)
997 ctype->class_collection[cnt] |= BIT (tok_alnum);
998 }
999
1000 if ((ctype->class_done & BIT (tok_space)) == 0)
1001 /* "If this keyword [space] is not specified, the characters <space>,
1002 <form-feed>, <newline>, <carriage-return>, <tab>, and
1003 <vertical-tab>, ..., shall automatically belong to this class,
1004 with implementation-defined character values." [P1003.2, 2.5.2.1] */
1005 {
1006 unsigned int value;
1007
1008 value = charset_find_value (charset, "space", 5);
1009 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1010 error (0, 0, _("\
1011 character `%s' not defined while needed as default value"),
1012 "<space>");
1013 else
1014 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1015
1016 value = charset_find_value (charset, "form-feed", 9);
1017 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1018 error (0, 0, _("\
1019 character `%s' not defined while needed as default value"),
1020 "<form-feed>");
1021 else
1022 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1023
1024 value = charset_find_value (charset, "newline", 7);
1025 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1026 error (0, 0, _("\
1027 character `%s' not defined while needed as default value"),
1028 "<newline>");
1029 else
1030 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1031
1032 value = charset_find_value (charset, "carriage-return", 15);
1033 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1034 error (0, 0, _("\
1035 character `%s' not defined while needed as default value"),
1036 "<carriage-return>");
1037 else
1038 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1039
1040 value = charset_find_value (charset, "tab", 3);
1041 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1042 error (0, 0, _("\
1043 character `%s' not defined while needed as default value"),
1044 "<tab>");
1045 else
1046 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1047
1048 value = charset_find_value (charset, "vertical-tab", 12);
1049 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1050 error (0, 0, _("\
1051 character `%s' not defined while needed as default value"),
1052 "<vertical-tab>");
1053 else
1054 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1055 }
1056
1057 if ((ctype->class_done & BIT (tok_xdigit)) == 0)
1058 /* "If this keyword is not specified, the digits `0' to `9', the
1059 uppercase letters `A' through `F', and the lowercase letters `a'
1060 through `f', ..., shall automatically belong to this class, with
1061 implementation defined character values." [P1003.2, 2.5.2.1] */
1062 {
1063 set_default (BIT (tok_xdigit), '0', '9');
1064 set_default (BIT (tok_xdigit), 'A', 'F');
1065 set_default (BIT (tok_xdigit), 'a', 'f');
1066 }
1067
1068 if ((ctype->class_done & BIT (tok_blank)) == 0)
1069 /* "If this keyword [blank] is unspecified, the characters <space> and
1070 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
1071 {
1072 unsigned int value;
1073
1074 value = charset_find_value (charset, "space", 5);
1075 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1076 error (0, 0, _("\
1077 character `%s' not defined while needed as default value"),
1078 "<space>");
1079 else
1080 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1081
1082 value = charset_find_value (charset, "tab", 3);
1083 if ((wchar_t) value == ILLEGAL_CHAR_VALUE && !be_quiet)
1084 error (0, 0, _("\
1085 character `%s' not defined while needed as default value"),
1086 "<tab>");
1087 else
1088 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1089 }
1090
1091 if ((ctype->class_done & BIT (tok_graph)) == 0)
1092 /* "If this keyword [graph] is not specified, characters specified for
1093 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
1094 shall belong to this character class." [P1003.2, 2.5.2.1] */
1095 {
1096 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1097 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1098 size_t cnt;
1099
1100 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1101 if ((ctype->class_collection[cnt] & mask) != 0)
1102 ctype->class_collection[cnt] |= BIT (tok_graph);
1103 }
1104
1105 if ((ctype->class_done & BIT (tok_print)) == 0)
1106 /* "If this keyword [print] is not provided, characters specified for
1107 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
1108 and the <space> character shall belong to this character class."
1109 [P1003.2, 2.5.2.1] */
1110 {
1111 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1112 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1113 size_t cnt;
1114 wchar_t space;
1115
1116 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1117 if ((ctype->class_collection[cnt] & mask) != 0)
1118 ctype->class_collection[cnt] |= BIT (tok_print);
1119
1120 space = charset_find_value (charset, "space", 5);
1121 if (space == ILLEGAL_CHAR_VALUE && !be_quiet)
1122 error (0, 0, _("\
1123 character `%s' not defined while needed as default value"),
1124 "<space>");
1125 else
1126 ELEM (ctype, class_collection, , space) |= BIT (tok_print);
1127 }
1128
1129 if (ctype->toupper_done == 0)
1130 /* "If this keyword [toupper] is not specified, the lowercase letters
1131 `a' through `z', and their corresponding uppercase letters `A' to
1132 `Z', ..., shall automatically be included, with implementation-
1133 defined character values." [P1003.2, 2.5.2.1] */
1134 {
1135 char tmp[4];
1136 int ch;
1137
1138 strcpy (tmp, "<?>");
1139
1140 for (ch = 'a'; ch <= 'z'; ++ch)
1141 {
1142 unsigned int value_from, value_to;
1143
1144 tmp[1] = (char) ch;
1145
1146 value_from = charset_find_value (charset, &tmp[1], 1);
1147 if ((wchar_t) value_from == ILLEGAL_CHAR_VALUE && !be_quiet)
1148 {
1149 error (0, 0, _("\
1150 character `%s' not defined while needed as default value"),
1151 tmp);
1152 continue;
1153 }
1154
1155 /* This conversion is implementation defined. */
1156 tmp[1] = (char) (ch + ('A' - 'a'));
1157 value_to = charset_find_value (charset, &tmp[1], 1);
1158 if ((wchar_t) value_to == ILLEGAL_CHAR_VALUE && !be_quiet)
1159 {
1160 error (0, 0, _("\
1161 character `%s' not defined while needed as default value"),
1162 tmp);
1163 continue;
1164 }
1165
1166 /* The index [0] is determined by the order of the
1167 `ctype_map_newP' calls in `ctype_startup'. */
1168 ELEM (ctype, map_collection, [0], value_from) = value_to;
1169 }
1170 }
1171
1172 if (ctype->tolower_done == 0)
1173 /* "If this keyword [tolower] is not specified, the mapping shall be
1174 the reverse mapping of the one specified to `toupper'." [P1003.2] */
1175 {
1176 size_t cnt;
1177
1178 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
1179 if (ctype->map_collection[0][cnt] != 0)
1180 ELEM (ctype, map_collection, [1],
1181 ctype->map_collection[0][cnt])
1182 = ctype->charnames[cnt];
1183 }
1184 }
1185
1186
1187 static void
1188 allocate_arrays (struct locale_ctype_t *ctype, struct charset_t *charset)
1189 {
1190 size_t idx;
1191
1192 /* First we have to decide how we organize the arrays. It is easy
1193 for a one-byte character set. But multi-byte character set
1194 cannot be stored flat because the chars might be sparsely used.
1195 So we determine an optimal hashing function for the used
1196 characters.
1197
1198 We use a very trivial hashing function to store the sparse
1199 table. CH % TABSIZE is used as an index. To solve multiple hits
1200 we have N planes. This guarantees a fixed search time for a
1201 character [N / 2]. In the following code we determine the minmum
1202 value for TABSIZE * N, where TABSIZE >= 256. */
1203 size_t min_total = UINT_MAX;
1204 size_t act_size = 256;
1205
1206 if (!be_quiet)
1207 fputs (_("\
1208 Computing table size for character classes might take a while..."),
1209 stderr);
1210
1211 while (act_size < min_total)
1212 {
1213 size_t cnt[act_size];
1214 size_t act_planes = 1;
1215
1216 memset (cnt, '\0', sizeof cnt);
1217
1218 for (idx = 0; idx < 256; ++idx)
1219 cnt[idx] = 1;
1220
1221 for (idx = 0; idx < ctype->charnames_act; ++idx)
1222 if (ctype->charnames[idx] >= 256)
1223 {
1224 size_t nr = ctype->charnames[idx] % act_size;
1225
1226 if (++cnt[nr] > act_planes)
1227 {
1228 act_planes = cnt[nr];
1229 if (act_size * act_planes >= min_total)
1230 break;
1231 }
1232 }
1233
1234 if (act_size * act_planes < min_total)
1235 {
1236 min_total = act_size * act_planes;
1237 ctype->plane_size = act_size;
1238 ctype->plane_cnt = act_planes;
1239 }
1240
1241 ++act_size;
1242 }
1243
1244 if (!be_quiet)
1245 fputs (_(" done\n"), stderr);
1246
1247
1248 #if __BYTE_ORDER == __LITTLE_ENDIAN
1249 # define NAMES_B1 ctype->names_el
1250 # define NAMES_B2 ctype->names_eb
1251 #else
1252 # define NAMES_B1 ctype->names_eb
1253 # define NAMES_B2 ctype->names_el
1254 #endif
1255
1256 ctype->names_eb = (u_int32_t *) xcalloc (ctype->plane_size
1257 * ctype->plane_cnt,
1258 sizeof (u_int32_t));
1259 ctype->names_el = (u_int32_t *) xcalloc (ctype->plane_size
1260 * ctype->plane_cnt,
1261 sizeof (u_int32_t));
1262
1263 for (idx = 1; idx < 256; ++idx)
1264 NAMES_B1[idx] = idx;
1265
1266 /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */
1267 NAMES_B1[0] = 1;
1268
1269 for (idx = 256; idx < ctype->charnames_act; ++idx)
1270 {
1271 size_t nr = (ctype->charnames[idx] % ctype->plane_size);
1272 size_t depth = 0;
1273
1274 while (NAMES_B1[nr + depth * ctype->plane_size])
1275 ++depth;
1276 assert (depth < ctype->plane_cnt);
1277
1278 NAMES_B1[nr + depth * ctype->plane_size] = ctype->charnames[idx];
1279
1280 /* Now for faster access remember the index in the NAMES_B array. */
1281 ctype->charnames[idx] = nr + depth * ctype->plane_size;
1282 }
1283 NAMES_B1[0] = 0;
1284
1285 for (idx = 0; idx < ctype->plane_size * ctype->plane_cnt; ++idx)
1286 NAMES_B2[idx] = SWAPU32 (NAMES_B1[idx]);
1287
1288
1289 /* You wonder about this amount of memory? This is only because some
1290 users do not manage to address the array with unsigned values or
1291 data types with range >= 256. '\200' would result in the array
1292 index -128. To help these poor people we duplicate the entries for
1293 128 up to 255 below the entry for \0. */
1294 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
1295 sizeof (char_class_t));
1296 ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
1297 * ctype->plane_cnt,
1298 sizeof (char_class32_t));
1299
1300 /* Fill in the character class information. */
1301 #if __BYTE_ORDER == __LITTLE_ENDIAN
1302 # define TRANS(w) CHAR_CLASS_TRANS (w)
1303 # define TRANS32(w) CHAR_CLASS32_TRANS (w)
1304 #else
1305 # define TRANS(w) (w)
1306 # define TRANS32(w) (w)
1307 #endif
1308
1309 for (idx = 0; idx < ctype->class_collection_act; ++idx)
1310 if (ctype->charnames[idx] < 256)
1311 ctype->ctype_b[128 + ctype->charnames[idx]]
1312 = TRANS (ctype->class_collection[idx]);
1313
1314 /* Mirror first 127 entries. We must take care that entry -1 is not
1315 mirrored because EOF == -1. */
1316 for (idx = 0; idx < 127; ++idx)
1317 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
1318
1319 /* The 32 bit array contains all characters. */
1320 for (idx = 0; idx < ctype->class_collection_act; ++idx)
1321 ctype->ctype32_b[ctype->charnames[idx]]
1322 = TRANS32 (ctype->class_collection[idx]);
1323
1324 /* Room for table of mappings. */
1325 ctype->map_eb = (u_int32_t **) xmalloc (ctype->map_collection_nr
1326 * sizeof (u_int32_t *));
1327 ctype->map_el = (u_int32_t **) xmalloc (ctype->map_collection_nr
1328 * sizeof (u_int32_t *));
1329
1330 /* Fill in all mappings. */
1331 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
1332 {
1333 unsigned int idx2;
1334
1335 /* Allocate table. */
1336 ctype->map_eb[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1337 * ctype->plane_cnt + 128)
1338 * sizeof (u_int32_t));
1339 ctype->map_el[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1340 * ctype->plane_cnt + 128)
1341 * sizeof (u_int32_t));
1342
1343 #if __BYTE_ORDER == __LITTLE_ENDIAN
1344 # define MAP_B1 ctype->map_el
1345 # define MAP_B2 ctype->map_eb
1346 #else
1347 # define MAP_B1 ctype->map_eb
1348 # define MAP_B2 ctype->map_el
1349 #endif
1350
1351 /* Copy default value (identity mapping). */
1352 memcpy (&MAP_B1[idx][128], NAMES_B1,
1353 ctype->plane_size * ctype->plane_cnt * sizeof (u_int32_t));
1354
1355 /* Copy values from collection. */
1356 for (idx2 = 0; idx2 < ctype->map_collection_act[idx]; ++idx2)
1357 if (ctype->map_collection[idx][idx2] != 0)
1358 MAP_B1[idx][128 + ctype->charnames[idx2]] =
1359 ctype->map_collection[idx][idx2];
1360
1361 /* Mirror first 127 entries. We must take care not to map entry
1362 -1 because EOF == -1. */
1363 for (idx2 = 0; idx2 < 127; ++idx2)
1364 MAP_B1[idx][idx2] = MAP_B1[idx][256 + idx2];
1365
1366 /* EOF must map to EOF. */
1367 MAP_B1[idx][127] = EOF;
1368
1369 /* And now the other byte order. */
1370 for (idx2 = 0; idx2 < ctype->plane_size * ctype->plane_cnt + 128; ++idx2)
1371 MAP_B2[idx][idx2] = SWAPU32 (MAP_B1[idx][idx2]);
1372 }
1373
1374 /* Extra array for class and map names. */
1375 ctype->class_name_ptr = (u_int32_t *) xmalloc (ctype->nr_charclass
1376 * sizeof (u_int32_t));
1377 ctype->map_name_ptr = (u_int32_t *) xmalloc (ctype->map_collection_nr
1378 * sizeof (u_int32_t));
1379
1380 /* Array for width information. Because the expected width are very
1381 small we use only one single byte. This save space and we need
1382 not provide the information twice with both endianesses. */
1383 ctype->width = (unsigned char *) xmalloc (ctype->plane_size
1384 * ctype->plane_cnt);
1385 /* Initialize with default width value. */
1386 memset (ctype->width, charset->width_default,
1387 ctype->plane_size * ctype->plane_cnt);
1388 if (charset->width_rules != NULL)
1389 {
1390 size_t cnt;
1391
1392 for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
1393 if (charset->width_rules[cnt].width != charset->width_default)
1394 for (idx = charset->width_rules[cnt].from;
1395 idx <= charset->width_rules[cnt].to; ++idx)
1396 {
1397 size_t nr = idx % ctype->plane_size;
1398 size_t depth = 0;
1399
1400 while (NAMES_B1[nr + depth * ctype->plane_size] != nr)
1401 ++depth;
1402 assert (depth < ctype->plane_cnt);
1403
1404 ctype->width[nr + depth * ctype->plane_size]
1405 = charset->width_rules[cnt].width;
1406 }
1407 }
1408
1409 /* Compute MB_CUR_MAX. Please note the value mb_cur_max in the
1410 character set definition gives the number of bytes in the wide
1411 character representation. We compute the number of bytes used
1412 for the UTF-8 encoded form. */
1413 ctype->mb_cur_max = ((int []) { 2, 3, 5, 6 }) [charset->mb_cur_max - 1];
1414
1415 /* We need the name of the currently used 8-bit character set to
1416 make correct conversion between this 8-bit representation and the
1417 ISO 10646 character set used internally for wide characters. */
1418 ctype->codeset_name = charset->code_set_name;
1419 }