]> git.ipfire.org Git - thirdparty/glibc.git/blob - locale/programs/ld-ctype.c
Update.
[thirdparty/glibc.git] / locale / programs / ld-ctype.c
1 /* Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #include <alloca.h>
25 #include <endian.h>
26 #include <limits.h>
27 #include <string.h>
28
29 #include "locales.h"
30 #include "localeinfo.h"
31 #include "langinfo.h"
32 #include "locfile-token.h"
33 #include "stringtrans.h"
34
35 /* Uncomment the following line in the production version. */
36 /* define NDEBUG 1 */
37 #include <assert.h>
38
39
/* Allocation wrappers which are defined outside of this file.  The code
   in this file never checks their results for NULL.
   NOTE(review): presumably they terminate the program on allocation
   failure -- confirm against their definition.  */
void *xmalloc (size_t __n);
void *xcalloc (size_t __n, size_t __s);
void *xrealloc (void *__ptr, size_t __n);
43
44
/* The bit used for representing a special class.  BITPOS is the distance
   of the class token from `tok_upper', BIT the matching one-bit mask.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (1 << BITPOS (class))

/* Expand to the (dereferenced) result of find_idx for character VALUE in
   the table COLLECTION of CTYPE, together with the table's `_max' and
   `_act' companions.  IDX is either empty or a subscript such as `[n]'
   which selects one table out of an array of tables.  */
#define ELEM(ctype, collection, idx, value) \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \
             &ctype->collection##_act idx, value)

/* Reverse the order of the four low bytes of W.  */
#define SWAPU32(w) \
  (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))

/* Exchange the two low bytes of W.  */
#define SWAPU16(w) \
  ((((w) >> 8) & 0xff) | (((w) & 0xff) << 8))


/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t u_int16_t
#define CHAR_CLASS_TRANS SWAPU16
#define char_class32_t u_int32_t
#define CHAR_CLASS32_TRANS SWAPU32
67
68
/* The real definition of the struct for the LC_CTYPE locale.  */
struct locale_ctype_t
{
  /* Character values seen so far.  The position of a value in this array
     is the index used for that character in the class and map tables
     below (see find_idx).  The first 256 positions are the identity.  */
  unsigned int *charnames;
  size_t charnames_max;         /* Allocated number of elements.  */
  size_t charnames_act;         /* Number of elements in use.  */

  /* We will allow up to 8 * sizeof(u_int32_t) - 1 character classes.  */
#define MAX_NR_CHARCLASS (8 * sizeof (u_int32_t) - 1)
  size_t nr_charclass;          /* Number of names in `classnames'.  */
  const char *classnames[MAX_NR_CHARCLASS];
  /* Mask which is OR'ed into the entries of the characters named while a
     class definition is being read; zero outside of a definition.  */
  unsigned long int current_class_mask;
  /* Character given last to ctype_class_from; start of a possible range
     completed by ctype_class_to.  */
  unsigned int last_class_char;
  /* One class bit mask per character, indexed as described above.  */
  u_int32_t *class_collection;
  size_t class_collection_max;
  size_t class_collection_act;
  /* Bits (see BIT) of the classes for which a definition was started;
     set_class_defaults tests them to decide about default values.  */
  unsigned long int class_done;

  /* If the following number ever turns out to be too small simply
     increase it.  But I doubt it will.  --drepper@gnu */
#define MAX_NR_CHARMAP 16
  const char *mapnames[MAX_NR_CHARMAP];
  /* One mapping table per map name plus its allocated size and its
     `act' counter.  */
  u_int32_t *map_collection[MAX_NR_CHARMAP];
  size_t map_collection_max[MAX_NR_CHARMAP];
  size_t map_collection_act[MAX_NR_CHARMAP];
  size_t map_collection_nr;     /* Number of maps defined.  */
  /* Index of the map being read, MAX_NR_CHARMAP when none is.  */
  size_t last_map_idx;
  /* Source character of the pair being read, or ILLEGAL_CHAR_VALUE.  */
  unsigned int from_map_char;
  int toupper_done;             /* Set when a `toupper' map was started.  */
  int tolower_done;             /* Set when a `tolower' map was started.  */

  /* The arrays for the binary representation.  ctype_output writes them
     after calling allocate_arrays.
     NOTE(review): presumably allocate_arrays fills all of them -- its
     definition is not part of this section.  */
  u_int32_t plane_size;
  u_int32_t plane_cnt;
  char_class_t *ctype_b;
  char_class32_t *ctype32_b;
  u_int32_t *names_el;
  u_int32_t *names_eb;
  u_int32_t **map_eb;
  u_int32_t **map_el;
  u_int32_t *class_name_ptr;
  u_int32_t *map_name_ptr;
  unsigned char *width;
  u_int32_t mb_cur_max;
  const char *codeset_name;
};
115
116
/* Prototypes for local functions.  The `newP' functions register a new
   class resp. map name directly in the LC_CTYPE data; the public
   ctype_class_new and ctype_map_new are wrappers around them.  */
static void ctype_class_newP (struct linereader *lr,
                              struct locale_ctype_t *ctype, const char *name);
static void ctype_map_newP (struct linereader *lr,
                            struct locale_ctype_t *ctype,
                            const char *name, struct charset_t *charset);
/* Look up (and if necessary create) the table slot of character IDX.  */
static u_int32_t *find_idx (struct locale_ctype_t *ctype, u_int32_t **table,
                            size_t *max, size_t *act, unsigned int idx);
static void set_class_defaults (struct locale_ctype_t *ctype,
                                struct charset_t *charset);
static void allocate_arrays (struct locale_ctype_t *ctype,
                             struct charset_t *charset);
129
130
131 void
132 ctype_startup (struct linereader *lr, struct localedef_t *locale,
133 struct charset_t *charset)
134 {
135 unsigned int cnt;
136 struct locale_ctype_t *ctype;
137
138 /* It is important that we always use UCS1 encoding for strings now. */
139 encoding_method = ENC_UCS1;
140
141 /* Allocate the needed room. */
142 locale->categories[LC_CTYPE].ctype = ctype =
143 (struct locale_ctype_t *) xmalloc (sizeof (struct locale_ctype_t));
144
145 /* We have no names seen yet. */
146 ctype->charnames_max = charset->mb_cur_max == 1 ? 256 : 512;
147 ctype->charnames =
148 (unsigned int *) xmalloc (ctype->charnames_max * sizeof (unsigned int));
149 for (cnt = 0; cnt < 256; ++cnt)
150 ctype->charnames[cnt] = cnt;
151 ctype->charnames_act = 256;
152
153 /* Fill character class information. */
154 ctype->nr_charclass = 0;
155 ctype->current_class_mask = 0;
156 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
157 /* The order of the following instructions determines the bit
158 positions! */
159 ctype_class_newP (lr, ctype, "upper");
160 ctype_class_newP (lr, ctype, "lower");
161 ctype_class_newP (lr, ctype, "alpha");
162 ctype_class_newP (lr, ctype, "digit");
163 ctype_class_newP (lr, ctype, "xdigit");
164 ctype_class_newP (lr, ctype, "space");
165 ctype_class_newP (lr, ctype, "print");
166 ctype_class_newP (lr, ctype, "graph");
167 ctype_class_newP (lr, ctype, "blank");
168 ctype_class_newP (lr, ctype, "cntrl");
169 ctype_class_newP (lr, ctype, "punct");
170 ctype_class_newP (lr, ctype, "alnum");
171
172 ctype->class_collection_max = charset->mb_cur_max == 1 ? 256 : 512;
173 ctype->class_collection
174 = (u_int32_t *) xmalloc (sizeof (unsigned long int)
175 * ctype->class_collection_max);
176 memset (ctype->class_collection, '\0',
177 sizeof (unsigned long int) * ctype->class_collection_max);
178 ctype->class_collection_act = 256;
179
180 /* Fill character map information. */
181 ctype->map_collection_nr = 0;
182 ctype->last_map_idx = MAX_NR_CHARMAP;
183 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
184 ctype_map_newP (lr, ctype, "toupper", charset);
185 ctype_map_newP (lr, ctype, "tolower", charset);
186
187 /* Fill first 256 entries in `toupper' and `tolower' arrays. */
188 for (cnt = 0; cnt < 256; ++cnt)
189 {
190 ctype->map_collection[0][cnt] = cnt;
191 ctype->map_collection[1][cnt] = cnt;
192 }
193 }
194
195
196 void
197 ctype_finish (struct localedef_t *locale, struct charset_t *charset)
198 {
199 /* See POSIX.2, table 2-6 for the meaning of the following table. */
200 #define NCLASS 12
201 static const struct
202 {
203 const char *name;
204 const char allow[NCLASS];
205 }
206 valid_table[NCLASS] =
207 {
208 /* The order is important. See token.h for more information.
209 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
210 { "upper", "--MX-XDDXXX-" },
211 { "lower", "--MX-XDDXXX-" },
212 { "alpha", "---X-XDDXXX-" },
213 { "digit", "XXX--XDDXXX-" },
214 { "xdigit", "-----XDDXXX-" },
215 { "space", "XXXXX------X" },
216 { "print", "---------X--" },
217 { "graph", "---------X--" },
218 { "blank", "XXXXXM-----X" },
219 { "cntrl", "XXXXX-XX--XX" },
220 { "punct", "XXXXX-DD-X-X" },
221 { "alnum", "-----XDDXXX-" }
222 };
223 size_t cnt;
224 int cls1, cls2;
225 unsigned int space_value;
226 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
227
228 /* Set default value for classes not specified. */
229 set_class_defaults (ctype, charset);
230
231 /* Check according to table. */
232 for (cnt = 0; cnt < ctype->class_collection_max; ++cnt)
233 {
234 unsigned long int tmp;
235
236 tmp = ctype->class_collection[cnt];
237 if (tmp == 0)
238 continue;
239
240 for (cls1 = 0; cls1 < NCLASS; ++cls1)
241 if ((tmp & (1 << cls1)) != 0)
242 for (cls2 = 0; cls2 < NCLASS; ++cls2)
243 if (valid_table[cls1].allow[cls2] != '-')
244 {
245 int eq = (tmp & (1 << cls2)) != 0;
246 switch (valid_table[cls1].allow[cls2])
247 {
248 case 'M':
249 if (!eq)
250 {
251 char buf[17];
252 char *cp = buf;
253 unsigned int value;
254
255 value = ctype->charnames[cnt];
256
257 if ((value & 0xff000000) != 0)
258 cp += sprintf (cp, "\\%o", (value >> 24) & 0xff);
259 if ((value & 0xffff0000) != 0)
260 cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
261 if ((value & 0xffffff00) != 0)
262 cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
263 sprintf (cp, "\\%o", value & 0xff);
264
265 if (!be_quiet)
266 error (0, 0, _("\
267 character %s'%s' in class `%s' must be in class `%s'"), value > 256 ? "L" : "",
268 cp, valid_table[cls1].name,
269 valid_table[cls2].name);
270 }
271 break;
272
273 case 'X':
274 if (eq)
275 {
276 char buf[17];
277 char *cp = buf;
278 unsigned int value;
279
280 value = ctype->charnames[cnt];
281
282 if ((value & 0xff000000) != 0)
283 cp += sprintf (cp, "\\%o", value >> 24);
284 if ((value & 0xffff0000) != 0)
285 cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
286 if ((value & 0xffffff00) != 0)
287 cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
288 sprintf (cp, "\\%o", value & 0xff);
289
290 if (!be_quiet)
291 error (0, 0, _("\
292 character %s'%s' in class `%s' must not be in class `%s'"),
293 value > 256 ? "L" : "", cp,
294 valid_table[cls1].name,
295 valid_table[cls2].name);
296 }
297 break;
298
299 case 'D':
300 ctype->class_collection[cnt] |= 1 << cls2;
301 break;
302
303 default:
304 error (5, 0, _("internal error in %s, line %u"),
305 __FUNCTION__, __LINE__);
306 }
307 }
308 }
309
310 /* ... and now test <SP> as a special case. */
311 space_value = charset_find_value (&charset->char_table, "SP", 2);
312 if ((wchar_t) space_value == ILLEGAL_CHAR_VALUE)
313 {
314 if (!be_quiet)
315 error (0, 0, _("character <SP> not defined in character map"));
316 }
317 else if (((cnt = BITPOS (tok_space),
318 (ELEM (ctype, class_collection, , space_value)
319 & BIT (tok_space)) == 0)
320 || (cnt = BITPOS (tok_blank),
321 (ELEM (ctype, class_collection, , space_value)
322 & BIT (tok_blank)) == 0)))
323 {
324 if (!be_quiet)
325 error (0, 0, _("<SP> character not in class `%s'"),
326 valid_table[cnt].name);
327 }
328 else if (((cnt = BITPOS (tok_punct),
329 (ELEM (ctype, class_collection, , space_value)
330 & BIT (tok_punct)) != 0)
331 || (cnt = BITPOS (tok_graph),
332 (ELEM (ctype, class_collection, , space_value)
333 & BIT (tok_graph))
334 != 0)))
335 {
336 if (!be_quiet)
337 error (0, 0, _("<SP> character must not be in class `%s'"),
338 valid_table[cnt].name);
339 }
340 else
341 ELEM (ctype, class_collection, , space_value) |= BIT (tok_print);
342
343 /* Now that the tests are done make sure the name array contains all
344 characters which are handled in the WIDTH section of the
345 character set definition file. */
346 if (charset->width_rules != NULL)
347 for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
348 {
349 size_t inner;
350 for (inner = charset->width_rules[cnt].from;
351 inner <= charset->width_rules[cnt].to; ++inner)
352 (void) find_idx (ctype, NULL, NULL, NULL, inner);
353 }
354 }
355
356
357 void
358 ctype_output (struct localedef_t *locale, struct charset_t *charset,
359 const char *output_path)
360 {
361 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
362 const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
363 + 2 * (ctype->map_collection_nr - 2));
364 struct iovec iov[2 + nelems + ctype->nr_charclass
365 + ctype->map_collection_nr];
366 struct locale_file data;
367 u_int32_t idx[nelems];
368 size_t elem, cnt, offset, total;
369
370
371 if ((locale->binary & (1 << LC_CTYPE)) != 0)
372 {
373 iov[0].iov_base = ctype;
374 iov[0].iov_len = locale->len[LC_CTYPE];
375
376 write_locale_data (output_path, "LC_CTYPE", 1, iov);
377
378 return;
379 }
380
381
382 /* Now prepare the output: Find the sizes of the table we can use. */
383 allocate_arrays (ctype, charset);
384
385 data.magic = LIMAGIC (LC_CTYPE);
386 data.n = nelems;
387 iov[0].iov_base = (void *) &data;
388 iov[0].iov_len = sizeof (data);
389
390 iov[1].iov_base = (void *) idx;
391 iov[1].iov_len = sizeof (idx);
392
393 idx[0] = iov[0].iov_len + iov[1].iov_len;
394 offset = 0;
395
396 for (elem = 0; elem < nelems; ++elem)
397 {
398 if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
399 switch (elem)
400 {
401 #define CTYPE_DATA(name, base, len) \
402 case _NL_ITEM_INDEX (name): \
403 iov[2 + elem + offset].iov_base = (base); \
404 iov[2 + elem + offset].iov_len = (len); \
405 if (elem + 1 < nelems) \
406 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
407 break
408
409 CTYPE_DATA (_NL_CTYPE_CLASS,
410 ctype->ctype_b,
411 (256 + 128) * sizeof (char_class_t));
412
413 CTYPE_DATA (_NL_CTYPE_TOUPPER_EB,
414 ctype->map_eb[0],
415 (ctype->plane_size * ctype->plane_cnt + 128)
416 * sizeof (u_int32_t));
417 CTYPE_DATA (_NL_CTYPE_TOLOWER_EB,
418 ctype->map_eb[1],
419 (ctype->plane_size * ctype->plane_cnt + 128)
420 * sizeof (u_int32_t));
421
422 CTYPE_DATA (_NL_CTYPE_TOUPPER_EL,
423 ctype->map_el[0],
424 (ctype->plane_size * ctype->plane_cnt + 128)
425 * sizeof (u_int32_t));
426 CTYPE_DATA (_NL_CTYPE_TOLOWER_EL,
427 ctype->map_el[1],
428 (ctype->plane_size * ctype->plane_cnt + 128)
429 * sizeof (u_int32_t));
430
431 CTYPE_DATA (_NL_CTYPE_CLASS32,
432 ctype->ctype32_b,
433 (ctype->plane_size * ctype->plane_cnt
434 * sizeof (char_class32_t)));
435
436 CTYPE_DATA (_NL_CTYPE_NAMES_EB,
437 ctype->names_eb, (ctype->plane_size * ctype->plane_cnt
438 * sizeof (u_int32_t)));
439 CTYPE_DATA (_NL_CTYPE_NAMES_EL,
440 ctype->names_el, (ctype->plane_size * ctype->plane_cnt
441 * sizeof (u_int32_t)));
442
443 CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
444 &ctype->plane_size, sizeof (u_int32_t));
445 CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
446 &ctype->plane_cnt, sizeof (u_int32_t));
447
448 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
449 /* The class name array. */
450 total = 0;
451 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
452 {
453 iov[2 + elem + offset].iov_base
454 = (void *) ctype->classnames[cnt];
455 iov[2 + elem + offset].iov_len
456 = strlen (ctype->classnames[cnt]) + 1;
457 total += iov[2 + elem + offset].iov_len;
458 }
459 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
460 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
461 total += 1 + (4 - ((total + 1) % 4));
462
463 if (elem + 1 < nelems)
464 idx[elem + 1] = idx[elem] + total;
465 break;
466
467 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
468 /* The class name array. */
469 total = 0;
470 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
471 {
472 iov[2 + elem + offset].iov_base
473 = (void *) ctype->mapnames[cnt];
474 iov[2 + elem + offset].iov_len
475 = strlen (ctype->mapnames[cnt]) + 1;
476 total += iov[2 + elem + offset].iov_len;
477 }
478 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
479 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
480 total += 1 + (4 - ((total + 1) % 4));
481
482 if (elem + 1 < nelems)
483 idx[elem + 1] = idx[elem] + total;
484 break;
485
486 CTYPE_DATA (_NL_CTYPE_WIDTH,
487 ctype->width, ctype->plane_size * ctype->plane_cnt);
488
489 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
490 &ctype->mb_cur_max, sizeof (u_int32_t));
491
492 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
493 total = strlen (ctype->codeset_name) + 1;
494 if (total % 4 == 0)
495 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
496 else
497 {
498 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
499 memset (mempcpy (iov[2 + elem + offset].iov_base,
500 ctype->codeset_name, total),
501 '\0', 4 - (total & 3));
502 total = (total + 3) & ~3;
503 }
504 iov[2 + elem + offset].iov_len = total;
505 if (elem + 1 < nelems)
506 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
507 break;
508
509 default:
510 assert (! "unknown CTYPE element");
511 }
512 else
513 {
514 /* Handle extra maps. */
515 size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) >> 1;
516
517 if (((elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) & 1) == 0)
518 iov[2 + elem + offset].iov_base = ctype->map_eb[nr];
519 else
520 iov[2 + elem + offset].iov_base = ctype->map_el[nr];
521
522 iov[2 + elem + offset].iov_len = ((ctype->plane_size
523 * ctype->plane_cnt + 128)
524 * sizeof (u_int32_t));
525
526 if (elem + 1 < nelems)
527 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
528 }
529 }
530
531 assert (2 + elem + offset == (nelems + ctype->nr_charclass
532 + ctype->map_collection_nr + 2));
533
534 write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
535 }
536
537
538 /* Character class handling. */
539 void
540 ctype_class_new (struct linereader *lr, struct localedef_t *locale,
541 enum token_t tok, struct token *code,
542 struct charset_t *charset)
543 {
544 ctype_class_newP (lr, locale->categories[LC_CTYPE].ctype,
545 code->val.str.start);
546 }
547
548
549 int
550 ctype_is_charclass (struct linereader *lr, struct localedef_t *locale,
551 const char *name)
552 {
553 size_t cnt;
554
555 for (cnt = 0; cnt < locale->categories[LC_CTYPE].ctype->nr_charclass; ++cnt)
556 if (strcmp (name, locale->categories[LC_CTYPE].ctype->classnames[cnt])
557 == 0)
558 return 1;
559
560 return 0;
561 }
562
563
564 void
565 ctype_class_start (struct linereader *lr, struct localedef_t *locale,
566 enum token_t tok, const char *str,
567 struct charset_t *charset)
568 {
569 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
570 size_t cnt;
571
572 switch (tok)
573 {
574 case tok_upper:
575 str = "upper";
576 break;
577 case tok_lower:
578 str = "lower";
579 break;
580 case tok_alpha:
581 str = "alpha";
582 break;
583 case tok_digit:
584 str = "digit";
585 break;
586 case tok_xdigit:
587 str = "xdigit";
588 break;
589 case tok_space:
590 str = "space";
591 break;
592 case tok_print:
593 str = "print";
594 break;
595 case tok_graph:
596 str = "graph";
597 break;
598 case tok_blank:
599 str = "blank";
600 break;
601 case tok_cntrl:
602 str = "cntrl";
603 break;
604 case tok_punct:
605 str = "punct";
606 break;
607 case tok_alnum:
608 str = "alnum";
609 break;
610 case tok_ident:
611 break;
612 default:
613 assert (! "illegal token as class name: should not happen");
614 }
615
616 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
617 if (strcmp (str, ctype->classnames[cnt]) == 0)
618 break;
619
620 if (cnt >= ctype->nr_charclass)
621 assert (! "unknown class in class definition: should not happen");
622
623 ctype->class_done |= BIT (tok);
624
625 ctype->current_class_mask = 1 << cnt;
626 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
627 }
628
629
630 void
631 ctype_class_from (struct linereader *lr, struct localedef_t *locale,
632 struct token *code, struct charset_t *charset)
633 {
634 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
635 unsigned int value;
636
637 value = charset_find_value (&charset->char_table, code->val.str.start,
638 code->val.str.len);
639
640 ctype->last_class_char = value;
641
642 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
643 /* In the LC_CTYPE category it is no error when a character is
644 not found. This has to be ignored silently. */
645 return;
646
647 *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
648 &ctype->class_collection_act, value)
649 |= ctype->current_class_mask;
650 }
651
652
653 void
654 ctype_class_to (struct linereader *lr, struct localedef_t *locale,
655 struct token *code, struct charset_t *charset)
656 {
657 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
658 unsigned int value, cnt;
659
660 value = charset_find_value (&charset->char_table, code->val.str.start,
661 code->val.str.len);
662
663 /* In the LC_CTYPE category it is no error when a character is
664 not found. This has to be ignored silently. */
665 if ((wchar_t) ctype->last_class_char != ILLEGAL_CHAR_VALUE
666 && (wchar_t) value != ILLEGAL_CHAR_VALUE)
667 for (cnt = ctype->last_class_char + 1; cnt <= value; ++cnt)
668 *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
669 &ctype->class_collection_act, cnt)
670 |= ctype->current_class_mask;
671
672 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
673 }
674
675
676 void
677 ctype_class_end (struct linereader *lr, struct localedef_t *locale)
678 {
679 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
680
681 /* We have no special actions to perform here. */
682 ctype->current_class_mask = 0;
683 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
684 }
685
686
687 /* Character map handling. */
688 void
689 ctype_map_new (struct linereader *lr, struct localedef_t *locale,
690 enum token_t tok, struct token *code,
691 struct charset_t *charset)
692 {
693 ctype_map_newP (lr, locale->categories[LC_CTYPE].ctype,
694 code->val.str.start, charset);
695 }
696
697
698 int
699 ctype_is_charconv (struct linereader *lr, struct localedef_t *locale,
700 const char *name)
701 {
702 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
703 size_t cnt;
704
705 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
706 if (strcmp (name, ctype->mapnames[cnt]) == 0)
707 return 1;
708
709 return 0;
710 }
711
712
713 void
714 ctype_map_start (struct linereader *lr, struct localedef_t *locale,
715 enum token_t tok, const char *name, struct charset_t *charset)
716 {
717 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
718 size_t cnt;
719
720 switch (tok)
721 {
722 case tok_toupper:
723 ctype->toupper_done = 1;
724 name = "toupper";
725 break;
726 case tok_tolower:
727 ctype->tolower_done = 1;
728 name = "tolower";
729 break;
730 case tok_ident:
731 break;
732 default:
733 assert (! "unknown token in category `LC_CTYPE' should not happen");
734 }
735
736 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
737 if (strcmp (name, ctype->mapnames[cnt]) == 0)
738 break;
739
740 if (cnt == ctype->map_collection_nr)
741 assert (! "unknown token in category `LC_CTYPE' should not happen");
742
743 ctype->last_map_idx = cnt;
744 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
745 }
746
747
748 void
749 ctype_map_from (struct linereader *lr, struct localedef_t *locale,
750 struct token *code, struct charset_t *charset)
751 {
752 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
753 unsigned int value;
754
755 value = charset_find_value (&charset->char_table, code->val.str.start,
756 code->val.str.len);
757
758 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
759 /* In the LC_CTYPE category it is no error when a character is
760 not found. This has to be ignored silently. */
761 return;
762
763 assert (ctype->last_map_idx < ctype->map_collection_nr);
764
765 ctype->from_map_char = value;
766 }
767
768
769 void
770 ctype_map_to (struct linereader *lr, struct localedef_t *locale,
771 struct token *code, struct charset_t *charset)
772 {
773 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
774 unsigned int value;
775
776 value = charset_find_value (&charset->char_table, code->val.str.start,
777 code->val.str.len);
778
779 if ((wchar_t) ctype->from_map_char == ILLEGAL_CHAR_VALUE
780 || (wchar_t) value == ILLEGAL_CHAR_VALUE)
781 {
782 /* In the LC_CTYPE category it is no error when a character is
783 not found. This has to be ignored silently. */
784 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
785 return;
786 }
787
788 *find_idx (ctype, &ctype->map_collection[ctype->last_map_idx],
789 &ctype->map_collection_max[ctype->last_map_idx],
790 &ctype->map_collection_act[ctype->last_map_idx],
791 ctype->from_map_char) = value;
792
793 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
794 }
795
796
797 void
798 ctype_map_end (struct linereader *lr, struct localedef_t *locale)
799 {
800 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
801
802 ctype->last_map_idx = MAX_NR_CHARMAP;
803 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
804 }
805
806
807 /* Local functions. */
808 static void
809 ctype_class_newP (struct linereader *lr, struct locale_ctype_t *ctype,
810 const char *name)
811 {
812 size_t cnt;
813
814 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
815 if (strcmp (ctype->classnames[cnt], name) == 0)
816 break;
817
818 if (cnt < ctype->nr_charclass)
819 {
820 lr_error (lr, _("character class `%s' already defined"), name);
821 return;
822 }
823
824 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
825 /* Exit code 2 is prescribed in P1003.2b. */
826 error (2, 0, _("\
827 implementation limit: no more than %d character classes allowed"),
828 MAX_NR_CHARCLASS);
829
830 ctype->classnames[ctype->nr_charclass++] = name;
831 }
832
833
834 static void
835 ctype_map_newP (struct linereader *lr, struct locale_ctype_t *ctype,
836 const char *name, struct charset_t *charset)
837 {
838 size_t max_chars = 0;
839 size_t cnt;
840
841 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
842 {
843 if (strcmp (ctype->mapnames[cnt], name) == 0)
844 break;
845
846 if (max_chars < ctype->map_collection_max[cnt])
847 max_chars = ctype->map_collection_max[cnt];
848 }
849
850 if (cnt < ctype->map_collection_nr)
851 {
852 lr_error (lr, _("character map `%s' already defined"), name);
853 return;
854 }
855
856 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
857 /* Exit code 2 is prescribed in P1003.2b. */
858 error (2, 0, _("\
859 implementation limit: no more than %d character maps allowed"),
860 MAX_NR_CHARMAP);
861
862 ctype->mapnames[cnt] = name;
863
864 if (max_chars == 0)
865 ctype->map_collection_max[cnt] = charset->mb_cur_max == 1 ? 256 : 512;
866 else
867 ctype->map_collection_max[cnt] = max_chars;
868
869 ctype->map_collection[cnt] = (u_int32_t *)
870 xmalloc (sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
871 memset (ctype->map_collection[cnt], '\0',
872 sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
873 ctype->map_collection_act[cnt] = 256;
874
875 ++ctype->map_collection_nr;
876 }
877
878
879 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
880 is possible if we only want ot extend the name array. */
881 static u_int32_t *
882 find_idx (struct locale_ctype_t *ctype, u_int32_t **table, size_t *max,
883 size_t *act, unsigned int idx)
884 {
885 size_t cnt;
886
887 if (idx < 256)
888 return table == NULL ? NULL : &(*table)[idx];
889
890 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
891 if (ctype->charnames[cnt] == idx)
892 break;
893
894 /* We have to distinguish two cases: the names is found or not. */
895 if (cnt == ctype->charnames_act)
896 {
897 /* Extend the name array. */
898 if (ctype->charnames_act == ctype->charnames_max)
899 {
900 ctype->charnames_max *= 2;
901 ctype->charnames = (unsigned int *)
902 xrealloc (ctype->charnames,
903 sizeof (unsigned int) * ctype->charnames_max);
904 }
905 ctype->charnames[ctype->charnames_act++] = idx;
906 }
907
908 if (table == NULL)
909 /* We have done everything we are asked to do. */
910 return NULL;
911
912 if (cnt >= *act)
913 {
914 if (cnt >= *max)
915 {
916 size_t old_max = *max;
917 do
918 *max *= 2;
919 while (*max <= cnt);
920
921 *table =
922 (u_int32_t *) xrealloc (*table, *max * sizeof (unsigned long int));
923 memset (&(*table)[old_max], '\0',
924 (*max - old_max) * sizeof (u_int32_t));
925 }
926
927 (*table)[cnt] = 0;
928 *act = cnt;
929 }
930
931 return &(*table)[cnt];
932 }
933
934
935 static void
936 set_class_defaults (struct locale_ctype_t *ctype, struct charset_t *charset)
937 {
  /* This function defines the default values for the classes and conversions
     according to POSIX.2 2.5.2.1.
     It may seem that the order of these if-blocks is arbitrary but it is NOT.
     Don't move them unless you know what you are doing!  */
942
943 void set_default (int bit, int from, int to)
944 {
945 char tmp[2];
946 int ch;
947 /* Define string. */
948 strcpy (tmp, "?");
949
950 for (ch = from; ch <= to; ++ch)
951 {
952 unsigned int value;
953 tmp[0] = ch;
954
955 value = charset_find_value (&charset->char_table, tmp, 1);
956 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
957 {
958 if (!be_quiet)
959 error (0, 0, _("\
960 character `%s' not defined while needed as default value"),
961 tmp);
962 continue;
963 }
964 else
965 ELEM (ctype, class_collection, , value) |= bit;
966 }
967 }
968
969 /* Set default values if keyword was not present. */
970 if ((ctype->class_done & BIT (tok_upper)) == 0)
    /* "If this keyword [upper] is not specified, the uppercase letters
       `A' through `Z', ..., shall automatically belong to this class,
       with implementation defined character values."  [P1003.2, 2.5.2.1]  */
974 set_default (BIT (tok_upper), 'A', 'Z');
975
976 if ((ctype->class_done & BIT (tok_lower)) == 0)
977 /* "If this keyword [lower] is not specified, the lowercase letters
978 `a' through `z', ..., shall automatically belong to this class,
979 with implementation defined character values." [P1003.2, 2.5.2.1] */
980 set_default (BIT (tok_lower), 'a', 'z');
981
982 if ((ctype->class_done & BIT (tok_alpha)) == 0)
983 {
984 /* Table 2-6 in P1003.2 says that characters in class `upper' or
985 class `lower' *must* be in class `alpha'. */
986 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
987 size_t cnt;
988
989 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
990 if ((ctype->class_collection[cnt] & mask) != 0)
991 ctype->class_collection[cnt] |= BIT (tok_alpha);
992 }
993
994 if ((ctype->class_done & BIT (tok_digit)) == 0)
995 /* "If this keyword [digit] is not specified, the digits `0' through
996 `9', ..., shall automatically belong to this class, with
997 implementation-defined character values." [P1003.2, 2.5.2.1] */
998 set_default (BIT (tok_digit), '0', '9');
999
1000 /* "Only characters specified for the `alpha' and `digit' keyword
1001 shall be specified. Characters specified for the keyword `alpha'
1002 and `digit' are automatically included in this class. */
1003 {
1004 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
1005 size_t cnt;
1006
1007 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1008 if ((ctype->class_collection[cnt] & mask) != 0)
1009 ctype->class_collection[cnt] |= BIT (tok_alnum);
1010 }
1011
1012 if ((ctype->class_done & BIT (tok_space)) == 0)
1013 /* "If this keyword [space] is not specified, the characters <space>,
1014 <form-feed>, <newline>, <carriage-return>, <tab>, and
1015 <vertical-tab>, ..., shall automatically belong to this class,
1016 with implementation-defined character values." [P1003.2, 2.5.2.1] */
1017 {
1018 unsigned int value;
1019
1020 value = charset_find_value (&charset->char_table, "space", 5);
1021 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1022 {
1023 if (!be_quiet)
1024 error (0, 0, _("\
1025 character `%s' not defined while needed as default value"),
1026 "<space>");
1027 }
1028 else
1029 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1030
1031 value = charset_find_value (&charset->char_table, "form-feed", 9);
1032 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1033 {
1034 if (!be_quiet)
1035 error (0, 0, _("\
1036 character `%s' not defined while needed as default value"),
1037 "<form-feed>");
1038 }
1039 else
1040 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1041
1042 value = charset_find_value (&charset->char_table, "newline", 7);
1043 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1044 {
1045 if (!be_quiet)
1046 error (0, 0, _("\
1047 character `%s' not defined while needed as default value"),
1048 "<newline>");
1049 }
1050 else
1051 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1052
1053 value = charset_find_value (&charset->char_table, "carriage-return", 15);
1054 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1055 {
1056 if (!be_quiet)
1057 error (0, 0, _("\
1058 character `%s' not defined while needed as default value"),
1059 "<carriage-return>");
1060 }
1061 else
1062 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1063
1064 value = charset_find_value (&charset->char_table, "tab", 3);
1065 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1066 {
1067 if (!be_quiet)
1068 error (0, 0, _("\
1069 character `%s' not defined while needed as default value"),
1070 "<tab>");
1071 }
1072 else
1073 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1074
1075 value = charset_find_value (&charset->char_table, "vertical-tab", 12);
1076 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1077 {
1078 if (!be_quiet)
1079 error (0, 0, _("\
1080 character `%s' not defined while needed as default value"),
1081 "<vertical-tab>");
1082 }
1083 else
1084 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1085 }
1086
1087 if ((ctype->class_done & BIT (tok_xdigit)) == 0)
1088 /* "If this keyword is not specified, the digits `0' to `9', the
1089 uppercase letters `A' through `F', and the lowercase letters `a'
1090 through `f', ..., shall automatically belong to this class, with
1091 implementation defined character values." [P1003.2, 2.5.2.1] */
1092 {
1093 set_default (BIT (tok_xdigit), '0', '9');
1094 set_default (BIT (tok_xdigit), 'A', 'F');
1095 set_default (BIT (tok_xdigit), 'a', 'f');
1096 }
1097
1098 if ((ctype->class_done & BIT (tok_blank)) == 0)
1099 /* "If this keyword [blank] is unspecified, the characters <space> and
1100 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
1101 {
1102 unsigned int value;
1103
1104 value = charset_find_value (&charset->char_table, "space", 5);
1105 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1106 {
1107 if (!be_quiet)
1108 error (0, 0, _("\
1109 character `%s' not defined while needed as default value"),
1110 "<space>");
1111 }
1112 else
1113 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1114
1115 value = charset_find_value (&charset->char_table, "tab", 3);
1116 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
1117 {
1118 if (!be_quiet)
1119 error (0, 0, _("\
1120 character `%s' not defined while needed as default value"),
1121 "<tab>");
1122 }
1123 else
1124 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1125 }
1126
1127 if ((ctype->class_done & BIT (tok_graph)) == 0)
1128 /* "If this keyword [graph] is not specified, characters specified for
1129 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
1130 shall belong to this character class." [P1003.2, 2.5.2.1] */
1131 {
1132 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1133 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1134 size_t cnt;
1135
1136 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1137 if ((ctype->class_collection[cnt] & mask) != 0)
1138 ctype->class_collection[cnt] |= BIT (tok_graph);
1139 }
1140
1141 if ((ctype->class_done & BIT (tok_print)) == 0)
1142 /* "If this keyword [print] is not provided, characters specified for
1143 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
1144 and the <space> character shall belong to this character class."
1145 [P1003.2, 2.5.2.1] */
1146 {
1147 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1148 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1149 size_t cnt;
1150 wchar_t space;
1151
1152 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1153 if ((ctype->class_collection[cnt] & mask) != 0)
1154 ctype->class_collection[cnt] |= BIT (tok_print);
1155
1156 space = charset_find_value (&charset->char_table, "space", 5);
1157 if (space == ILLEGAL_CHAR_VALUE)
1158 {
1159 if (!be_quiet)
1160 error (0, 0, _("\
1161 character `%s' not defined while needed as default value"),
1162 "<space>");
1163 }
1164 else
1165 ELEM (ctype, class_collection, , space) |= BIT (tok_print);
1166 }
1167
1168 if (ctype->toupper_done == 0)
1169 /* "If this keyword [toupper] is not specified, the lowercase letters
1170 `a' through `z', and their corresponding uppercase letters `A' to
1171 `Z', ..., shall automatically be included, with implementation-
1172 defined character values." [P1003.2, 2.5.2.1] */
1173 {
1174 char tmp[4];
1175 int ch;
1176
1177 strcpy (tmp, "<?>");
1178
1179 for (ch = 'a'; ch <= 'z'; ++ch)
1180 {
1181 unsigned int value_from, value_to;
1182
1183 tmp[1] = (char) ch;
1184
1185 value_from = charset_find_value (&charset->char_table, &tmp[1], 1);
1186 if ((wchar_t) value_from == ILLEGAL_CHAR_VALUE)
1187 {
1188 if (!be_quiet)
1189 error (0, 0, _("\
1190 character `%s' not defined while needed as default value"),
1191 tmp);
1192 continue;
1193 }
1194
1195 /* This conversion is implementation defined. */
1196 tmp[1] = (char) (ch + ('A' - 'a'));
1197 value_to = charset_find_value (&charset->char_table, &tmp[1], 1);
1198 if ((wchar_t) value_to == ILLEGAL_CHAR_VALUE)
1199 {
1200 if (!be_quiet)
1201 error (0, 0, _("\
1202 character `%s' not defined while needed as default value"),
1203 tmp);
1204 continue;
1205 }
1206
1207 /* The index [0] is determined by the order of the
1208 `ctype_map_newP' calls in `ctype_startup'. */
1209 ELEM (ctype, map_collection, [0], value_from) = value_to;
1210 }
1211 }
1212
1213 if (ctype->tolower_done == 0)
1214 /* "If this keyword [tolower] is not specified, the mapping shall be
1215 the reverse mapping of the one specified to `toupper'." [P1003.2] */
1216 {
1217 size_t cnt;
1218
1219 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
1220 if (ctype->map_collection[0][cnt] != 0)
1221 ELEM (ctype, map_collection, [1],
1222 ctype->map_collection[0][cnt])
1223 = ctype->charnames[cnt];
1224 }
1225 }
1226
1227
1228 static void
1229 allocate_arrays (struct locale_ctype_t *ctype, struct charset_t *charset)
1230 {
1231 size_t idx;
1232
1233 /* First we have to decide how we organize the arrays. It is easy
1234 for a one-byte character set. But multi-byte character set
1235 cannot be stored flat because the chars might be sparsely used.
1236 So we determine an optimal hashing function for the used
1237 characters.
1238
1239 We use a very trivial hashing function to store the sparse
1240 table. CH % TABSIZE is used as an index. To solve multiple hits
1241 we have N planes. This guarantees a fixed search time for a
1242 character [N / 2]. In the following code we determine the minmum
1243 value for TABSIZE * N, where TABSIZE >= 256. */
1244 size_t min_total = UINT_MAX;
1245 size_t act_size = 256;
1246
1247 if (!be_quiet)
1248 fputs (_("\
1249 Computing table size for character classes might take a while..."),
1250 stderr);
1251
1252 while (act_size < min_total)
1253 {
1254 size_t cnt[act_size];
1255 size_t act_planes = 1;
1256
1257 memset (cnt, '\0', sizeof cnt);
1258
1259 for (idx = 0; idx < 256; ++idx)
1260 cnt[idx] = 1;
1261
1262 for (idx = 0; idx < ctype->charnames_act; ++idx)
1263 if (ctype->charnames[idx] >= 256)
1264 {
1265 size_t nr = ctype->charnames[idx] % act_size;
1266
1267 if (++cnt[nr] > act_planes)
1268 {
1269 act_planes = cnt[nr];
1270 if (act_size * act_planes >= min_total)
1271 break;
1272 }
1273 }
1274
1275 if (act_size * act_planes < min_total)
1276 {
1277 min_total = act_size * act_planes;
1278 ctype->plane_size = act_size;
1279 ctype->plane_cnt = act_planes;
1280 }
1281
1282 ++act_size;
1283 }
1284
1285 if (!be_quiet)
1286 fputs (_(" done\n"), stderr);
1287
1288
1289 #if __BYTE_ORDER == __LITTLE_ENDIAN
1290 # define NAMES_B1 ctype->names_el
1291 # define NAMES_B2 ctype->names_eb
1292 #else
1293 # define NAMES_B1 ctype->names_eb
1294 # define NAMES_B2 ctype->names_el
1295 #endif
1296
1297 ctype->names_eb = (u_int32_t *) xcalloc (ctype->plane_size
1298 * ctype->plane_cnt,
1299 sizeof (u_int32_t));
1300 ctype->names_el = (u_int32_t *) xcalloc (ctype->plane_size
1301 * ctype->plane_cnt,
1302 sizeof (u_int32_t));
1303
1304 for (idx = 1; idx < 256; ++idx)
1305 NAMES_B1[idx] = idx;
1306
1307 /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */
1308 NAMES_B1[0] = 1;
1309
1310 for (idx = 256; idx < ctype->charnames_act; ++idx)
1311 {
1312 size_t nr = (ctype->charnames[idx] % ctype->plane_size);
1313 size_t depth = 0;
1314
1315 while (NAMES_B1[nr + depth * ctype->plane_size])
1316 ++depth;
1317 assert (depth < ctype->plane_cnt);
1318
1319 NAMES_B1[nr + depth * ctype->plane_size] = ctype->charnames[idx];
1320
1321 /* Now for faster access remember the index in the NAMES_B array. */
1322 ctype->charnames[idx] = nr + depth * ctype->plane_size;
1323 }
1324 NAMES_B1[0] = 0;
1325
1326 for (idx = 0; idx < ctype->plane_size * ctype->plane_cnt; ++idx)
1327 NAMES_B2[idx] = SWAPU32 (NAMES_B1[idx]);
1328
1329
1330 /* You wonder about this amount of memory? This is only because some
1331 users do not manage to address the array with unsigned values or
1332 data types with range >= 256. '\200' would result in the array
1333 index -128. To help these poor people we duplicate the entries for
1334 128 up to 255 below the entry for \0. */
1335 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
1336 sizeof (char_class_t));
1337 ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
1338 * ctype->plane_cnt,
1339 sizeof (char_class32_t));
1340
1341 /* Fill in the character class information. */
1342 #if __BYTE_ORDER == __LITTLE_ENDIAN
1343 # define TRANS(w) CHAR_CLASS_TRANS (w)
1344 # define TRANS32(w) CHAR_CLASS32_TRANS (w)
1345 #else
1346 # define TRANS(w) (w)
1347 # define TRANS32(w) (w)
1348 #endif
1349
1350 for (idx = 0; idx < ctype->class_collection_act; ++idx)
1351 if (ctype->charnames[idx] < 256)
1352 ctype->ctype_b[128 + ctype->charnames[idx]]
1353 = TRANS (ctype->class_collection[idx]);
1354
1355 /* Mirror first 127 entries. We must take care that entry -1 is not
1356 mirrored because EOF == -1. */
1357 for (idx = 0; idx < 127; ++idx)
1358 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
1359
1360 /* The 32 bit array contains all characters. */
1361 for (idx = 0; idx < ctype->class_collection_act; ++idx)
1362 ctype->ctype32_b[ctype->charnames[idx]]
1363 = TRANS32 (ctype->class_collection[idx]);
1364
1365 /* Room for table of mappings. */
1366 ctype->map_eb = (u_int32_t **) xmalloc (ctype->map_collection_nr
1367 * sizeof (u_int32_t *));
1368 ctype->map_el = (u_int32_t **) xmalloc (ctype->map_collection_nr
1369 * sizeof (u_int32_t *));
1370
1371 /* Fill in all mappings. */
1372 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
1373 {
1374 unsigned int idx2;
1375
1376 /* Allocate table. */
1377 ctype->map_eb[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1378 * ctype->plane_cnt + 128)
1379 * sizeof (u_int32_t));
1380 ctype->map_el[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1381 * ctype->plane_cnt + 128)
1382 * sizeof (u_int32_t));
1383
1384 #if __BYTE_ORDER == __LITTLE_ENDIAN
1385 # define MAP_B1 ctype->map_el
1386 # define MAP_B2 ctype->map_eb
1387 #else
1388 # define MAP_B1 ctype->map_eb
1389 # define MAP_B2 ctype->map_el
1390 #endif
1391
1392 /* Copy default value (identity mapping). */
1393 memcpy (&MAP_B1[idx][128], NAMES_B1,
1394 ctype->plane_size * ctype->plane_cnt * sizeof (u_int32_t));
1395
1396 /* Copy values from collection. */
1397 for (idx2 = 0; idx2 < ctype->map_collection_act[idx]; ++idx2)
1398 if (ctype->map_collection[idx][idx2] != 0)
1399 MAP_B1[idx][128 + ctype->charnames[idx2]] =
1400 ctype->map_collection[idx][idx2];
1401
1402 /* Mirror first 127 entries. We must take care not to map entry
1403 -1 because EOF == -1. */
1404 for (idx2 = 0; idx2 < 127; ++idx2)
1405 MAP_B1[idx][idx2] = MAP_B1[idx][256 + idx2];
1406
1407 /* EOF must map to EOF. */
1408 MAP_B1[idx][127] = EOF;
1409
1410 /* And now the other byte order. */
1411 for (idx2 = 0; idx2 < ctype->plane_size * ctype->plane_cnt + 128; ++idx2)
1412 MAP_B2[idx][idx2] = SWAPU32 (MAP_B1[idx][idx2]);
1413 }
1414
1415 /* Extra array for class and map names. */
1416 ctype->class_name_ptr = (u_int32_t *) xmalloc (ctype->nr_charclass
1417 * sizeof (u_int32_t));
1418 ctype->map_name_ptr = (u_int32_t *) xmalloc (ctype->map_collection_nr
1419 * sizeof (u_int32_t));
1420
1421 /* Array for width information. Because the expected width are very
1422 small we use only one single byte. This save space and we need
1423 not provide the information twice with both endianesses. */
1424 ctype->width = (unsigned char *) xmalloc (ctype->plane_size
1425 * ctype->plane_cnt);
1426 /* Initialize with default width value. */
1427 memset (ctype->width, charset->width_default,
1428 ctype->plane_size * ctype->plane_cnt);
1429 if (charset->width_rules != NULL)
1430 {
1431 size_t cnt;
1432
1433 for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
1434 if (charset->width_rules[cnt].width != charset->width_default)
1435 for (idx = charset->width_rules[cnt].from;
1436 idx <= charset->width_rules[cnt].to; ++idx)
1437 {
1438 size_t nr = idx % ctype->plane_size;
1439 size_t depth = 0;
1440
1441 while (NAMES_B1[nr + depth * ctype->plane_size] != nr)
1442 ++depth;
1443 assert (depth < ctype->plane_cnt);
1444
1445 ctype->width[nr + depth * ctype->plane_size]
1446 = charset->width_rules[cnt].width;
1447 }
1448 }
1449
1450 /* Compute MB_CUR_MAX. Please note the value mb_cur_max in the
1451 character set definition gives the number of bytes in the wide
1452 character representation. We compute the number of bytes used
1453 for the UTF-8 encoded form. */
1454 ctype->mb_cur_max = ((int []) { 2, 3, 5, 6 }) [charset->mb_cur_max - 1];
1455
1456 /* We need the name of the currently used 8-bit character set to
1457 make correct conversion between this 8-bit representation and the
1458 ISO 10646 character set used internally for wide characters. */
1459 ctype->codeset_name = charset->code_set_name;
1460 }