]> git.ipfire.org Git - thirdparty/glibc.git/blob - locale/programs/ld-ctype.c
Update.
[thirdparty/glibc.git] / locale / programs / ld-ctype.c
1 /* Copyright (C) 1995-1999, 2000 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #include <alloca.h>
25 #include <byteswap.h>
26 #include <endian.h>
27 #include <errno.h>
28 #include <limits.h>
29 #include <obstack.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <wchar.h>
33 #include <wctype.h>
34 #include <sys/uio.h>
35
36 #include "charmap.h"
37 #include "localeinfo.h"
38 #include "langinfo.h"
39 #include "linereader.h"
40 #include "locfile-token.h"
41 #include "locfile.h"
42 #include "localedef.h"
43
44 #include <assert.h>
45
46
#ifdef PREDEFINED_CLASSES
/* These are the extra bits not in wctype.h since these are not preallocated
   classes.  The constants are unsigned: shifting a signed 1 into bit 31
   overflows a 32-bit `int', and the values are only meaningful as masks
   for the 32-bit class words anyway.  */
# define _ISwspecial1   (1U << 29)
# define _ISwspecial2   (1U << 30)
# define _ISwspecial3   (1U << 31)
#endif
54
55
/* Helpers to turn a class token into its bit.  BITPOS yields the bit
   index relative to `tok_upper'; BIT and BITw feed that index to
   `_ISbit' and `_ISwbit' respectively.  */
#define BITPOS(cls)     ((cls) - tok_upper)
#define BIT(cls)        (_ISbit (BITPOS (cls)))
#define BITw(cls)       (_ISwbit (BITPOS (cls)))

/* Yield (as an lvalue) the entry for character VAL in the table COLL of
   the ctype structure CT.  SUBSCRIPT is pasted textually after the table
   name and after its `_max'/`_act' companions; it may be empty or an
   array subscript such as `[n]'.  The lookup itself is done by
   `find_idx'.  */
#define ELEM(ct, coll, subscript, val)                                       \
  *find_idx (ct, &ct->coll subscript, &ct->coll##_max subscript,            \
             &ct->coll##_act subscript, val)


/* For compatibility with former implementations the number of bits for
   character classes is restricted to 16 for now.  Once compatibility is
   no longer necessary the number can be increased to 32.  */
#define char_class_t    uint16_t
#define char_class32_t  uint32_t
71
72
/* Types describing a transliteration action.  There is a from-string,
   possibly consisting of several characters, and a set of to-strings
   which may also consist of several characters each.  Everything is kept
   as 32bit values because this is what the gconv functions use.  */
struct translit_to_t
{
  /* A to-string made of 32bit values.  */
  uint32_t *str;

  /* Link to the next element of the list.  */
  struct translit_to_t *next;
};
83
/* One transliteration entry: a from-string together with the list of
   its to-strings.  */
struct translit_t
{
  /* The from-string made of 32bit values.  */
  uint32_t *from;

  /* File name and line number recorded for this entry.  */
  const char *fname;
  size_t lineno;

  /* List of to-strings.  */
  struct translit_to_t *to;

  /* Link to the next entry.  */
  struct translit_t *next;
};
95
/* One entry of the `translit_ignore' list.  `ctype_finish' orders the
   list by the FROM member.  */
struct translit_ignore_t
{
  /* Three 32bit values describing the entry; presumably a range of
     characters FROM..TO visited with stride STEP (not verifiable from
     this part of the file).  */
  uint32_t from, to, step;

  /* File name and line number recorded for this entry.  */
  const char *fname;
  size_t lineno;

  /* Link to the next entry.  */
  struct translit_ignore_t *next;
};
107
108
/* The real definition of the struct for the LC_CTYPE locale.  */
struct locale_ctype_t
{
  /* Character values known so far.  `ctype_startup' seeds the first 256
     slots with the values 0..255; `ctype_finish' reports charnames[i] as
     the character belonging to class_collection[i].  */
  uint32_t *charnames;
  size_t charnames_max, charnames_act;

  struct repertoire_t *repertoire;

  /* Character classes.  At most 8 * sizeof (uint32_t) of them are
     allowed, i.e. one bit of a 32bit word per class.  */
#define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
  size_t nr_charclass;
  const char *classnames[MAX_NR_CHARCLASS];
  uint32_t last_class_char;
  /* Class bits indexed by byte value ...  */
  uint32_t class256_collection[256];
  /* ... and class bits for the entries of `charnames'.  */
  uint32_t *class_collection;
  size_t class_collection_max, class_collection_act;
  uint32_t class_done, class_offset;

  /* Input digits in multibyte and in wide character form.  */
  struct charseq **mbdigits;
  size_t mbdigits_act, mbdigits_max;
  uint32_t *wcdigits;
  size_t wcdigits_act, wcdigits_max;

  /* Output digits in multibyte and in wide character form.  */
  struct charseq *mboutdigits[10];
  uint32_t wcoutdigits[10];
  size_t outdigits_act;

  /* Character maps.  Should the following limit ever turn out to be too
     small simply increase it.  But I doubt it will.  --drepper@gnu  */
#define MAX_NR_CHARMAP 16
  const char *mapnames[MAX_NR_CHARMAP];
  uint32_t *map_collection[MAX_NR_CHARMAP];
  uint32_t map256_collection[2][256];
  size_t map_collection_max[MAX_NR_CHARMAP];
  size_t map_collection_act[MAX_NR_CHARMAP];
  size_t map_collection_nr, last_map_idx;
  int tomap_done[MAX_NR_CHARMAP];
  uint32_t map_offset;

  /* Transliteration information.  */
  const char *translit_copy_locale, *translit_copy_repertoire;
  struct translit_t *translit;
  struct translit_ignore_t *translit_ignore;
  uint32_t ntranslit_ignore;

  uint32_t *default_missing;
  const char *default_missing_file;
  size_t default_missing_lineno;

  /* The arrays for the binary representation.  */
  uint32_t plane_size, plane_cnt;
  char_class_t *ctype_b;
  char_class32_t *ctype32_b;
  uint32_t *names;
  uint32_t **map, **map32;
  struct iovec *class_3level, *map_3level;
  uint32_t *class_name_ptr, *map_name_ptr;
  unsigned char *width;
  struct iovec width_3level;
  uint32_t mb_cur_max;
  const char *codeset_name;
  uint32_t *translit_from_idx, *translit_from_tbl;
  uint32_t *translit_to_idx, *translit_to_tbl;
  uint32_t translit_idx_size;
  size_t translit_from_tbl_size, translit_to_tbl_size;

  /* Memory pool; set up with `obstack_init' in `ctype_startup'.  */
  struct obstack mempool;
};
191
192
/* Allocator hooks used by the obstack macros.  */
#define obstack_chunk_alloc     xmalloc
#define obstack_chunk_free      free


/* Declarations of the functions local to this file.  */
static void ctype_startup (struct linereader *lr,
                           struct localedef_t *locale,
                           struct charmap_t *charmap,
                           int ignore_content);

static void ctype_class_new (struct linereader *lr,
                             struct locale_ctype_t *ctype,
                             const char *name);

static void ctype_map_new (struct linereader *lr,
                           struct locale_ctype_t *ctype,
                           const char *name,
                           struct charmap_t *charmap);

static uint32_t *find_idx (struct locale_ctype_t *ctype,
                           uint32_t **table,
                           size_t *max,
                           size_t *act,
                           unsigned int idx);

static void set_class_defaults (struct locale_ctype_t *ctype,
                                struct charmap_t *charmap,
                                struct repertoire_t *repertoire);

static void allocate_arrays (struct locale_ctype_t *ctype,
                             struct charmap_t *charmap,
                             struct repertoire_t *repertoire);
213
214
/* Symbolic names of the ten decimal digits.  `ctype_finish' tries them
   when it has to synthesize the default input digits.  */
static const char *longnames[] =
{
  "zero",
  "one",
  "two",
  "three",
  "four",
  "five",
  "six",
  "seven",
  "eight",
  "nine"
};

/* The same ten digits written as `U' followed by the eight-digit
   hexadecimal value (0x30 ... 0x39).  */
static const char *uninames[] =
{
  "U00000030",
  "U00000031",
  "U00000032",
  "U00000033",
  "U00000034",
  "U00000035",
  "U00000036",
  "U00000037",
  "U00000038",
  "U00000039"
};

/* The digits as plain bytes.  */
static const unsigned char digits[] = "0123456789";
226
227
228 static void
229 ctype_startup (struct linereader *lr, struct localedef_t *locale,
230 struct charmap_t *charmap, int ignore_content)
231 {
232 unsigned int cnt;
233 struct locale_ctype_t *ctype;
234
235 if (!ignore_content)
236 {
237 /* Allocate the needed room. */
238 locale->categories[LC_CTYPE].ctype = ctype =
239 (struct locale_ctype_t *) xcalloc (1, sizeof (struct locale_ctype_t));
240
241 /* We have seen no names yet. */
242 ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
243 ctype->charnames =
244 (unsigned int *) xmalloc (ctype->charnames_max
245 * sizeof (unsigned int));
246 for (cnt = 0; cnt < 256; ++cnt)
247 ctype->charnames[cnt] = cnt;
248 ctype->charnames_act = 256;
249
250 /* Fill character class information. */
251 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
252 /* The order of the following instructions determines the bit
253 positions! */
254 ctype_class_new (lr, ctype, "upper");
255 ctype_class_new (lr, ctype, "lower");
256 ctype_class_new (lr, ctype, "alpha");
257 ctype_class_new (lr, ctype, "digit");
258 ctype_class_new (lr, ctype, "xdigit");
259 ctype_class_new (lr, ctype, "space");
260 ctype_class_new (lr, ctype, "print");
261 ctype_class_new (lr, ctype, "graph");
262 ctype_class_new (lr, ctype, "blank");
263 ctype_class_new (lr, ctype, "cntrl");
264 ctype_class_new (lr, ctype, "punct");
265 ctype_class_new (lr, ctype, "alnum");
266 #ifdef PREDEFINED_CLASSES
267 /* The following are extensions from ISO 14652. */
268 ctype_class_new (lr, ctype, "left_to_right");
269 ctype_class_new (lr, ctype, "right_to_left");
270 ctype_class_new (lr, ctype, "num_terminator");
271 ctype_class_new (lr, ctype, "num_separator");
272 ctype_class_new (lr, ctype, "segment_separator");
273 ctype_class_new (lr, ctype, "block_separator");
274 ctype_class_new (lr, ctype, "direction_control");
275 ctype_class_new (lr, ctype, "sym_swap_layout");
276 ctype_class_new (lr, ctype, "char_shape_selector");
277 ctype_class_new (lr, ctype, "num_shape_selector");
278 ctype_class_new (lr, ctype, "non_spacing");
279 ctype_class_new (lr, ctype, "non_spacing_level3");
280 ctype_class_new (lr, ctype, "normal_connect");
281 ctype_class_new (lr, ctype, "r_connect");
282 ctype_class_new (lr, ctype, "no_connect");
283 ctype_class_new (lr, ctype, "no_connect-space");
284 ctype_class_new (lr, ctype, "vowel_connect");
285 #endif
286
287 ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
288 ctype->class_collection
289 = (uint32_t *) xcalloc (sizeof (unsigned long int),
290 ctype->class_collection_max);
291 ctype->class_collection_act = 256;
292
293 /* Fill character map information. */
294 ctype->last_map_idx = MAX_NR_CHARMAP;
295 ctype_map_new (lr, ctype, "toupper", charmap);
296 ctype_map_new (lr, ctype, "tolower", charmap);
297 #ifdef PREDEFINED_CLASSES
298 ctype_map_new (lr, ctype, "tosymmetric", charmap);
299 #endif
300
301 /* Fill first 256 entries in `toXXX' arrays. */
302 for (cnt = 0; cnt < 256; ++cnt)
303 {
304 ctype->map_collection[0][cnt] = cnt;
305 ctype->map_collection[1][cnt] = cnt;
306 #ifdef PREDEFINED_CLASSES
307 ctype->map_collection[2][cnt] = cnt;
308 #endif
309 ctype->map256_collection[0][cnt] = cnt;
310 ctype->map256_collection[1][cnt] = cnt;
311 }
312
313 obstack_init (&ctype->mempool);
314 }
315 }
316
317
318 void
319 ctype_finish (struct localedef_t *locale, struct charmap_t *charmap)
320 {
321 /* See POSIX.2, table 2-6 for the meaning of the following table. */
322 #define NCLASS 12
323 static const struct
324 {
325 const char *name;
326 const char allow[NCLASS];
327 }
328 valid_table[NCLASS] =
329 {
330 /* The order is important. See token.h for more information.
331 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
332 { "upper", "--MX-XDDXXX-" },
333 { "lower", "--MX-XDDXXX-" },
334 { "alpha", "---X-XDDXXX-" },
335 { "digit", "XXX--XDDXXX-" },
336 { "xdigit", "-----XDDXXX-" },
337 { "space", "XXXXX------X" },
338 { "print", "---------X--" },
339 { "graph", "---------X--" },
340 { "blank", "XXXXXM-----X" },
341 { "cntrl", "XXXXX-XX--XX" },
342 { "punct", "XXXXX-DD-X-X" },
343 { "alnum", "-----XDDXXX-" }
344 };
345 size_t cnt;
346 int cls1, cls2;
347 uint32_t space_value;
348 struct charseq *space_seq;
349 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
350 int warned;
351 const void *key;
352 size_t len;
353 void *vdata;
354 void *curs;
355
356 /* Now resolve copying and also handle completely missing definitions. */
357 if (ctype == NULL)
358 {
359 const char *repertoire_name;
360
361 /* First see whether we were supposed to copy. If yes, find the
362 actual definition. */
363 if (locale->copy_name[LC_CTYPE] != NULL)
364 {
365 /* Find the copying locale. This has to happen transitively since
366 the locale we are copying from might also copying another one. */
367 struct localedef_t *from = locale;
368
369 do
370 from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
371 from->repertoire_name, charmap);
372 while (from->categories[LC_CTYPE].ctype == NULL
373 && from->copy_name[LC_CTYPE] != NULL);
374
375 ctype = locale->categories[LC_CTYPE].ctype
376 = from->categories[LC_CTYPE].ctype;
377 }
378
379 /* If there is still no definition issue an warning and create an
380 empty one. */
381 if (ctype == NULL)
382 {
383 if (! be_quiet)
384 error (0, 0, _("No definition for %s category found"), "LC_CTYPE");
385 ctype_startup (NULL, locale, charmap, 0);
386 ctype = locale->categories[LC_CTYPE].ctype;
387 }
388
389 /* Get the repertoire we have to use. */
390 repertoire_name = locale->repertoire_name ?: repertoire_global;
391 if (repertoire_name != NULL)
392 ctype->repertoire = repertoire_read (repertoire_name);
393 }
394
395 /* We need the name of the currently used 8-bit character set to
396 make correct conversion between this 8-bit representation and the
397 ISO 10646 character set used internally for wide characters. */
398 ctype->codeset_name = charmap->code_set_name;
399 if (ctype->codeset_name == NULL)
400 {
401 if (! be_quiet)
402 error (0, 0, "no character set name specified in charmap");
403 ctype->codeset_name = "//UNKNOWN//";
404 }
405
406 /* Set default value for classes not specified. */
407 set_class_defaults (ctype, charmap, ctype->repertoire);
408
409 /* Check according to table. */
410 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
411 {
412 uint32_t tmp = ctype->class_collection[cnt];
413
414 if (tmp != 0)
415 {
416 for (cls1 = 0; cls1 < NCLASS; ++cls1)
417 if ((tmp & _ISwbit (cls1)) != 0)
418 for (cls2 = 0; cls2 < NCLASS; ++cls2)
419 if (valid_table[cls1].allow[cls2] != '-')
420 {
421 int eq = (tmp & _ISwbit (cls2)) != 0;
422 switch (valid_table[cls1].allow[cls2])
423 {
424 case 'M':
425 if (!eq)
426 {
427 uint32_t value = ctype->charnames[cnt];
428
429 if (!be_quiet)
430 error (0, 0, _("\
431 character L'\\u%0*x' in class `%s' must be in class `%s'"),
432 value > 0xffff ? 8 : 4, value,
433 valid_table[cls1].name,
434 valid_table[cls2].name);
435 }
436 break;
437
438 case 'X':
439 if (eq)
440 {
441 uint32_t value = ctype->charnames[cnt];
442
443 if (!be_quiet)
444 error (0, 0, _("\
445 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
446 value > 0xffff ? 8 : 4, value,
447 valid_table[cls1].name,
448 valid_table[cls2].name);
449 }
450 break;
451
452 case 'D':
453 ctype->class_collection[cnt] |= _ISwbit (cls2);
454 break;
455
456 default:
457 error (5, 0, _("internal error in %s, line %u"),
458 __FUNCTION__, __LINE__);
459 }
460 }
461 }
462 }
463
464 for (cnt = 0; cnt < 256; ++cnt)
465 {
466 uint32_t tmp = ctype->class256_collection[cnt];
467
468 if (tmp != 0)
469 {
470 for (cls1 = 0; cls1 < NCLASS; ++cls1)
471 if ((tmp & _ISbit (cls1)) != 0)
472 for (cls2 = 0; cls2 < NCLASS; ++cls2)
473 if (valid_table[cls1].allow[cls2] != '-')
474 {
475 int eq = (tmp & _ISbit (cls2)) != 0;
476 switch (valid_table[cls1].allow[cls2])
477 {
478 case 'M':
479 if (!eq)
480 {
481 char buf[17];
482
483 snprintf (buf, sizeof buf, "\\%Zo", cnt);
484
485 if (!be_quiet)
486 error (0, 0, _("\
487 character '%s' in class `%s' must be in class `%s'"),
488 buf, valid_table[cls1].name,
489 valid_table[cls2].name);
490 }
491 break;
492
493 case 'X':
494 if (eq)
495 {
496 char buf[17];
497
498 snprintf (buf, sizeof buf, "\\%Zo", cnt);
499
500 if (!be_quiet)
501 error (0, 0, _("\
502 character '%s' in class `%s' must not be in class `%s'"),
503 buf, valid_table[cls1].name,
504 valid_table[cls2].name);
505 }
506 break;
507
508 case 'D':
509 ctype->class256_collection[cnt] |= _ISbit (cls2);
510 break;
511
512 default:
513 error (5, 0, _("internal error in %s, line %u"),
514 __FUNCTION__, __LINE__);
515 }
516 }
517 }
518 }
519
520 /* ... and now test <SP> as a special case. */
521 space_value = 32;
522 if (((cnt = BITPOS (tok_space),
523 (ELEM (ctype, class_collection, , space_value)
524 & BITw (tok_space)) == 0)
525 || (cnt = BITPOS (tok_blank),
526 (ELEM (ctype, class_collection, , space_value)
527 & BITw (tok_blank)) == 0)))
528 {
529 if (!be_quiet)
530 error (0, 0, _("<SP> character not in class `%s'"),
531 valid_table[cnt].name);
532 }
533 else if (((cnt = BITPOS (tok_punct),
534 (ELEM (ctype, class_collection, , space_value)
535 & BITw (tok_punct)) != 0)
536 || (cnt = BITPOS (tok_graph),
537 (ELEM (ctype, class_collection, , space_value)
538 & BITw (tok_graph))
539 != 0)))
540 {
541 if (!be_quiet)
542 error (0, 0, _("<SP> character must not be in class `%s'"),
543 valid_table[cnt].name);
544 }
545 else
546 ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
547
548 space_seq = charmap_find_value (charmap, "SP", 2);
549 if (space_seq == NULL)
550 space_seq = charmap_find_value (charmap, "space", 5);
551 if (space_seq == NULL)
552 space_seq = charmap_find_value (charmap, "U00000020", 9);
553 if (space_seq == NULL || space_seq->nbytes != 1)
554 {
555 if (!be_quiet)
556 error (0, 0, _("character <SP> not defined in character map"));
557 }
558 else if (((cnt = BITPOS (tok_space),
559 (ctype->class256_collection[space_seq->bytes[0]]
560 & BIT (tok_space)) == 0)
561 || (cnt = BITPOS (tok_blank),
562 (ctype->class256_collection[space_seq->bytes[0]]
563 & BIT (tok_blank)) == 0)))
564 {
565 if (!be_quiet)
566 error (0, 0, _("<SP> character not in class `%s'"),
567 valid_table[cnt].name);
568 }
569 else if (((cnt = BITPOS (tok_punct),
570 (ctype->class256_collection[space_seq->bytes[0]]
571 & BIT (tok_punct)) != 0)
572 || (cnt = BITPOS (tok_graph),
573 (ctype->class256_collection[space_seq->bytes[0]]
574 & BIT (tok_graph)) != 0)))
575 {
576 if (!be_quiet)
577 error (0, 0, _("<SP> character must not be in class `%s'"),
578 valid_table[cnt].name);
579 }
580 else
581 ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
582
583 /* Now that the tests are done make sure the name array contains all
584 characters which are handled in the WIDTH section of the
585 character set definition file. */
586 if (charmap->width_rules != NULL)
587 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
588 {
589 unsigned char bytes[charmap->mb_cur_max];
590 int nbytes = charmap->width_rules[cnt].from->nbytes;
591
592 /* We have the range of character for which the width is
593 specified described using byte sequences of the multibyte
594 charset. We have to convert this to UCS4 now. And we
595 cannot simply convert the beginning and the end of the
596 sequence, we have to iterate over the byte sequence and
597 convert it for every single character. */
598 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
599
600 while (nbytes < charmap->width_rules[cnt].to->nbytes
601 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
602 nbytes) <= 0)
603 {
604 /* Find the UCS value for `bytes'. */
605 int inner;
606 uint32_t wch;
607 struct charseq *seq = charmap_find_symbol (charmap, bytes, nbytes);
608
609 if (seq == NULL)
610 wch = ILLEGAL_CHAR_VALUE;
611 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
612 wch = seq->ucs4;
613 else
614 wch = repertoire_find_value (ctype->repertoire, seq->name,
615 strlen (seq->name));
616
617 if (wch != ILLEGAL_CHAR_VALUE)
618 /* We are only interested in the side-effects of the
619 `find_idx' call. It will add appropriate entries in
620 the name array if this is necessary. */
621 (void) find_idx (ctype, NULL, NULL, NULL, wch);
622
623 /* "Increment" the bytes sequence. */
624 inner = nbytes - 1;
625 while (inner >= 0 && bytes[inner] == 0xff)
626 --inner;
627
628 if (inner < 0)
629 {
630 /* We have to extend the byte sequence. */
631 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
632 break;
633
634 bytes[0] = 1;
635 memset (&bytes[1], 0, nbytes);
636 ++nbytes;
637 }
638 else
639 {
640 ++bytes[inner];
641 while (++inner < nbytes)
642 bytes[inner] = 0;
643 }
644 }
645 }
646
647 /* Now set all the other characters of the character set to the
648 default width. */
649 curs = NULL;
650 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
651 {
652 struct charseq *data = (struct charseq *) vdata;
653
654 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
655 data->ucs4 = repertoire_find_value (ctype->repertoire,
656 data->name, len);
657
658 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
659 (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
660 }
661
662 /* There must be a multiple of 10 digits. */
663 if (ctype->mbdigits_act % 10 != 0)
664 {
665 assert (ctype->mbdigits_act == ctype->wcdigits_act);
666 ctype->wcdigits_act -= ctype->mbdigits_act % 10;
667 ctype->mbdigits_act -= ctype->mbdigits_act % 10;
668 error (0, 0, _("`digit' category has not entries in groups of ten"));
669 }
670
671 /* Check the input digits. There must be a multiple of ten available.
672 In each group it could be that one or the other character is missing.
673 In this case the whole group must be removed. */
674 cnt = 0;
675 while (cnt < ctype->mbdigits_act)
676 {
677 size_t inner;
678 for (inner = 0; inner < 10; ++inner)
679 if (ctype->mbdigits[cnt + inner] == NULL)
680 break;
681
682 if (inner == 10)
683 cnt += 10;
684 else
685 {
686 /* Remove the group. */
687 memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
688 ((ctype->wcdigits_act - cnt - 10)
689 * sizeof (ctype->mbdigits[0])));
690 ctype->mbdigits_act -= 10;
691 }
692 }
693
694 /* If no input digits are given use the default. */
695 if (ctype->mbdigits_act == 0)
696 {
697 if (ctype->mbdigits_max == 0)
698 {
699 ctype->mbdigits = obstack_alloc (&charmap->mem_pool,
700 10 * sizeof (struct charseq *));
701 ctype->mbdigits_max = 10;
702 }
703
704 for (cnt = 0; cnt < 10; ++cnt)
705 {
706 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
707 digits + cnt, 1);
708 if (ctype->mbdigits[cnt] == NULL)
709 {
710 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
711 longnames[cnt],
712 strlen (longnames[cnt]));
713 if (ctype->mbdigits[cnt] == NULL)
714 {
715 /* Hum, this ain't good. */
716 error (0, 0, _("\
717 no input digits defined and none of the standard names in the charmap"));
718
719 ctype->mbdigits[cnt] = obstack_alloc (&charmap->mem_pool,
720 sizeof (struct charseq) + 1);
721
722 /* This is better than nothing. */
723 ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
724 ctype->mbdigits[cnt]->nbytes = 1;
725 }
726 }
727 }
728
729 ctype->mbdigits_act = 10;
730 }
731
732 /* Check the wide character input digits. There must be a multiple
733 of ten available. In each group it could be that one or the other
734 character is missing. In this case the whole group must be
735 removed. */
736 cnt = 0;
737 while (cnt < ctype->wcdigits_act)
738 {
739 size_t inner;
740 for (inner = 0; inner < 10; ++inner)
741 if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
742 break;
743
744 if (inner == 10)
745 cnt += 10;
746 else
747 {
748 /* Remove the group. */
749 memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
750 ((ctype->wcdigits_act - cnt - 10)
751 * sizeof (ctype->wcdigits[0])));
752 ctype->wcdigits_act -= 10;
753 }
754 }
755
756 /* If no input digits are given use the default. */
757 if (ctype->wcdigits_act == 0)
758 {
759 if (ctype->wcdigits_max == 0)
760 {
761 ctype->wcdigits = obstack_alloc (&charmap->mem_pool,
762 10 * sizeof (uint32_t));
763 ctype->wcdigits_max = 10;
764 }
765
766 for (cnt = 0; cnt < 10; ++cnt)
767 ctype->wcdigits[cnt] = L'0' + cnt;
768
769 ctype->mbdigits_act = 10;
770 }
771
772 /* Check the outdigits. */
773 warned = 0;
774 for (cnt = 0; cnt < 10; ++cnt)
775 if (ctype->mboutdigits[cnt] == NULL)
776 {
777 static struct charseq replace[2];
778
779 if (!warned)
780 {
781 error (0, 0, _("\
782 not all characters used in `outdigit' are available in the charmap"));
783 warned = 1;
784 }
785
786 replace[0].nbytes = 1;
787 replace[0].bytes[0] = '?';
788 replace[0].bytes[1] = '\0';
789 ctype->mboutdigits[cnt] = &replace[0];
790 }
791
792 warned = 0;
793 for (cnt = 0; cnt < 10; ++cnt)
794 if (ctype->wcoutdigits[cnt] == 0)
795 {
796 if (!warned)
797 {
798 error (0, 0, _("\
799 not all characters used in `outdigit' are available in the repertoire"));
800 warned = 1;
801 }
802
803 ctype->wcoutdigits[cnt] = L'?';
804 }
805
806 /* Sort the entries in the translit_ignore list. */
807 if (ctype->translit_ignore != NULL)
808 {
809 struct translit_ignore_t *firstp = ctype->translit_ignore;
810 struct translit_ignore_t *runp;
811
812 ctype->ntranslit_ignore = 1;
813
814 for (runp = firstp->next; runp != NULL; runp = runp->next)
815 {
816 struct translit_ignore_t *lastp = NULL;
817 struct translit_ignore_t *cmpp;
818
819 ++ctype->ntranslit_ignore;
820
821 for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
822 if (runp->from < cmpp->from)
823 break;
824
825 runp->next = lastp;
826 if (lastp == NULL)
827 firstp = runp;
828 }
829
830 ctype->translit_ignore = firstp;
831 }
832 }
833
834
835 void
836 ctype_output (struct localedef_t *locale, struct charmap_t *charmap,
837 const char *output_path)
838 {
839 static const char nulbytes[4] = { 0, 0, 0, 0 };
840 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
841 const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
842 + (oldstyle_tables
843 ? (ctype->map_collection_nr - 2)
844 : (ctype->nr_charclass + ctype->map_collection_nr)));
845 struct iovec iov[2 + nelems + ctype->nr_charclass
846 + ctype->map_collection_nr + 2];
847 struct locale_file data;
848 uint32_t idx[nelems + 1];
849 uint32_t default_missing_len;
850 size_t elem, cnt, offset, total;
851 char *cp;
852
853 /* Now prepare the output: Find the sizes of the table we can use. */
854 allocate_arrays (ctype, charmap, ctype->repertoire);
855
856 data.magic = LIMAGIC (LC_CTYPE);
857 data.n = nelems;
858 iov[0].iov_base = (void *) &data;
859 iov[0].iov_len = sizeof (data);
860
861 iov[1].iov_base = (void *) idx;
862 iov[1].iov_len = nelems * sizeof (uint32_t);
863
864 idx[0] = iov[0].iov_len + iov[1].iov_len;
865 offset = 0;
866
867 for (elem = 0; elem < nelems; ++elem)
868 {
869 if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
870 switch (elem)
871 {
872 #define CTYPE_EMPTY(name) \
873 case name: \
874 iov[2 + elem + offset].iov_base = (void *) ""; \
875 iov[2 + elem + offset].iov_len = 0; \
876 idx[elem + 1] = idx[elem]; \
877 break
878
879 CTYPE_EMPTY(_NL_CTYPE_GAP1);
880 CTYPE_EMPTY(_NL_CTYPE_GAP2);
881 CTYPE_EMPTY(_NL_CTYPE_GAP3);
882
883 #define CTYPE_DATA(name, base, len) \
884 case _NL_ITEM_INDEX (name): \
885 iov[2 + elem + offset].iov_base = (base); \
886 iov[2 + elem + offset].iov_len = (len); \
887 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
888 break
889
890 CTYPE_DATA (_NL_CTYPE_CLASS,
891 ctype->ctype_b,
892 (256 + 128) * sizeof (char_class_t));
893
894 CTYPE_DATA (_NL_CTYPE_TOUPPER,
895 ctype->map[0],
896 (256 + 128) * sizeof (uint32_t));
897 CTYPE_DATA (_NL_CTYPE_TOLOWER,
898 ctype->map[1],
899 (256 + 128) * sizeof (uint32_t));
900
901 CTYPE_DATA (_NL_CTYPE_TOUPPER32,
902 ctype->map32[0],
903 (oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256)
904 * sizeof (uint32_t));
905 CTYPE_DATA (_NL_CTYPE_TOLOWER32,
906 ctype->map32[1],
907 (oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256)
908 * sizeof (uint32_t));
909
910 CTYPE_DATA (_NL_CTYPE_CLASS32,
911 ctype->ctype32_b,
912 (oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256)
913 * sizeof (char_class32_t));
914
915 CTYPE_DATA (_NL_CTYPE_NAMES,
916 ctype->names,
917 (oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 0)
918 * sizeof (uint32_t));
919
920 CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET,
921 &ctype->class_offset, sizeof (uint32_t));
922
923 CTYPE_DATA (_NL_CTYPE_MAP_OFFSET,
924 &ctype->map_offset, sizeof (uint32_t));
925
926 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
927 &ctype->translit_idx_size, sizeof (uint32_t));
928
929 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
930 ctype->translit_from_idx,
931 ctype->translit_idx_size * sizeof (uint32_t));
932
933 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
934 ctype->translit_from_tbl,
935 ctype->translit_from_tbl_size);
936
937 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
938 ctype->translit_to_idx,
939 ctype->translit_idx_size * sizeof (uint32_t));
940
941 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
942 ctype->translit_to_tbl, ctype->translit_to_tbl_size);
943
944 CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
945 &ctype->plane_size, sizeof (uint32_t));
946 CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
947 &ctype->plane_cnt, sizeof (uint32_t));
948
949 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
950 /* The class name array. */
951 total = 0;
952 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
953 {
954 iov[2 + elem + offset].iov_base
955 = (void *) ctype->classnames[cnt];
956 iov[2 + elem + offset].iov_len
957 = strlen (ctype->classnames[cnt]) + 1;
958 total += iov[2 + elem + offset].iov_len;
959 }
960 iov[2 + elem + offset].iov_base = (void *) nulbytes;
961 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
962 total += 1 + (4 - ((total + 1) % 4));
963
964 idx[elem + 1] = idx[elem] + total;
965 break;
966
967 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
968 /* The class name array. */
969 total = 0;
970 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
971 {
972 iov[2 + elem + offset].iov_base
973 = (void *) ctype->mapnames[cnt];
974 iov[2 + elem + offset].iov_len
975 = strlen (ctype->mapnames[cnt]) + 1;
976 total += iov[2 + elem + offset].iov_len;
977 }
978 iov[2 + elem + offset].iov_base = (void *) nulbytes;
979 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
980 total += 1 + (4 - ((total + 1) % 4));
981
982 idx[elem + 1] = idx[elem] + total;
983 break;
984
985 CTYPE_DATA (_NL_CTYPE_WIDTH,
986 (oldstyle_tables
987 ? ctype->width
988 : ctype->width_3level.iov_base),
989 (oldstyle_tables
990 ? (ctype->plane_size * ctype->plane_cnt + 3) & ~3ul
991 : ctype->width_3level.iov_len));
992
993 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
994 &ctype->mb_cur_max, sizeof (uint32_t));
995
996 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
997 total = strlen (ctype->codeset_name) + 1;
998 if (total % 4 == 0)
999 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
1000 else
1001 {
1002 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
1003 memset (mempcpy (iov[2 + elem + offset].iov_base,
1004 ctype->codeset_name, total),
1005 '\0', 4 - (total & 3));
1006 total = (total + 3) & ~3;
1007 }
1008 iov[2 + elem + offset].iov_len = total;
1009 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1010 break;
1011
1012 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1013 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1014 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1015 *(uint32_t *) iov[2 + elem + offset].iov_base =
1016 ctype->mbdigits_act / 10;
1017 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1018 break;
1019
1020 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1021 /* Align entries. */
1022 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1023 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1024 idx[elem] += iov[2 + elem + offset].iov_len;
1025 ++offset;
1026
1027 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1028 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1029 *(uint32_t *) iov[2 + elem + offset].iov_base =
1030 ctype->wcdigits_act / 10;
1031 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1032 break;
1033
1034 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1035 /* Compute the length of all possible characters. For INDIGITS
1036 there might be more than one. We simply concatenate all of
1037 them with a NUL byte following. The NUL byte wouldn't be
1038 necessary but it makes it easier for the user. */
1039 total = 0;
1040
1041 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1042 cnt < ctype->mbdigits_act; cnt += 10)
1043 total += ctype->mbdigits[cnt]->nbytes + 1;
1044 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1045 iov[2 + elem + offset].iov_len = total;
1046
1047 cp = iov[2 + elem + offset].iov_base;
1048 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1049 cnt < ctype->mbdigits_act; cnt += 10)
1050 {
1051 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
1052 ctype->mbdigits[cnt]->nbytes);
1053 *cp++ = '\0';
1054 }
1055 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1056 break;
1057
1058 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1059 /* Compute the length of all possible characters. For INDIGITS
1060 there might be more than one. We simply concatenate all of
1061 them with a NUL byte following. The NUL byte wouldn't be
1062 necessary but it makes it easier for the user. */
1063 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1064 total = ctype->mboutdigits[cnt]->nbytes + 1;
1065 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1066 iov[2 + elem + offset].iov_len = total;
1067
1068 *(char *) mempcpy (iov[2 + elem + offset].iov_base,
1069 ctype->mboutdigits[cnt]->bytes,
1070 ctype->mboutdigits[cnt]->nbytes) = '\0';
1071 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1072 break;
1073
1074 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1075 total = ctype->wcdigits_act / 10;
1076
1077 iov[2 + elem + offset].iov_base =
1078 (uint32_t *) alloca (total * sizeof (uint32_t));
1079 iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
1080
1081 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1082 cnt < ctype->wcdigits_act; cnt += 10)
1083 ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
1084 = ctype->wcdigits[cnt];
1085 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1086 break;
1087
1088 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC):
1089 /* Align entries. */
1090 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1091 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1092 idx[elem] += iov[2 + elem + offset].iov_len;
1093 ++offset;
1094 /* FALLTRHOUGH */
1095
1096 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1097 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1098 iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
1099 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1100 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1101 break;
1102
1103 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1104 default_missing_len = (ctype->default_missing
1105 ? wcslen ((wchar_t *)ctype->default_missing)
1106 : 0);
1107 iov[2 + elem + offset].iov_base = &default_missing_len;
1108 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1109 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1110 break;
1111
1112 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1113 iov[2 + elem + offset].iov_base =
1114 ctype->default_missing ?: (uint32_t *) L"";
1115 iov[2 + elem + offset].iov_len =
1116 wcslen (iov[2 + elem + offset].iov_base);
1117 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1118 break;
1119
1120 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1121 iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore;
1122 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1123 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1124 break;
1125
1126 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1127 {
1128 uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore
1129 * 3 * sizeof (uint32_t));
1130 struct translit_ignore_t *runp;
1131
1132 iov[2 + elem + offset].iov_base = ranges;
1133 iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore
1134 * 3 * sizeof (uint32_t));
1135
1136 for (runp = ctype->translit_ignore; runp != NULL;
1137 runp = runp->next)
1138 {
1139 *ranges++ = runp->from;
1140 *ranges++ = runp->to;
1141 *ranges++ = runp->step;
1142 }
1143 }
1144 /* Remove the following line in case a new entry is added
1145 after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN. */
1146 if (elem < nelems)
1147 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1148 break;
1149
1150 default:
1151 assert (! "unknown CTYPE element");
1152 }
1153 else
1154 {
1155 /* Handle extra maps. */
1156 if (oldstyle_tables)
1157 {
1158 size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) + 2;
1159
1160 iov[2 + elem + offset].iov_base = ctype->map32[nr];
1161 iov[2 + elem + offset].iov_len = ((ctype->plane_size
1162 * ctype->plane_cnt)
1163 * sizeof (uint32_t));
1164
1165 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1166 }
1167 else
1168 {
1169 size_t nr = elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE);
1170 if (nr < ctype->nr_charclass)
1171 {
1172 iov[2 + elem + offset] = ctype->class_3level[nr];
1173 }
1174 else
1175 {
1176 nr -= ctype->nr_charclass;
1177 assert (nr < ctype->map_collection_nr);
1178 iov[2 + elem + offset] = ctype->map_3level[nr];
1179 }
1180 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1181 }
1182 }
1183 }
1184
1185 assert (2 + elem + offset == (nelems + ctype->nr_charclass
1186 + ctype->map_collection_nr + 2 + 2));
1187
1188 write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
1189 }
1190
1191
1192 /* Local functions. */
1193 static void
1194 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1195 const char *name)
1196 {
1197 size_t cnt;
1198
1199 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1200 if (strcmp (ctype->classnames[cnt], name) == 0)
1201 break;
1202
1203 if (cnt < ctype->nr_charclass)
1204 {
1205 lr_error (lr, _("character class `%s' already defined"), name);
1206 return;
1207 }
1208
1209 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1210 /* Exit code 2 is prescribed in P1003.2b. */
1211 error (2, 0, _("\
1212 implementation limit: no more than %Zd character classes allowed"),
1213 MAX_NR_CHARCLASS);
1214
1215 ctype->classnames[ctype->nr_charclass++] = name;
1216 }
1217
1218
1219 static void
1220 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1221 const char *name, struct charmap_t *charmap)
1222 {
1223 size_t max_chars = 0;
1224 size_t cnt;
1225
1226 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1227 {
1228 if (strcmp (ctype->mapnames[cnt], name) == 0)
1229 break;
1230
1231 if (max_chars < ctype->map_collection_max[cnt])
1232 max_chars = ctype->map_collection_max[cnt];
1233 }
1234
1235 if (cnt < ctype->map_collection_nr)
1236 {
1237 lr_error (lr, _("character map `%s' already defined"), name);
1238 return;
1239 }
1240
1241 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1242 /* Exit code 2 is prescribed in P1003.2b. */
1243 error (2, 0, _("\
1244 implementation limit: no more than %d character maps allowed"),
1245 MAX_NR_CHARMAP);
1246
1247 ctype->mapnames[cnt] = name;
1248
1249 if (max_chars == 0)
1250 ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1251 else
1252 ctype->map_collection_max[cnt] = max_chars;
1253
1254 ctype->map_collection[cnt] = (uint32_t *)
1255 xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1256 ctype->map_collection_act[cnt] = 256;
1257
1258 ++ctype->map_collection_nr;
1259 }
1260
1261
1262 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1263 is possible if we only want to extend the name array. */
1264 static uint32_t *
1265 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1266 size_t *act, uint32_t idx)
1267 {
1268 size_t cnt;
1269
1270 if (idx < 256)
1271 return table == NULL ? NULL : &(*table)[idx];
1272
1273 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1274 if (ctype->charnames[cnt] == idx)
1275 break;
1276
1277 /* We have to distinguish two cases: the name is found or not. */
1278 if (cnt == ctype->charnames_act)
1279 {
1280 /* Extend the name array. */
1281 if (ctype->charnames_act == ctype->charnames_max)
1282 {
1283 ctype->charnames_max *= 2;
1284 ctype->charnames = (uint32_t *)
1285 xrealloc (ctype->charnames,
1286 sizeof (uint32_t) * ctype->charnames_max);
1287 }
1288 ctype->charnames[ctype->charnames_act++] = idx;
1289 }
1290
1291 if (table == NULL)
1292 /* We have done everything we are asked to do. */
1293 return NULL;
1294
1295 if (cnt >= *act)
1296 {
1297 if (cnt >= *max)
1298 {
1299 size_t old_max = *max;
1300 do
1301 *max *= 2;
1302 while (*max <= cnt);
1303
1304 *table =
1305 (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1306 memset (&(*table)[old_max], '\0',
1307 (*max - old_max) * sizeof (uint32_t));
1308 }
1309
1310 *act = cnt + 1;
1311 }
1312
1313 return &(*table)[cnt];
1314 }
1315
1316
1317 static int
1318 get_character (struct token *now, struct charmap_t *charmap,
1319 struct repertoire_t *repertoire,
1320 struct charseq **seqp, uint32_t *wchp)
1321 {
1322 if (now->tok == tok_bsymbol)
1323 {
1324 /* This will hopefully be the normal case. */
1325 *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1326 now->val.str.lenmb);
1327 *seqp = charmap_find_value (charmap, now->val.str.startmb,
1328 now->val.str.lenmb);
1329 }
1330 else if (now->tok == tok_ucs4)
1331 {
1332 char utmp[10];
1333
1334 snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1335 *seqp = charmap_find_value (charmap, utmp, 9);
1336
1337 if (*seqp == NULL)
1338 *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1339
1340 if (*seqp == NULL)
1341 {
1342 /* Compute the value in the charmap from the UCS value. */
1343 const char *symbol = repertoire_find_symbol (repertoire,
1344 now->val.ucs4);
1345
1346 if (symbol == NULL)
1347 *seqp = NULL;
1348 else
1349 *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1350
1351 if (*seqp == NULL)
1352 {
1353 if (repertoire != NULL)
1354 {
1355 /* Insert a negative entry. */
1356 static const struct charseq negative
1357 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1358 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1359 sizeof (uint32_t));
1360 *newp = now->val.ucs4;
1361
1362 insert_entry (&repertoire->seq_table, newp,
1363 sizeof (uint32_t), (void *) &negative);
1364 }
1365 }
1366 else
1367 (*seqp)->ucs4 = now->val.ucs4;
1368 }
1369 else if ((*seqp)->ucs4 != now->val.ucs4)
1370 *seqp = NULL;
1371
1372 *wchp = now->val.ucs4;
1373 }
1374 else if (now->tok == tok_charcode)
1375 {
1376 /* We must map from the byte code to UCS4. */
1377 *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1378 now->val.str.lenmb);
1379
1380 if (*seqp == NULL)
1381 *wchp = ILLEGAL_CHAR_VALUE;
1382 else
1383 {
1384 if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1385 (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1386 strlen ((*seqp)->name));
1387 *wchp = (*seqp)->ucs4;
1388 }
1389 }
1390 else
1391 return 1;
1392
1393 return 0;
1394 }
1395
1396
1397 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1398 the .(2). counterparts. */
1399 static void
1400 charclass_symbolic_ellipsis (struct linereader *ldfile,
1401 struct locale_ctype_t *ctype,
1402 struct charmap_t *charmap,
1403 struct repertoire_t *repertoire,
1404 struct token *now,
1405 const char *last_str,
1406 unsigned long int class256_bit,
1407 unsigned long int class_bit, int base,
1408 int ignore_content, int handle_digits, int step)
1409 {
1410 const char *nowstr = now->val.str.startmb;
1411 char tmp[now->val.str.lenmb + 1];
1412 const char *cp;
1413 char *endp;
1414 unsigned long int from;
1415 unsigned long int to;
1416
1417 /* We have to compute the ellipsis values using the symbolic names. */
1418 assert (last_str != NULL);
1419
1420 if (strlen (last_str) != now->val.str.lenmb)
1421 {
1422 invalid_range:
1423 lr_error (ldfile,
1424 _("`%s' and `%.*s' are no valid names for symbolic range"),
1425 last_str, (int) now->val.str.lenmb, nowstr);
1426 return;
1427 }
1428
1429 if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1430 /* Nothing to do, the names are the same. */
1431 return;
1432
1433 for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1434 ;
1435
1436 errno = 0;
1437 from = strtoul (cp, &endp, base);
1438 if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1439 goto invalid_range;
1440
1441 to = strtoul (nowstr + (cp - last_str), &endp, base);
1442 if ((to == UINT_MAX && errno == ERANGE)
1443 || (endp - nowstr) != now->val.str.lenmb || from >= to)
1444 goto invalid_range;
1445
1446 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1447 if (!ignore_content)
1448 {
1449 now->val.str.startmb = tmp;
1450 while ((from += step) <= to)
1451 {
1452 struct charseq *seq;
1453 uint32_t wch;
1454
1455 sprintf (tmp, (base == 10 ? "%.*s%0*d" : "%.*s%0*X"), cp - last_str,
1456 last_str, now->val.str.lenmb - (cp - last_str), from);
1457
1458 get_character (now, charmap, repertoire, &seq, &wch);
1459
1460 if (seq != NULL && seq->nbytes == 1)
1461 /* Yep, we can store information about this byte sequence. */
1462 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1463
1464 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1465 /* We have the UCS4 position. */
1466 *find_idx (ctype, &ctype->class_collection,
1467 &ctype->class_collection_max,
1468 &ctype->class_collection_act, wch) |= class_bit;
1469
1470 if (handle_digits == 1)
1471 {
1472 /* We must store the digit values. */
1473 if (ctype->mbdigits_act == ctype->mbdigits_max)
1474 {
1475 ctype->mbdigits_max *= 2;
1476 ctype->mbdigits = xrealloc (ctype->mbdigits,
1477 (ctype->mbdigits_max
1478 * sizeof (char *)));
1479 ctype->wcdigits_max *= 2;
1480 ctype->wcdigits = xrealloc (ctype->wcdigits,
1481 (ctype->wcdigits_max
1482 * sizeof (uint32_t)));
1483 }
1484
1485 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1486 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1487 }
1488 else if (handle_digits == 2)
1489 {
1490 /* We must store the digit values. */
1491 if (ctype->outdigits_act >= 10)
1492 {
1493 lr_error (ldfile, _("\
1494 %s: field `%s' does not contain exactly ten entries"),
1495 "LC_CTYPE", "outdigit");
1496 return;
1497 }
1498
1499 ctype->mboutdigits[ctype->outdigits_act] = seq;
1500 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1501 ++ctype->outdigits_act;
1502 }
1503 }
1504 }
1505 }
1506
1507
1508 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
1509 static void
1510 charclass_ucs4_ellipsis (struct linereader *ldfile,
1511 struct locale_ctype_t *ctype,
1512 struct charmap_t *charmap,
1513 struct repertoire_t *repertoire,
1514 struct token *now, uint32_t last_wch,
1515 unsigned long int class256_bit,
1516 unsigned long int class_bit, int ignore_content,
1517 int handle_digits, int step)
1518 {
1519 if (last_wch > now->val.ucs4)
1520 {
1521 lr_error (ldfile, _("\
1522 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1523 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1524 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1525 return;
1526 }
1527
1528 if (!ignore_content)
1529 while ((last_wch += step) <= now->val.ucs4)
1530 {
1531 /* We have to find out whether there is a byte sequence corresponding
1532 to this UCS4 value. */
1533 struct charseq *seq;
1534 char utmp[10];
1535
1536 snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1537 seq = charmap_find_value (charmap, utmp, 9);
1538 if (seq == NULL)
1539 {
1540 snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1541 seq = charmap_find_value (charmap, utmp, 5);
1542 }
1543
1544 if (seq == NULL)
1545 /* Try looking in the repertoire map. */
1546 seq = repertoire_find_seq (repertoire, last_wch);
1547
1548 /* If this is the first time we look for this sequence create a new
1549 entry. */
1550 if (seq == NULL)
1551 {
1552 static const struct charseq negative
1553 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1554
1555 /* Find the symbolic name for this UCS4 value. */
1556 if (repertoire != NULL)
1557 {
1558 const char *symbol = repertoire_find_symbol (repertoire,
1559 last_wch);
1560 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1561 sizeof (uint32_t));
1562 *newp = last_wch;
1563
1564 if (symbol != NULL)
1565 /* We have a name, now search the multibyte value. */
1566 seq = charmap_find_value (charmap, symbol, strlen (symbol));
1567
1568 if (seq == NULL)
1569 /* We have to create a fake entry. */
1570 seq = (struct charseq *) &negative;
1571 else
1572 seq->ucs4 = last_wch;
1573
1574 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1575 seq);
1576 }
1577 else
1578 /* We have to create a fake entry. */
1579 seq = (struct charseq *) &negative;
1580 }
1581
1582 /* We have a name, now search the multibyte value. */
1583 if (seq->ucs4 == last_wch && seq->nbytes == 1)
1584 /* Yep, we can store information about this byte sequence. */
1585 ctype->class256_collection[(size_t) seq->bytes[0]]
1586 |= class256_bit;
1587
1588 /* And of course we have the UCS4 position. */
1589 if (class_bit != 0)
1590 *find_idx (ctype, &ctype->class_collection,
1591 &ctype->class_collection_max,
1592 &ctype->class_collection_act, last_wch) |= class_bit;
1593
1594 if (handle_digits == 1)
1595 {
1596 /* We must store the digit values. */
1597 if (ctype->mbdigits_act == ctype->mbdigits_max)
1598 {
1599 ctype->mbdigits_max *= 2;
1600 ctype->mbdigits = xrealloc (ctype->mbdigits,
1601 (ctype->mbdigits_max
1602 * sizeof (char *)));
1603 ctype->wcdigits_max *= 2;
1604 ctype->wcdigits = xrealloc (ctype->wcdigits,
1605 (ctype->wcdigits_max
1606 * sizeof (uint32_t)));
1607 }
1608
1609 ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1610 ? seq : NULL);
1611 ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1612 }
1613 else if (handle_digits == 2)
1614 {
1615 /* We must store the digit values. */
1616 if (ctype->outdigits_act >= 10)
1617 {
1618 lr_error (ldfile, _("\
1619 %s: field `%s' does not contain exactly ten entries"),
1620 "LC_CTYPE", "outdigit");
1621 return;
1622 }
1623
1624 ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1625 ? seq : NULL);
1626 ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1627 ++ctype->outdigits_act;
1628 }
1629 }
1630 }
1631
1632
1633 /* Ellipsis as in `/xea/x12.../xea/x34'. */
1634 static void
1635 charclass_charcode_ellipsis (struct linereader *ldfile,
1636 struct locale_ctype_t *ctype,
1637 struct charmap_t *charmap,
1638 struct repertoire_t *repertoire,
1639 struct token *now, char *last_charcode,
1640 uint32_t last_charcode_len,
1641 unsigned long int class256_bit,
1642 unsigned long int class_bit, int ignore_content,
1643 int handle_digits)
1644 {
1645 /* First check whether the to-value is larger. */
1646 if (now->val.charcode.nbytes != last_charcode_len)
1647 {
1648 lr_error (ldfile, _("\
1649 start end end character sequence of range must have the same length"));
1650 return;
1651 }
1652
1653 if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1654 {
1655 lr_error (ldfile, _("\
1656 to-value character sequence is smaller than from-value sequence"));
1657 return;
1658 }
1659
1660 if (!ignore_content)
1661 {
1662 do
1663 {
1664 /* Increment the byte sequence value. */
1665 struct charseq *seq;
1666 uint32_t wch;
1667 int i;
1668
1669 for (i = last_charcode_len - 1; i >= 0; --i)
1670 if (++last_charcode[i] != 0)
1671 break;
1672
1673 if (last_charcode_len == 1)
1674 /* Of course we have the charcode value. */
1675 ctype->class256_collection[(size_t) last_charcode[0]]
1676 |= class256_bit;
1677
1678 /* Find the symbolic name. */
1679 seq = charmap_find_symbol (charmap, last_charcode,
1680 last_charcode_len);
1681 if (seq != NULL)
1682 {
1683 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1684 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1685 strlen (seq->name));
1686 wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1687
1688 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1689 *find_idx (ctype, &ctype->class_collection,
1690 &ctype->class_collection_max,
1691 &ctype->class_collection_act, wch) |= class_bit;
1692 }
1693 else
1694 wch = ILLEGAL_CHAR_VALUE;
1695
1696 if (handle_digits == 1)
1697 {
1698 /* We must store the digit values. */
1699 if (ctype->mbdigits_act == ctype->mbdigits_max)
1700 {
1701 ctype->mbdigits_max *= 2;
1702 ctype->mbdigits = xrealloc (ctype->mbdigits,
1703 (ctype->mbdigits_max
1704 * sizeof (char *)));
1705 ctype->wcdigits_max *= 2;
1706 ctype->wcdigits = xrealloc (ctype->wcdigits,
1707 (ctype->wcdigits_max
1708 * sizeof (uint32_t)));
1709 }
1710
1711 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1712 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1713 seq->nbytes = last_charcode_len;
1714
1715 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1716 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1717 }
1718 else if (handle_digits == 2)
1719 {
1720 struct charseq *seq;
1721 /* We must store the digit values. */
1722 if (ctype->outdigits_act >= 10)
1723 {
1724 lr_error (ldfile, _("\
1725 %s: field `%s' does not contain exactly ten entries"),
1726 "LC_CTYPE", "outdigit");
1727 return;
1728 }
1729
1730 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1731 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1732 seq->nbytes = last_charcode_len;
1733
1734 ctype->mboutdigits[ctype->outdigits_act] = seq;
1735 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1736 ++ctype->outdigits_act;
1737 }
1738 }
1739 while (memcmp (last_charcode, now->val.charcode.bytes,
1740 last_charcode_len) != 0);
1741 }
1742 }
1743
1744
1745 /* Read one transliteration entry. */
1746 static uint32_t *
1747 read_widestring (struct linereader *ldfile, struct token *now,
1748 struct charmap_t *charmap, struct repertoire_t *repertoire)
1749 {
1750 uint32_t *wstr;
1751
1752 if (now->tok == tok_default_missing)
1753 /* The special name "" will denote this case. */
1754 wstr = ((uint32_t *) { 0 });
1755 else if (now->tok == tok_bsymbol)
1756 {
1757 /* Get the value from the repertoire. */
1758 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1759 wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1760 now->val.str.lenmb);
1761 if (wstr[0] == ILLEGAL_CHAR_VALUE)
1762 {
1763 /* We cannot proceed, we don't know the UCS4 value. */
1764 free (wstr);
1765 return NULL;
1766 }
1767
1768 wstr[1] = 0;
1769 }
1770 else if (now->tok == tok_ucs4)
1771 {
1772 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1773 wstr[0] = now->val.ucs4;
1774 wstr[1] = 0;
1775 }
1776 else if (now->tok == tok_charcode)
1777 {
1778 /* Argh, we have to convert to the symbol name first and then to the
1779 UCS4 value. */
1780 struct charseq *seq = charmap_find_symbol (charmap,
1781 now->val.str.startmb,
1782 now->val.str.lenmb);
1783 if (seq == NULL)
1784 /* Cannot find the UCS4 value. */
1785 return NULL;
1786
1787 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1788 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1789 strlen (seq->name));
1790 if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1791 /* We cannot proceed, we don't know the UCS4 value. */
1792 return NULL;
1793
1794 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1795 wstr[0] = seq->ucs4;
1796 wstr[1] = 0;
1797 }
1798 else if (now->tok == tok_string)
1799 {
1800 wstr = now->val.str.startwc;
1801 if (wstr == NULL || wstr[0] == 0)
1802 return NULL;
1803 }
1804 else
1805 {
1806 if (now->tok != tok_eol && now->tok != tok_eof)
1807 lr_ignore_rest (ldfile, 0);
1808 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1809 return (uint32_t *) -1l;
1810 }
1811
1812 return wstr;
1813 }
1814
1815
1816 static void
1817 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1818 struct token *now, struct charmap_t *charmap,
1819 struct repertoire_t *repertoire)
1820 {
1821 uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1822 struct translit_t *result;
1823 struct translit_to_t **top;
1824 struct obstack *ob = &ctype->mempool;
1825 int first;
1826 int ignore;
1827
1828 if (from_wstr == NULL)
1829 /* There is no valid from string. */
1830 return;
1831
1832 result = (struct translit_t *) obstack_alloc (ob,
1833 sizeof (struct translit_t));
1834 result->from = from_wstr;
1835 result->fname = ldfile->fname;
1836 result->lineno = ldfile->lineno;
1837 result->next = NULL;
1838 result->to = NULL;
1839 top = &result->to;
1840 first = 1;
1841 ignore = 0;
1842
1843 while (1)
1844 {
1845 uint32_t *to_wstr;
1846
1847 /* Next we have one or more transliterations. They are
1848 separated by semicolons. */
1849 now = lr_token (ldfile, charmap, repertoire);
1850
1851 if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1852 {
1853 /* One string read. */
1854 const uint32_t zero = 0;
1855
1856 if (!ignore)
1857 {
1858 obstack_grow (ob, &zero, 4);
1859 to_wstr = obstack_finish (ob);
1860
1861 *top = obstack_alloc (ob, sizeof (struct translit_to_t));
1862 (*top)->str = to_wstr;
1863 (*top)->next = NULL;
1864 }
1865
1866 if (now->tok == tok_eol)
1867 {
1868 result->next = ctype->translit;
1869 ctype->translit = result;
1870 return;
1871 }
1872
1873 if (!ignore)
1874 top = &(*top)->next;
1875 ignore = 0;
1876 }
1877 else
1878 {
1879 to_wstr = read_widestring (ldfile, now, charmap, repertoire);
1880 if (to_wstr == (uint32_t *) -1l)
1881 {
1882 /* An error occurred. */
1883 obstack_free (ob, result);
1884 return;
1885 }
1886
1887 if (to_wstr == NULL)
1888 ignore = 1;
1889 else
1890 /* This value is usable. */
1891 obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
1892
1893 first = 0;
1894 }
1895 }
1896 }
1897
1898
1899 static void
1900 read_translit_ignore_entry (struct linereader *ldfile,
1901 struct locale_ctype_t *ctype,
1902 struct charmap_t *charmap,
1903 struct repertoire_t *repertoire)
1904 {
1905 /* We expect a semicolon-separated list of characters we ignore. We are
1906 only interested in the wide character definitions. These must be
1907 single characters, possibly defining a range when an ellipsis is used. */
1908 while (1)
1909 {
1910 struct token *now = lr_token (ldfile, charmap, repertoire);
1911 struct translit_ignore_t *newp;
1912 uint32_t from;
1913
1914 if (now->tok == tok_eol || now->tok == tok_eof)
1915 {
1916 lr_error (ldfile,
1917 _("premature end of `translit_ignore' definition"));
1918 return;
1919 }
1920
1921 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1922 {
1923 lr_error (ldfile, _("syntax error"));
1924 lr_ignore_rest (ldfile, 0);
1925 return;
1926 }
1927
1928 if (now->tok == tok_ucs4)
1929 from = now->val.ucs4;
1930 else
1931 /* Try to get the value. */
1932 from = repertoire_find_value (repertoire, now->val.str.startmb,
1933 now->val.str.lenmb);
1934
1935 if (from == ILLEGAL_CHAR_VALUE)
1936 {
1937 lr_error (ldfile, "invalid character name");
1938 newp = NULL;
1939 }
1940 else
1941 {
1942 newp = (struct translit_ignore_t *)
1943 obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
1944 newp->from = from;
1945 newp->to = from;
1946 newp->step = 1;
1947
1948 newp->next = ctype->translit_ignore;
1949 ctype->translit_ignore = newp;
1950 }
1951
1952 /* Now we expect either a semicolon, an ellipsis, or the end of the
1953 line. */
1954 now = lr_token (ldfile, charmap, repertoire);
1955
1956 if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
1957 {
1958 /* XXX Should we bother implementing `....'? `...' certainly
1959 will not be implemented. */
1960 uint32_t to;
1961 int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
1962
1963 now = lr_token (ldfile, charmap, repertoire);
1964
1965 if (now->tok == tok_eol || now->tok == tok_eof)
1966 {
1967 lr_error (ldfile,
1968 _("premature end of `translit_ignore' definition"));
1969 return;
1970 }
1971
1972 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1973 {
1974 lr_error (ldfile, _("syntax error"));
1975 lr_ignore_rest (ldfile, 0);
1976 return;
1977 }
1978
1979 if (now->tok == tok_ucs4)
1980 to = now->val.ucs4;
1981 else
1982 /* Try to get the value. */
1983 to = repertoire_find_value (repertoire, now->val.str.startmb,
1984 now->val.str.lenmb);
1985
1986 if (to == ILLEGAL_CHAR_VALUE)
1987 lr_error (ldfile, "invalid character name");
1988 else
1989 {
1990 /* Make sure the `to'-value is larger. */
1991 if (to >= from)
1992 {
1993 newp->to = to;
1994 newp->step = step;
1995 }
1996 else
1997 lr_error (ldfile, _("\
1998 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1999 (to | from) < 65536 ? 4 : 8, to,
2000 (to | from) < 65536 ? 4 : 8, from);
2001 }
2002
2003 /* And the next token. */
2004 now = lr_token (ldfile, charmap, repertoire);
2005 }
2006
2007 if (now->tok == tok_eol || now->tok == tok_eof)
2008 /* We are done. */
2009 return;
2010
2011 if (now->tok == tok_semicolon)
2012 /* Next round. */
2013 continue;
2014
2015 /* If we come here something is wrong. */
2016 lr_error (ldfile, _("syntax error"));
2017 lr_ignore_rest (ldfile, 0);
2018 return;
2019 }
2020 }
2021
2022
2023 /* The parser for the LC_CTYPE section of the locale definition. */
2024 void
2025 ctype_read (struct linereader *ldfile, struct localedef_t *result,
2026 struct charmap_t *charmap, const char *repertoire_name,
2027 int ignore_content)
2028 {
2029 struct repertoire_t *repertoire = NULL;
2030 struct locale_ctype_t *ctype;
2031 struct token *now;
2032 enum token_t nowtok;
2033 size_t cnt;
2034 struct charseq *last_seq;
2035 uint32_t last_wch = 0;
2036 enum token_t last_token;
2037 enum token_t ellipsis_token;
2038 int step;
2039 char last_charcode[16];
2040 size_t last_charcode_len = 0;
2041 const char *last_str = NULL;
2042 int mapidx;
2043
2044 /* Get the repertoire we have to use. */
2045 if (repertoire_name != NULL)
2046 repertoire = repertoire_read (repertoire_name);
2047
2048 /* The rest of the line containing `LC_CTYPE' must be free. */
2049 lr_ignore_rest (ldfile, 1);
2050
2051
2052 do
2053 {
2054 now = lr_token (ldfile, charmap, NULL);
2055 nowtok = now->tok;
2056 }
2057 while (nowtok == tok_eol);
2058
2059 /* If we see `copy' now we are almost done. */
2060 if (nowtok == tok_copy)
2061 {
2062 handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_ctype,
2063 LC_CTYPE, "LC_CTYPE", ignore_content);
2064 return;
2065 }
2066
2067 /* Prepare the data structures. */
2068 ctype_startup (ldfile, result, charmap, ignore_content);
2069 ctype = result->categories[LC_CTYPE].ctype;
2070
2071 /* Remember the repertoire we use. */
2072 if (!ignore_content)
2073 ctype->repertoire = repertoire;
2074
2075 while (1)
2076 {
2077 unsigned long int class_bit = 0;
2078 unsigned long int class256_bit = 0;
2079 int handle_digits = 0;
2080
2081 /* Of course we don't proceed beyond the end of file. */
2082 if (nowtok == tok_eof)
2083 break;
2084
2085 /* Ingore empty lines. */
2086 if (nowtok == tok_eol)
2087 {
2088 now = lr_token (ldfile, charmap, NULL);
2089 nowtok = now->tok;
2090 continue;
2091 }
2092
2093 switch (nowtok)
2094 {
2095 case tok_charclass:
2096 now = lr_token (ldfile, charmap, NULL);
2097 while (now->tok == tok_ident || now->tok == tok_string)
2098 {
2099 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2100 now = lr_token (ldfile, charmap, NULL);
2101 if (now->tok != tok_semicolon)
2102 break;
2103 now = lr_token (ldfile, charmap, NULL);
2104 }
2105 if (now->tok != tok_eol)
2106 SYNTAX_ERROR (_("\
2107 %s: syntax error in definition of new character class"), "LC_CTYPE");
2108 break;
2109
2110 case tok_charconv:
2111 now = lr_token (ldfile, charmap, NULL);
2112 while (now->tok == tok_ident || now->tok == tok_string)
2113 {
2114 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2115 now = lr_token (ldfile, charmap, NULL);
2116 if (now->tok != tok_semicolon)
2117 break;
2118 now = lr_token (ldfile, charmap, NULL);
2119 }
2120 if (now->tok != tok_eol)
2121 SYNTAX_ERROR (_("\
2122 %s: syntax error in definition of new character map"), "LC_CTYPE");
2123 break;
2124
2125 case tok_class:
2126 /* Ignore the rest of the line if we don't need the input of
2127 this line. */
2128 if (ignore_content)
2129 {
2130 lr_ignore_rest (ldfile, 0);
2131 break;
2132 }
2133
2134 /* We simply forget the `class' keyword and use the following
2135 operand to determine the bit. */
2136 now = lr_token (ldfile, charmap, NULL);
2137 if (now->tok == tok_ident || now->tok == tok_string)
2138 {
2139 /* Must can be one of the predefined class names. */
2140 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2141 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2142 break;
2143 if (cnt >= ctype->nr_charclass)
2144 {
2145 #ifdef PREDEFINED_CLASSES
2146 if (now->val.str.lenmb == 8
2147 && memcmp ("special1", now->val.str.startmb, 8) == 0)
2148 class_bit = _ISwspecial1;
2149 else if (now->val.str.lenmb == 8
2150 && memcmp ("special2", now->val.str.startmb, 8) == 0)
2151 class_bit = _ISwspecial2;
2152 else if (now->val.str.lenmb == 8
2153 && memcmp ("special3", now->val.str.startmb, 8) == 0)
2154 class_bit = _ISwspecial3;
2155 else
2156 #endif
2157 {
2158 /* OK, it's a new class. */
2159 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2160
2161 class_bit = _ISwbit (ctype->nr_charclass - 1);
2162 }
2163 }
2164 else
2165 {
2166 class_bit = _ISwbit (cnt);
2167
2168 free (now->val.str.startmb);
2169 }
2170 }
2171 else if (now->tok == tok_digit)
2172 goto handle_tok_digit;
2173 else if (now->tok < tok_upper || now->tok > tok_blank)
2174 goto err_label;
2175 else
2176 {
2177 class_bit = BITw (now->tok);
2178 class256_bit = BIT (now->tok);
2179 }
2180
2181 /* The next character must be a semicolon. */
2182 now = lr_token (ldfile, charmap, NULL);
2183 if (now->tok != tok_semicolon)
2184 goto err_label;
2185 goto read_charclass;
2186
2187 case tok_upper:
2188 case tok_lower:
2189 case tok_alpha:
2190 case tok_alnum:
2191 case tok_space:
2192 case tok_cntrl:
2193 case tok_punct:
2194 case tok_graph:
2195 case tok_print:
2196 case tok_xdigit:
2197 case tok_blank:
2198 /* Ignore the rest of the line if we don't need the input of
2199 this line. */
2200 if (ignore_content)
2201 {
2202 lr_ignore_rest (ldfile, 0);
2203 break;
2204 }
2205
2206 class_bit = BITw (now->tok);
2207 class256_bit = BIT (now->tok);
2208 handle_digits = 0;
2209 read_charclass:
2210 ctype->class_done |= class_bit;
2211 last_token = tok_none;
2212 ellipsis_token = tok_none;
2213 step = 1;
2214 now = lr_token (ldfile, charmap, NULL);
2215 while (now->tok != tok_eol && now->tok != tok_eof)
2216 {
2217 uint32_t wch;
2218 struct charseq *seq;
2219
2220 if (ellipsis_token == tok_none)
2221 {
2222 if (get_character (now, charmap, repertoire, &seq, &wch))
2223 goto err_label;
2224
2225 if (!ignore_content && seq != NULL && seq->nbytes == 1)
2226 /* Yep, we can store information about this byte
2227 sequence. */
2228 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2229
2230 if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2231 && class_bit != 0)
2232 /* We have the UCS4 position. */
2233 *find_idx (ctype, &ctype->class_collection,
2234 &ctype->class_collection_max,
2235 &ctype->class_collection_act, wch) |= class_bit;
2236
2237 last_token = now->tok;
2238 /* Terminate the string. */
2239 if (last_token == tok_bsymbol)
2240 {
2241 now->val.str.startmb[now->val.str.lenmb] = '\0';
2242 last_str = now->val.str.startmb;
2243 }
2244 else
2245 last_str = NULL;
2246 last_seq = seq;
2247 last_wch = wch;
2248 memcpy (last_charcode, now->val.charcode.bytes, 16);
2249 last_charcode_len = now->val.charcode.nbytes;
2250
2251 if (!ignore_content && handle_digits == 1)
2252 {
2253 /* We must store the digit values. */
2254 if (ctype->mbdigits_act == ctype->mbdigits_max)
2255 {
2256 ctype->mbdigits_max += 10;
2257 ctype->mbdigits = xrealloc (ctype->mbdigits,
2258 (ctype->mbdigits_max
2259 * sizeof (char *)));
2260 ctype->wcdigits_max += 10;
2261 ctype->wcdigits = xrealloc (ctype->wcdigits,
2262 (ctype->wcdigits_max
2263 * sizeof (uint32_t)));
2264 }
2265
2266 ctype->mbdigits[ctype->mbdigits_act++] = seq;
2267 ctype->wcdigits[ctype->wcdigits_act++] = wch;
2268 }
2269 else if (!ignore_content && handle_digits == 2)
2270 {
2271 /* We must store the digit values. */
2272 if (ctype->outdigits_act >= 10)
2273 {
2274 lr_error (ldfile, _("\
2275 %s: field `%s' does not contain exactly ten entries"),
2276 "LC_CTYPE", "outdigit");
2277 goto err_label;
2278 }
2279
2280 ctype->mboutdigits[ctype->outdigits_act] = seq;
2281 ctype->wcoutdigits[ctype->outdigits_act] = wch;
2282 ++ctype->outdigits_act;
2283 }
2284 }
2285 else
2286 {
2287 /* Now it gets complicated. We have to resolve the
2288 ellipsis problem. First we must distinguish between
2289 the different kind of ellipsis and this must match the
2290 tokens we have seen. */
2291 assert (last_token != tok_none);
2292
2293 if (last_token != now->tok)
2294 {
2295 lr_error (ldfile, _("\
2296 ellipsis range must be marked by two operands of same type"));
2297 lr_ignore_rest (ldfile, 0);
2298 break;
2299 }
2300
2301 if (last_token == tok_bsymbol)
2302 {
2303 if (ellipsis_token == tok_ellipsis3)
2304 lr_error (ldfile, _("with symbolic name range values \
2305 the absolute ellipsis `...' must not be used"));
2306
2307 charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2308 repertoire, now, last_str,
2309 class256_bit, class_bit,
2310 (ellipsis_token
2311 == tok_ellipsis4
2312 ? 10 : 16),
2313 ignore_content,
2314 handle_digits, step);
2315 }
2316 else if (last_token == tok_ucs4)
2317 {
2318 if (ellipsis_token != tok_ellipsis2)
2319 lr_error (ldfile, _("\
2320 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2321
2322 charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2323 repertoire, now, last_wch,
2324 class256_bit, class_bit,
2325 ignore_content, handle_digits,
2326 step);
2327 }
2328 else
2329 {
2330 assert (last_token == tok_charcode);
2331
2332 if (ellipsis_token != tok_ellipsis3)
2333 lr_error (ldfile, _("\
2334 with character code range values one must use the absolute ellipsis `...'"));
2335
2336 charclass_charcode_ellipsis (ldfile, ctype, charmap,
2337 repertoire, now,
2338 last_charcode,
2339 last_charcode_len,
2340 class256_bit, class_bit,
2341 ignore_content,
2342 handle_digits);
2343 }
2344
2345 /* Now we have used the last value. */
2346 last_token = tok_none;
2347 }
2348
2349 /* Next we expect a semicolon or the end of the line. */
2350 now = lr_token (ldfile, charmap, NULL);
2351 if (now->tok == tok_eol || now->tok == tok_eof)
2352 break;
2353
2354 if (last_token != tok_none
2355 && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2356 {
2357 if (now->tok == tok_ellipsis2_2)
2358 {
2359 now->tok = tok_ellipsis2;
2360 step = 2;
2361 }
2362 else if (now->tok == tok_ellipsis4_2)
2363 {
2364 now->tok = tok_ellipsis4;
2365 step = 2;
2366 }
2367
2368 ellipsis_token = now->tok;
2369
2370 now = lr_token (ldfile, charmap, NULL);
2371 continue;
2372 }
2373
2374 if (now->tok != tok_semicolon)
2375 goto err_label;
2376
2377 /* And get the next character. */
2378 now = lr_token (ldfile, charmap, NULL);
2379
2380 ellipsis_token = tok_none;
2381 step = 1;
2382 }
2383 break;
2384
2385 case tok_digit:
2386 /* Ignore the rest of the line if we don't need the input of
2387 this line. */
2388 if (ignore_content)
2389 {
2390 lr_ignore_rest (ldfile, 0);
2391 break;
2392 }
2393
2394 handle_tok_digit:
2395 class_bit = _ISwdigit;
2396 class256_bit = _ISdigit;
2397 handle_digits = 1;
2398 goto read_charclass;
2399
2400 case tok_outdigit:
2401 /* Ignore the rest of the line if we don't need the input of
2402 this line. */
2403 if (ignore_content)
2404 {
2405 lr_ignore_rest (ldfile, 0);
2406 break;
2407 }
2408
2409 if (ctype->outdigits_act != 0)
2410 lr_error (ldfile, _("\
2411 %s: field `%s' declared more than once"),
2412 "LC_CTYPE", "outdigit");
2413 class_bit = 0;
2414 class256_bit = 0;
2415 handle_digits = 2;
2416 goto read_charclass;
2417
2418 case tok_toupper:
2419 /* Ignore the rest of the line if we don't need the input of
2420 this line. */
2421 if (ignore_content)
2422 {
2423 lr_ignore_rest (ldfile, 0);
2424 break;
2425 }
2426
2427 mapidx = 0;
2428 goto read_mapping;
2429
2430 case tok_tolower:
2431 /* Ignore the rest of the line if we don't need the input of
2432 this line. */
2433 if (ignore_content)
2434 {
2435 lr_ignore_rest (ldfile, 0);
2436 break;
2437 }
2438
2439 mapidx = 1;
2440 goto read_mapping;
2441
2442 case tok_map:
2443 /* Ignore the rest of the line if we don't need the input of
2444 this line. */
2445 if (ignore_content)
2446 {
2447 lr_ignore_rest (ldfile, 0);
2448 break;
2449 }
2450
2451 /* We simply forget the `map' keyword and use the following
2452 operand to determine the mapping. */
2453 now = lr_token (ldfile, charmap, NULL);
2454 if (now->tok == tok_ident || now->tok == tok_string)
2455 {
2456 size_t cnt;
2457
2458 for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2459 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2460 break;
2461
2462 if (cnt < ctype->map_collection_nr)
2463 free (now->val.str.startmb);
2464 else
2465 /* OK, it's a new map. */
2466 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2467
2468 mapidx = cnt;
2469 }
2470 else if (now->tok < tok_toupper || now->tok > tok_tolower)
2471 goto err_label;
2472 else
2473 mapidx = now->tok - tok_toupper;
2474
2475 now = lr_token (ldfile, charmap, NULL);
2476 /* This better should be a semicolon. */
2477 if (now->tok != tok_semicolon)
2478 goto err_label;
2479
2480 read_mapping:
2481 /* Test whether this mapping was already defined. */
2482 if (ctype->tomap_done[mapidx])
2483 {
2484 lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2485 ctype->mapnames[mapidx]);
2486 lr_ignore_rest (ldfile, 0);
2487 break;
2488 }
2489 ctype->tomap_done[mapidx] = 1;
2490
2491 now = lr_token (ldfile, charmap, NULL);
2492 while (now->tok != tok_eol && now->tok != tok_eof)
2493 {
2494 struct charseq *from_seq;
2495 uint32_t from_wch;
2496 struct charseq *to_seq;
2497 uint32_t to_wch;
2498
2499 /* Every pair starts with an opening brace. */
2500 if (now->tok != tok_open_brace)
2501 goto err_label;
2502
2503 /* Next comes the from-value. */
2504 now = lr_token (ldfile, charmap, NULL);
2505 if (get_character (now, charmap, repertoire, &from_seq,
2506 &from_wch) != 0)
2507 goto err_label;
2508
2509 /* The next is a comma. */
2510 now = lr_token (ldfile, charmap, NULL);
2511 if (now->tok != tok_comma)
2512 goto err_label;
2513
2514 /* And the other value. */
2515 now = lr_token (ldfile, charmap, NULL);
2516 if (get_character (now, charmap, repertoire, &to_seq,
2517 &to_wch) != 0)
2518 goto err_label;
2519
2520 /* And the last thing is the closing brace. */
2521 now = lr_token (ldfile, charmap, NULL);
2522 if (now->tok != tok_close_brace)
2523 goto err_label;
2524
2525 if (!ignore_content)
2526 {
2527 if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2528 && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2529 /* We can use this value. */
2530 ctype->map256_collection[mapidx][from_seq->bytes[0]]
2531 = to_seq->bytes[0];
2532
2533 if (from_wch != ILLEGAL_CHAR_VALUE
2534 && to_wch != ILLEGAL_CHAR_VALUE)
2535 /* Both correct values. */
2536 *find_idx (ctype, &ctype->map_collection[mapidx],
2537 &ctype->map_collection_max[mapidx],
2538 &ctype->map_collection_act[mapidx],
2539 from_wch) = to_wch;
2540 }
2541
2542 /* Now comes a semicolon or the end of the line/file. */
2543 now = lr_token (ldfile, charmap, NULL);
2544 if (now->tok == tok_semicolon)
2545 now = lr_token (ldfile, charmap, NULL);
2546 }
2547 break;
2548
2549 case tok_translit_start:
2550 /* Ignore the rest of the line if we don't need the input of
2551 this line. */
2552 if (ignore_content)
2553 {
2554 lr_ignore_rest (ldfile, 0);
2555 break;
2556 }
2557
2558 /* The rest of the line better should be empty. */
2559 lr_ignore_rest (ldfile, 1);
2560
2561 /* We count here the number of allocated entries in the `translit'
2562 array. */
2563 cnt = 0;
2564
2565 /* We proceed until we see the `translit_end' token. */
2566 while (now = lr_token (ldfile, charmap, repertoire),
2567 now->tok != tok_translit_end && now->tok != tok_eof)
2568 {
2569 if (now->tok == tok_eol)
2570 /* Ignore empty lines. */
2571 continue;
2572
2573 if (now->tok == tok_translit_end)
2574 {
2575 lr_ignore_rest (ldfile, 0);
2576 break;
2577 }
2578
2579 if (now->tok == tok_include)
2580 {
2581 /* We have to include locale. */
2582 const char *locale_name;
2583 const char *repertoire_name;
2584
2585 now = lr_token (ldfile, charmap, NULL);
2586 /* This should be a string or an identifier. In any
2587 case something to name a locale. */
2588 if (now->tok != tok_string && now->tok != tok_ident)
2589 {
2590 translit_syntax:
2591 lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2592 lr_ignore_rest (ldfile, 0);
2593 continue;
2594 }
2595 locale_name = now->val.str.startmb;
2596
2597 /* Next should be a semicolon. */
2598 now = lr_token (ldfile, charmap, NULL);
2599 if (now->tok != tok_semicolon)
2600 goto translit_syntax;
2601
2602 /* Now the repertoire name. */
2603 now = lr_token (ldfile, charmap, NULL);
2604 if ((now->tok != tok_string && now->tok != tok_ident)
2605 || now->val.str.startmb == NULL)
2606 goto translit_syntax;
2607 repertoire_name = now->val.str.startmb;
2608
2609 /* We must not have more than one `include'. */
2610 if (ctype->translit_copy_locale != NULL)
2611 {
2612 lr_error (ldfile, _("\
2613 %s: only one `include' instruction allowed"), "LC_CTYPE");
2614 lr_ignore_rest (ldfile, 0);
2615 continue;
2616 }
2617
2618 ctype->translit_copy_locale = locale_name;
2619 ctype->translit_copy_repertoire = repertoire_name;
2620
2621 /* The rest of the line must be empty. */
2622 lr_ignore_rest (ldfile, 1);
2623
2624 /* Make sure the locale is read. */
2625 add_to_readlist (LC_CTYPE, ctype->translit_copy_locale,
2626 repertoire_name, 1, NULL);
2627 continue;
2628 }
2629 else if (now->tok == tok_default_missing)
2630 {
2631 uint32_t *wstr;
2632
2633 /* We expect a single character or string as the
2634 argument. */
2635 now = lr_token (ldfile, charmap, NULL);
2636 wstr = read_widestring (ldfile, now, charmap, repertoire);
2637
2638 if (wstr != NULL)
2639 {
2640 if (ctype->default_missing != NULL)
2641 {
2642 lr_error (ldfile, _("\
2643 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2644 error_at_line (0, 0, ctype->default_missing_file,
2645 ctype->default_missing_lineno,
2646 _("previous definition was here"));
2647 }
2648 else
2649 {
2650 ctype->default_missing = wstr;
2651 ctype->default_missing_file = ldfile->fname;
2652 ctype->default_missing_lineno = ldfile->lineno;
2653 }
2654 }
2655 lr_ignore_rest (ldfile, 1);
2656 continue;
2657 }
2658 else if (now->tok == tok_translit_ignore)
2659 {
2660 read_translit_ignore_entry (ldfile, ctype, charmap,
2661 repertoire);
2662 continue;
2663 }
2664
2665 read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2666 }
2667 break;
2668
2669 case tok_ident:
2670 /* Ignore the rest of the line if we don't need the input of
2671 this line. */
2672 if (ignore_content)
2673 {
2674 lr_ignore_rest (ldfile, 0);
2675 break;
2676 }
2677
2678 /* This could mean one of several things. First test whether
2679 it's a character class name. */
2680 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2681 if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2682 break;
2683 if (cnt < ctype->nr_charclass)
2684 {
2685 class_bit = _ISwbit (cnt);
2686 class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2687 free (now->val.str.startmb);
2688 goto read_charclass;
2689 }
2690 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2691 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2692 break;
2693 if (cnt < ctype->map_collection_nr)
2694 {
2695 mapidx = cnt;
2696 free (now->val.str.startmb);
2697 goto read_mapping;
2698 }
2699 #ifdef PREDEFINED_CLASSES
2700 if (strcmp (now->val.str.startmb, "special1") == 0)
2701 {
2702 class_bit = _ISwspecial1;
2703 free (now->val.str.startmb);
2704 goto read_charclass;
2705 }
2706 if (strcmp (now->val.str.startmb, "special2") == 0)
2707 {
2708 class_bit = _ISwspecial2;
2709 free (now->val.str.startmb);
2710 goto read_charclass;
2711 }
2712 if (strcmp (now->val.str.startmb, "special3") == 0)
2713 {
2714 class_bit = _ISwspecial3;
2715 free (now->val.str.startmb);
2716 goto read_charclass;
2717 }
2718 if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2719 {
2720 mapidx = 2;
2721 goto read_mapping;
2722 }
2723 #endif
2724 break;
2725
2726 case tok_end:
2727 /* Next we assume `LC_CTYPE'. */
2728 now = lr_token (ldfile, charmap, NULL);
2729 if (now->tok == tok_eof)
2730 break;
2731 if (now->tok == tok_eol)
2732 lr_error (ldfile, _("%s: incomplete `END' line"),
2733 "LC_CTYPE");
2734 else if (now->tok != tok_lc_ctype)
2735 lr_error (ldfile, _("\
2736 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2737 lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2738 return;
2739
2740 default:
2741 err_label:
2742 if (now->tok != tok_eof)
2743 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2744 }
2745
2746 /* Prepare for the next round. */
2747 now = lr_token (ldfile, charmap, NULL);
2748 nowtok = now->tok;
2749 }
2750
2751 /* When we come here we reached the end of the file. */
2752 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2753 }
2754
2755
2756 static void
2757 set_class_defaults (struct locale_ctype_t *ctype, struct charmap_t *charmap,
2758 struct repertoire_t *repertoire)
2759 {
2760 size_t cnt;
2761
2762 /* These function defines the default values for the classes and conversions
2763 according to POSIX.2 2.5.2.1.
2764 It may seem that the order of these if-blocks is arbitrary but it is NOT.
2765 Don't move them unless you know what you do! */
2766
2767 void set_default (int bitpos, int from, int to)
2768 {
2769 char tmp[2];
2770 int ch;
2771 int bit = _ISbit (bitpos);
2772 int bitw = _ISwbit (bitpos);
2773 /* Define string. */
2774 strcpy (tmp, "?");
2775
2776 for (ch = from; ch <= to; ++ch)
2777 {
2778 struct charseq *seq;
2779 tmp[0] = ch;
2780
2781 seq = charmap_find_value (charmap, tmp, 1);
2782 if (seq == NULL)
2783 {
2784 if (!be_quiet)
2785 error (0, 0, _("\
2786 %s: character `%s' not defined in charmap while needed as default value"),
2787 "LC_CTYPE", tmp);
2788 }
2789 else if (seq->nbytes != 1)
2790 error (0, 0, _("\
2791 %s: character `%s' in charmap not representable with one byte"),
2792 "LC_CTYPE", tmp);
2793 else
2794 ctype->class256_collection[seq->bytes[0]] |= bit;
2795
2796 /* No need to search here, the ASCII value is also the Unicode
2797 value. */
2798 ELEM (ctype, class_collection, , ch) |= bitw;
2799 }
2800 }
2801
2802 /* Set default values if keyword was not present. */
2803 if ((ctype->class_done & BITw (tok_upper)) == 0)
2804 /* "If this keyword [lower] is not specified, the lowercase letters
2805 `A' through `Z', ..., shall automatically belong to this class,
2806 with implementation defined character values." [P1003.2, 2.5.2.1] */
2807 set_default (BITPOS (tok_upper), 'A', 'Z');
2808
2809 if ((ctype->class_done & BITw (tok_lower)) == 0)
2810 /* "If this keyword [lower] is not specified, the lowercase letters
2811 `a' through `z', ..., shall automatically belong to this class,
2812 with implementation defined character values." [P1003.2, 2.5.2.1] */
2813 set_default (BITPOS (tok_lower), 'a', 'z');
2814
2815 if ((ctype->class_done & BITw (tok_alpha)) == 0)
2816 {
2817 /* Table 2-6 in P1003.2 says that characters in class `upper' or
2818 class `lower' *must* be in class `alpha'. */
2819 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
2820 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
2821
2822 for (cnt = 0; cnt < 256; ++cnt)
2823 if ((ctype->class256_collection[cnt] & mask) != 0)
2824 ctype->class256_collection[cnt] |= BIT (tok_alpha);
2825
2826 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2827 if ((ctype->class_collection[cnt] & maskw) != 0)
2828 ctype->class_collection[cnt] |= BITw (tok_alpha);
2829 }
2830
2831 if ((ctype->class_done & BITw (tok_digit)) == 0)
2832 /* "If this keyword [digit] is not specified, the digits `0' through
2833 `9', ..., shall automatically belong to this class, with
2834 implementation-defined character values." [P1003.2, 2.5.2.1] */
2835 set_default (BITPOS (tok_digit), '0', '9');
2836
2837 /* "Only characters specified for the `alpha' and `digit' keyword
2838 shall be specified. Characters specified for the keyword `alpha'
2839 and `digit' are automatically included in this class. */
2840 {
2841 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
2842 unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
2843
2844 for (cnt = 0; cnt < 256; ++cnt)
2845 if ((ctype->class256_collection[cnt] & mask) != 0)
2846 ctype->class256_collection[cnt] |= BIT (tok_alnum);
2847
2848 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2849 if ((ctype->class_collection[cnt] & maskw) != 0)
2850 ctype->class_collection[cnt] |= BITw (tok_alnum);
2851 }
2852
2853 if ((ctype->class_done & BITw (tok_space)) == 0)
2854 /* "If this keyword [space] is not specified, the characters <space>,
2855 <form-feed>, <newline>, <carriage-return>, <tab>, and
2856 <vertical-tab>, ..., shall automatically belong to this class,
2857 with implementation-defined character values." [P1003.2, 2.5.2.1] */
2858 {
2859 struct charseq *seq;
2860
2861 seq = charmap_find_value (charmap, "space", 5);
2862 if (seq == NULL)
2863 seq = charmap_find_value (charmap, "SP", 2);
2864 if (seq == NULL)
2865 seq = charmap_find_value (charmap, "U00000020", 9);
2866 if (seq == NULL)
2867 {
2868 if (!be_quiet)
2869 error (0, 0, _("\
2870 %s: character `%s' not defined while needed as default value"),
2871 "LC_CTYPE", "<space>");
2872 }
2873 else if (seq->nbytes != 1)
2874 error (0, 0, _("\
2875 %s: character `%s' in charmap not representable with one byte"),
2876 "LC_CTYPE", "<space>");
2877 else
2878 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2879
2880 /* No need to search. */
2881 ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
2882
2883 seq = charmap_find_value (charmap, "form-feed", 9);
2884 if (seq == NULL)
2885 seq = charmap_find_value (charmap, "U0000000C", 9);
2886 if (seq == NULL)
2887 {
2888 if (!be_quiet)
2889 error (0, 0, _("\
2890 %s: character `%s' not defined while needed as default value"),
2891 "LC_CTYPE", "<form-feed>");
2892 }
2893 else if (seq->nbytes != 1)
2894 error (0, 0, _("\
2895 %s: character `%s' in charmap not representable with one byte"),
2896 "LC_CTYPE", "<form-feed>");
2897 else
2898 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2899
2900 /* No need to search. */
2901 ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
2902
2903
2904 seq = charmap_find_value (charmap, "newline", 7);
2905 if (seq == NULL)
2906 seq = charmap_find_value (charmap, "U0000000A", 9);
2907 if (seq == NULL)
2908 {
2909 if (!be_quiet)
2910 error (0, 0, _("\
2911 character `%s' not defined while needed as default value"),
2912 "<newline>");
2913 }
2914 else if (seq->nbytes != 1)
2915 error (0, 0, _("\
2916 %s: character `%s' in charmap not representable with one byte"),
2917 "LC_CTYPE", "<newline>");
2918 else
2919 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2920
2921 /* No need to search. */
2922 ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
2923
2924
2925 seq = charmap_find_value (charmap, "carriage-return", 15);
2926 if (seq == NULL)
2927 seq = charmap_find_value (charmap, "U0000000D", 9);
2928 if (seq == NULL)
2929 {
2930 if (!be_quiet)
2931 error (0, 0, _("\
2932 %s: character `%s' not defined while needed as default value"),
2933 "LC_CTYPE", "<carriage-return>");
2934 }
2935 else if (seq->nbytes != 1)
2936 error (0, 0, _("\
2937 %s: character `%s' in charmap not representable with one byte"),
2938 "LC_CTYPE", "<carriage-return>");
2939 else
2940 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2941
2942 /* No need to search. */
2943 ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
2944
2945
2946 seq = charmap_find_value (charmap, "tab", 3);
2947 if (seq == NULL)
2948 seq = charmap_find_value (charmap, "U00000009", 9);
2949 if (seq == NULL)
2950 {
2951 if (!be_quiet)
2952 error (0, 0, _("\
2953 %s: character `%s' not defined while needed as default value"),
2954 "LC_CTYPE", "<tab>");
2955 }
2956 else if (seq->nbytes != 1)
2957 error (0, 0, _("\
2958 %s: character `%s' in charmap not representable with one byte"),
2959 "LC_CTYPE", "<tab>");
2960 else
2961 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2962
2963 /* No need to search. */
2964 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
2965
2966
2967 seq = charmap_find_value (charmap, "vertical-tab", 12);
2968 if (seq == NULL)
2969 seq = charmap_find_value (charmap, "U0000000B", 9);
2970 if (seq == NULL)
2971 {
2972 if (!be_quiet)
2973 error (0, 0, _("\
2974 %s: character `%s' not defined while needed as default value"),
2975 "LC_CTYPE", "<vertical-tab>");
2976 }
2977 else if (seq->nbytes != 1)
2978 error (0, 0, _("\
2979 %s: character `%s' in charmap not representable with one byte"),
2980 "LC_CTYPE", "<vertical-tab>");
2981 else
2982 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2983
2984 /* No need to search. */
2985 ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
2986 }
2987
2988 if ((ctype->class_done & BITw (tok_xdigit)) == 0)
2989 /* "If this keyword is not specified, the digits `0' to `9', the
2990 uppercase letters `A' through `F', and the lowercase letters `a'
2991 through `f', ..., shell automatically belong to this class, with
2992 implementation defined character values." [P1003.2, 2.5.2.1] */
2993 {
2994 set_default (BITPOS (tok_xdigit), '0', '9');
2995 set_default (BITPOS (tok_xdigit), 'A', 'F');
2996 set_default (BITPOS (tok_xdigit), 'a', 'f');
2997 }
2998
2999 if ((ctype->class_done & BITw (tok_blank)) == 0)
3000 /* "If this keyword [blank] is unspecified, the characters <space> and
3001 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3002 {
3003 struct charseq *seq;
3004
3005 seq = charmap_find_value (charmap, "space", 5);
3006 if (seq == NULL)
3007 seq = charmap_find_value (charmap, "SP", 2);
3008 if (seq == NULL)
3009 seq = charmap_find_value (charmap, "U00000020", 9);
3010 if (seq == NULL)
3011 {
3012 if (!be_quiet)
3013 error (0, 0, _("\
3014 %s: character `%s' not defined while needed as default value"),
3015 "LC_CTYPE", "<space>");
3016 }
3017 else if (seq->nbytes != 1)
3018 error (0, 0, _("\
3019 %s: character `%s' in charmap not representable with one byte"),
3020 "LC_CTYPE", "<space>");
3021 else
3022 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3023
3024 /* No need to search. */
3025 ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3026
3027
3028 seq = charmap_find_value (charmap, "tab", 3);
3029 if (seq == NULL)
3030 seq = charmap_find_value (charmap, "U00000009", 9);
3031 if (seq == NULL)
3032 {
3033 if (!be_quiet)
3034 error (0, 0, _("\
3035 %s: character `%s' not defined while needed as default value"),
3036 "LC_CTYPE", "<tab>");
3037 }
3038 else if (seq->nbytes != 1)
3039 error (0, 0, _("\
3040 %s: character `%s' in charmap not representable with one byte"),
3041 "LC_CTYPE", "<tab>");
3042 else
3043 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3044
3045 /* No need to search. */
3046 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3047 }
3048
3049 if ((ctype->class_done & BITw (tok_graph)) == 0)
3050 /* "If this keyword [graph] is not specified, characters specified for
3051 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3052 shall belong to this character class." [P1003.2, 2.5.2.1] */
3053 {
3054 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3055 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3056 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3057 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3058 BITw (tok_punct);
3059 size_t cnt;
3060
3061 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3062 if ((ctype->class_collection[cnt] & maskw) != 0)
3063 ctype->class_collection[cnt] |= BITw (tok_graph);
3064
3065 for (cnt = 0; cnt < 256; ++cnt)
3066 if ((ctype->class256_collection[cnt] & mask) != 0)
3067 ctype->class256_collection[cnt] |= BIT (tok_graph);
3068 }
3069
3070 if ((ctype->class_done & BITw (tok_print)) == 0)
3071 /* "If this keyword [print] is not provided, characters specified for
3072 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3073 and the <space> character shall belong to this character class."
3074 [P1003.2, 2.5.2.1] */
3075 {
3076 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3077 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3078 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3079 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3080 BITw (tok_punct);
3081 size_t cnt;
3082 struct charseq *seq;
3083
3084 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3085 if ((ctype->class_collection[cnt] & maskw) != 0)
3086 ctype->class_collection[cnt] |= BITw (tok_print);
3087
3088 for (cnt = 0; cnt < 256; ++cnt)
3089 if ((ctype->class256_collection[cnt] & mask) != 0)
3090 ctype->class256_collection[cnt] |= BIT (tok_print);
3091
3092
3093 seq = charmap_find_value (charmap, "space", 5);
3094 if (seq == NULL)
3095 seq = charmap_find_value (charmap, "SP", 2);
3096 if (seq == NULL)
3097 seq = charmap_find_value (charmap, "U00000020", 9);
3098 if (seq == NULL)
3099 {
3100 if (!be_quiet)
3101 error (0, 0, _("\
3102 %s: character `%s' not defined while needed as default value"),
3103 "LC_CTYPE", "<space>");
3104 }
3105 else if (seq->nbytes != 1)
3106 error (0, 0, _("\
3107 %s: character `%s' in charmap not representable with one byte"),
3108 "LC_CTYPE", "<space>");
3109 else
3110 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3111
3112 /* No need to search. */
3113 ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3114 }
3115
3116 if (ctype->tomap_done[0] == 0)
3117 /* "If this keyword [toupper] is not specified, the lowercase letters
3118 `a' through `z', and their corresponding uppercase letters `A' to
3119 `Z', ..., shall automatically be included, with implementation-
3120 defined character values." [P1003.2, 2.5.2.1] */
3121 {
3122 char tmp[4];
3123 int ch;
3124
3125 strcpy (tmp, "<?>");
3126
3127 for (ch = 'a'; ch <= 'z'; ++ch)
3128 {
3129 struct charseq *seq_from, *seq_to;
3130
3131 tmp[1] = (char) ch;
3132
3133 seq_from = charmap_find_value (charmap, &tmp[1], 1);
3134 if (seq_from == NULL)
3135 {
3136 if (!be_quiet)
3137 error (0, 0, _("\
3138 %s: character `%s' not defined while needed as default value"),
3139 "LC_CTYPE", tmp);
3140 }
3141 else if (seq_from->nbytes != 1)
3142 {
3143 if (!be_quiet)
3144 error (0, 0, _("\
3145 %s: character `%s' needed as default value not representable with one byte"),
3146 "LC_CTYPE", tmp);
3147 }
3148 else
3149 {
3150 /* This conversion is implementation defined. */
3151 tmp[1] = (char) (ch + ('A' - 'a'));
3152 seq_to = charmap_find_value (charmap, &tmp[1], 1);
3153 if (seq_to == NULL)
3154 {
3155 if (!be_quiet)
3156 error (0, 0, _("\
3157 %s: character `%s' not defined while needed as default value"),
3158 "LC_CTYPE", tmp);
3159 }
3160 else if (seq_to->nbytes != 1)
3161 {
3162 if (!be_quiet)
3163 error (0, 0, _("\
3164 %s: character `%s' needed as default value not representable with one byte"),
3165 "LC_CTYPE", tmp);
3166 }
3167 else
3168 /* The index [0] is determined by the order of the
3169 `ctype_map_newP' calls in `ctype_startup'. */
3170 ctype->map256_collection[0][seq_from->bytes[0]]
3171 = seq_to->bytes[0];
3172 }
3173
3174 /* No need to search. */
3175 ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3176 }
3177 }
3178
3179 if (ctype->tomap_done[1] == 0)
3180 /* "If this keyword [tolower] is not specified, the mapping shall be
3181 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3182 {
3183 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3184 if (ctype->map_collection[0][cnt] != 0)
3185 ELEM (ctype, map_collection, [1],
3186 ctype->map_collection[0][cnt])
3187 = ctype->charnames[cnt];
3188
3189 for (cnt = 0; cnt < 256; ++cnt)
3190 if (ctype->map256_collection[0][cnt] != 0)
3191 ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3192 }
3193
3194 if (ctype->outdigits_act == 0)
3195 {
3196 for (cnt = 0; cnt < 10; ++cnt)
3197 {
3198 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3199 digits + cnt, 1);
3200
3201 if (ctype->mboutdigits[cnt] == NULL)
3202 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3203 longnames[cnt],
3204 strlen (longnames[cnt]));
3205
3206 if (ctype->mboutdigits[cnt] == NULL)
3207 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3208 uninames[cnt], 9);
3209
3210 if (ctype->mboutdigits[cnt] == NULL)
3211 {
3212 /* Provide a replacement. */
3213 error (0, 0, _("\
3214 no output digits defined and none of the standard names in the charmap"));
3215
3216 ctype->mboutdigits[cnt] = obstack_alloc (&charmap->mem_pool,
3217 sizeof (struct charseq)
3218 + 1);
3219
3220 /* This is better than nothing. */
3221 ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3222 ctype->mboutdigits[cnt]->nbytes = 1;
3223 }
3224
3225 ctype->wcoutdigits[cnt] = L'0' + cnt;
3226 }
3227
3228 ctype->outdigits_act = 10;
3229 }
3230 }
3231
3232
3233 /* Construction of sparse 3-level tables.
3234 See wchar-lookup.h for their structure and the meaning of p and q. */
3235
/* Builder state for one sparse 3-level bit table.  Each character added
   with wctype_table_add owns one bit of a 32-bit word in level3. */
struct wctype_table
{
  /* Parameters: a level3 block holds 1 << p words, a level2 block holds
     1 << q entries. */
  unsigned int p;
  unsigned int q;

  /* Working representation.  ~0 marks an entry that has no block yet. */

  /* level1: one entry per top index, holding a level2 block number.
     level1_alloc and level1_size count entries. */
  size_t level1_alloc, level1_size;
  uint32_t *level1;

  /* level2: blocks of 1 << q entries, each holding a level3 block number.
     level2_alloc and level2_size count blocks. */
  size_t level2_alloc, level2_size;
  uint32_t *level2;

  /* level3: blocks of 1 << p bit words.  level3_alloc and level3_size
     count blocks. */
  size_t level3_alloc, level3_size;
  uint32_t *level3;

  /* Compressed representation, produced by wctype_table_finalize:
     size in bytes and the buffer itself. */
  size_t result_size;
  char *result;
};
3255
3256 /* Initialize. Assumes t->p and t->q have already been set. */
3257 static inline void
3258 wctype_table_init (struct wctype_table *t)
3259 {
3260 t->level1_alloc = t->level1_size = 0;
3261 t->level2_alloc = t->level2_size = 0;
3262 t->level3_alloc = t->level3_size = 0;
3263 }
3264
3265 /* Add one entry. */
3266 static void
3267 wctype_table_add (struct wctype_table *t, uint32_t wc)
3268 {
3269 uint32_t index1 = wc >> (t->q + t->p + 5);
3270 uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3271 uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3272 uint32_t index4 = wc & 0x1f;
3273 size_t i, i1, i2;
3274
3275 if (index1 >= t->level1_size)
3276 {
3277 if (index1 >= t->level1_alloc)
3278 {
3279 size_t alloc = 2 * t->level1_alloc;
3280 if (alloc <= index1)
3281 alloc = index1 + 1;
3282 t->level1 = (t->level1_alloc > 0
3283 ? (uint32_t *) xrealloc ((char *) t->level1,
3284 alloc * sizeof (uint32_t))
3285 : (uint32_t *) xmalloc (alloc * sizeof (uint32_t)));
3286 t->level1_alloc = alloc;
3287 }
3288 while (index1 >= t->level1_size)
3289 t->level1[t->level1_size++] = ~((uint32_t) 0);
3290 }
3291
3292 if (t->level1[index1] == ~((uint32_t) 0))
3293 {
3294 if (t->level2_size == t->level2_alloc)
3295 {
3296 size_t alloc = 2 * t->level2_alloc + 1;
3297 t->level2 = (t->level2_alloc > 0
3298 ? (uint32_t *) xrealloc ((char *) t->level2,
3299 (alloc << t->q) * sizeof (uint32_t))
3300 : (uint32_t *) xmalloc ((alloc << t->q) * sizeof (uint32_t)));
3301 t->level2_alloc = alloc;
3302 }
3303 i1 = t->level2_size << t->q;
3304 i2 = (t->level2_size + 1) << t->q;
3305 for (i = i1; i < i2; i++)
3306 t->level2[i] = ~((uint32_t) 0);
3307 t->level1[index1] = t->level2_size++;
3308 }
3309
3310 index2 += t->level1[index1] << t->q;
3311
3312 if (t->level2[index2] == ~((uint32_t) 0))
3313 {
3314 if (t->level3_size == t->level3_alloc)
3315 {
3316 size_t alloc = 2 * t->level3_alloc + 1;
3317 t->level3 = (t->level3_alloc > 0
3318 ? (uint32_t *) xrealloc ((char *) t->level3,
3319 (alloc << t->p) * sizeof (uint32_t))
3320 : (uint32_t *) xmalloc ((alloc << t->p) * sizeof (uint32_t)));
3321 t->level3_alloc = alloc;
3322 }
3323 i1 = t->level3_size << t->p;
3324 i2 = (t->level3_size + 1) << t->p;
3325 for (i = i1; i < i2; i++)
3326 t->level3[i] = 0;
3327 t->level2[index2] = t->level3_size++;
3328 }
3329
3330 index3 += t->level2[index2] << t->p;
3331
3332 t->level3[index3] |= (uint32_t)1 << index4;
3333 }
3334
3335 /* Finalize and shrink. */
3336 static void
3337 wctype_table_finalize (struct wctype_table *t)
3338 {
3339 size_t i, j, k;
3340 uint32_t reorder3[t->level3_size];
3341 uint32_t reorder2[t->level2_size];
3342 uint32_t level1_offset, level2_offset, level3_offset;
3343
3344 /* Uniquify level3 blocks. */
3345 k = 0;
3346 for (j = 0; j < t->level3_size; j++)
3347 {
3348 for (i = 0; i < k; i++)
3349 if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3350 (1 << t->p) * sizeof (uint32_t)) == 0)
3351 break;
3352 /* Relocate block j to block i. */
3353 reorder3[j] = i;
3354 if (i == k)
3355 {
3356 if (i != j)
3357 memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3358 (1 << t->p) * sizeof (uint32_t));
3359 k++;
3360 }
3361 }
3362 t->level3_size = k;
3363
3364 for (i = 0; i < (t->level2_size << t->q); i++)
3365 if (t->level2[i] != ~((uint32_t) 0))
3366 t->level2[i] = reorder3[t->level2[i]];
3367
3368 /* Uniquify level2 blocks. */
3369 k = 0;
3370 for (j = 0; j < t->level2_size; j++)
3371 {
3372 for (i = 0; i < k; i++)
3373 if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3374 (1 << t->q) * sizeof (uint32_t)) == 0)
3375 break;
3376 /* Relocate block j to block i. */
3377 reorder2[j] = i;
3378 if (i == k)
3379 {
3380 if (i != j)
3381 memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3382 (1 << t->q) * sizeof (uint32_t));
3383 k++;
3384 }
3385 }
3386 t->level2_size = k;
3387
3388 for (i = 0; i < t->level1_size; i++)
3389 if (t->level1[i] != ~((uint32_t) 0))
3390 t->level1[i] = reorder2[t->level1[i]];
3391
3392 /* Create and fill the resulting compressed representation. */
3393 t->result_size =
3394 5 * sizeof (uint32_t)
3395 + t->level1_size * sizeof (uint32_t)
3396 + (t->level2_size << t->q) * sizeof (uint32_t)
3397 + (t->level3_size << t->p) * sizeof (uint32_t);
3398 t->result = (char *) xmalloc (t->result_size);
3399
3400 level1_offset =
3401 5 * sizeof (uint32_t);
3402 level2_offset =
3403 5 * sizeof (uint32_t)
3404 + t->level1_size * sizeof (uint32_t);
3405 level3_offset =
3406 5 * sizeof (uint32_t)
3407 + t->level1_size * sizeof (uint32_t)
3408 + (t->level2_size << t->q) * sizeof (uint32_t);
3409
3410 ((uint32_t *) t->result)[0] = t->q + t->p + 5;
3411 ((uint32_t *) t->result)[1] = t->level1_size;
3412 ((uint32_t *) t->result)[2] = t->p + 5;
3413 ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3414 ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3415
3416 for (i = 0; i < t->level1_size; i++)
3417 ((uint32_t *) (t->result + level1_offset))[i] =
3418 (t->level1[i] == ~((uint32_t) 0)
3419 ? 0
3420 : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3421
3422 for (i = 0; i < (t->level2_size << t->q); i++)
3423 ((uint32_t *) (t->result + level2_offset))[i] =
3424 (t->level2[i] == ~((uint32_t) 0)
3425 ? 0
3426 : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3427
3428 for (i = 0; i < (t->level3_size << t->p); i++)
3429 ((uint32_t *) (t->result + level3_offset))[i] = t->level3[i];
3430
3431 if (t->level1_alloc > 0)
3432 free (t->level1);
3433 if (t->level2_alloc > 0)
3434 free (t->level2);
3435 if (t->level3_alloc > 0)
3436 free (t->level3);
3437 }
3438
/* Builder state for one sparse 3-level byte table.  Each character added
   with wcwidth_table_add owns one byte in level3 holding its width. */
struct wcwidth_table
{
  /* Parameters: a level3 block holds 1 << p bytes, a level2 block holds
     1 << q entries. */
  unsigned int p;
  unsigned int q;

  /* Working representation.  ~0 marks a level1/level2 entry that has no
     block yet; 0xff marks a level3 byte without a width. */

  /* level1: one entry per top index, holding a level2 block number.
     level1_alloc and level1_size count entries. */
  size_t level1_alloc, level1_size;
  uint32_t *level1;

  /* level2: blocks of 1 << q entries, each holding a level3 block number.
     level2_alloc and level2_size count blocks. */
  size_t level2_alloc, level2_size;
  uint32_t *level2;

  /* level3: blocks of 1 << p width bytes.  level3_alloc and level3_size
     count blocks. */
  size_t level3_alloc, level3_size;
  uint8_t *level3;

  /* Compressed representation, produced by wcwidth_table_finalize:
     size in bytes and the buffer itself. */
  size_t result_size;
  char *result;
};
3458
3459 /* Initialize. Assumes t->p and t->q have already been set. */
3460 static inline void
3461 wcwidth_table_init (struct wcwidth_table *t)
3462 {
3463 t->level1_alloc = t->level1_size = 0;
3464 t->level2_alloc = t->level2_size = 0;
3465 t->level3_alloc = t->level3_size = 0;
3466 }
3467
3468 /* Add one entry. */
3469 static void
3470 wcwidth_table_add (struct wcwidth_table *t, uint32_t wc, uint8_t width)
3471 {
3472 uint32_t index1 = wc >> (t->q + t->p);
3473 uint32_t index2 = (wc >> t->p) & ((1 << t->q) - 1);
3474 uint32_t index3 = wc & ((1 << t->p) - 1);
3475 size_t i, i1, i2;
3476
3477 if (width == 0xff)
3478 return;
3479
3480 if (index1 >= t->level1_size)
3481 {
3482 if (index1 >= t->level1_alloc)
3483 {
3484 size_t alloc = 2 * t->level1_alloc;
3485 if (alloc <= index1)
3486 alloc = index1 + 1;
3487 t->level1 = (t->level1_alloc > 0
3488 ? (uint32_t *) xrealloc ((char *) t->level1,
3489 alloc * sizeof (uint32_t))
3490 : (uint32_t *) xmalloc (alloc * sizeof (uint32_t)));
3491 t->level1_alloc = alloc;
3492 }
3493 while (index1 >= t->level1_size)
3494 t->level1[t->level1_size++] = ~((uint32_t) 0);
3495 }
3496
3497 if (t->level1[index1] == ~((uint32_t) 0))
3498 {
3499 if (t->level2_size == t->level2_alloc)
3500 {
3501 size_t alloc = 2 * t->level2_alloc + 1;
3502 t->level2 = (t->level2_alloc > 0
3503 ? (uint32_t *) xrealloc ((char *) t->level2,
3504 (alloc << t->q) * sizeof (uint32_t))
3505 : (uint32_t *) xmalloc ((alloc << t->q) * sizeof (uint32_t)));
3506 t->level2_alloc = alloc;
3507 }
3508 i1 = t->level2_size << t->q;
3509 i2 = (t->level2_size + 1) << t->q;
3510 for (i = i1; i < i2; i++)
3511 t->level2[i] = ~((uint32_t) 0);
3512 t->level1[index1] = t->level2_size++;
3513 }
3514
3515 index2 += t->level1[index1] << t->q;
3516
3517 if (t->level2[index2] == ~((uint32_t) 0))
3518 {
3519 if (t->level3_size == t->level3_alloc)
3520 {
3521 size_t alloc = 2 * t->level3_alloc + 1;
3522 t->level3 = (t->level3_alloc > 0
3523 ? (uint8_t *) xrealloc ((char *) t->level3,
3524 (alloc << t->p) * sizeof (uint8_t))
3525 : (uint8_t *) xmalloc ((alloc << t->p) * sizeof (uint8_t)));
3526 t->level3_alloc = alloc;
3527 }
3528 i1 = t->level3_size << t->p;
3529 i2 = (t->level3_size + 1) << t->p;
3530 for (i = i1; i < i2; i++)
3531 t->level3[i] = 0xff;
3532 t->level2[index2] = t->level3_size++;
3533 }
3534
3535 index3 += t->level2[index2] << t->p;
3536
3537 t->level3[index3] = width;
3538 }
3539
3540 /* Finalize and shrink. */
3541 static void
3542 wcwidth_table_finalize (struct wcwidth_table *t)
3543 {
3544 size_t i, j, k;
3545 uint32_t reorder3[t->level3_size];
3546 uint32_t reorder2[t->level2_size];
3547 uint32_t level1_offset, level2_offset, level3_offset, last_offset;
3548
3549 /* Uniquify level3 blocks. */
3550 k = 0;
3551 for (j = 0; j < t->level3_size; j++)
3552 {
3553 for (i = 0; i < k; i++)
3554 if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3555 (1 << t->p) * sizeof (uint8_t)) == 0)
3556 break;
3557 /* Relocate block j to block i. */
3558 reorder3[j] = i;
3559 if (i == k)
3560 {
3561 if (i != j)
3562 memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3563 (1 << t->p) * sizeof (uint8_t));
3564 k++;
3565 }
3566 }
3567 t->level3_size = k;
3568
3569 for (i = 0; i < (t->level2_size << t->q); i++)
3570 if (t->level2[i] != ~((uint32_t) 0))
3571 t->level2[i] = reorder3[t->level2[i]];
3572
3573 /* Uniquify level2 blocks. */
3574 k = 0;
3575 for (j = 0; j < t->level2_size; j++)
3576 {
3577 for (i = 0; i < k; i++)
3578 if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3579 (1 << t->q) * sizeof (uint32_t)) == 0)
3580 break;
3581 /* Relocate block j to block i. */
3582 reorder2[j] = i;
3583 if (i == k)
3584 {
3585 if (i != j)
3586 memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3587 (1 << t->q) * sizeof (uint32_t));
3588 k++;
3589 }
3590 }
3591 t->level2_size = k;
3592
3593 for (i = 0; i < t->level1_size; i++)
3594 if (t->level1[i] != ~((uint32_t) 0))
3595 t->level1[i] = reorder2[t->level1[i]];
3596
3597 /* Create and fill the resulting compressed representation. */
3598 last_offset =
3599 5 * sizeof (uint32_t)
3600 + t->level1_size * sizeof (uint32_t)
3601 + (t->level2_size << t->q) * sizeof (uint32_t)
3602 + (t->level3_size << t->p) * sizeof (uint8_t);
3603 t->result_size = (last_offset + 3) & ~3ul;
3604 t->result = (char *) xmalloc (t->result_size);
3605
3606 level1_offset =
3607 5 * sizeof (uint32_t);
3608 level2_offset =
3609 5 * sizeof (uint32_t)
3610 + t->level1_size * sizeof (uint32_t);
3611 level3_offset =
3612 5 * sizeof (uint32_t)
3613 + t->level1_size * sizeof (uint32_t)
3614 + (t->level2_size << t->q) * sizeof (uint32_t);
3615
3616 ((uint32_t *) t->result)[0] = t->q + t->p;
3617 ((uint32_t *) t->result)[1] = t->level1_size;
3618 ((uint32_t *) t->result)[2] = t->p;
3619 ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3620 ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3621
3622 for (i = 0; i < t->level1_size; i++)
3623 ((uint32_t *) (t->result + level1_offset))[i] =
3624 (t->level1[i] == ~((uint32_t) 0)
3625 ? 0
3626 : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3627
3628 for (i = 0; i < (t->level2_size << t->q); i++)
3629 ((uint32_t *) (t->result + level2_offset))[i] =
3630 (t->level2[i] == ~((uint32_t) 0)
3631 ? 0
3632 : (t->level2[i] << t->p) * sizeof (uint8_t) + level3_offset);
3633
3634 for (i = 0; i < (t->level3_size << t->p); i++)
3635 ((uint8_t *) (t->result + level3_offset))[i] = t->level3[i];
3636
3637 if (last_offset < t->result_size)
3638 memset (t->result + last_offset, 0, t->result_size - last_offset);
3639
3640 if (t->level1_alloc > 0)
3641 free (t->level1);
3642 if (t->level2_alloc > 0)
3643 free (t->level2);
3644 if (t->level3_alloc > 0)
3645 free (t->level3);
3646 }
3647
/* Builder state for one sparse 3-level mapping table.  Each character
   added with wctrans_table_add owns one signed 32-bit cell in level3
   holding the difference between the mapped character and itself. */
struct wctrans_table
{
  /* Parameters: a level3 block holds 1 << p cells, a level2 block holds
     1 << q entries. */
  unsigned int p;
  unsigned int q;

  /* Working representation.  ~0 marks a level1/level2 entry that has no
     block yet; a level3 cell of 0 means no difference is recorded. */

  /* level1: one entry per top index, holding a level2 block number.
     level1_alloc and level1_size count entries. */
  size_t level1_alloc, level1_size;
  uint32_t *level1;

  /* level2: blocks of 1 << q entries, each holding a level3 block number.
     level2_alloc and level2_size count blocks. */
  size_t level2_alloc, level2_size;
  uint32_t *level2;

  /* level3: blocks of 1 << p difference cells.  level3_alloc and
     level3_size count blocks. */
  size_t level3_alloc, level3_size;
  int32_t *level3;

  /* Compressed representation, produced by wctrans_table_finalize:
     size in bytes and the buffer itself. */
  size_t result_size;
  char *result;
};
3667
3668 /* Initialize. Assumes t->p and t->q have already been set. */
3669 static inline void
3670 wctrans_table_init (struct wctrans_table *t)
3671 {
3672 t->level1_alloc = t->level1_size = 0;
3673 t->level2_alloc = t->level2_size = 0;
3674 t->level3_alloc = t->level3_size = 0;
3675 }
3676
3677 /* Add one entry. */
3678 static void
3679 wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
3680 {
3681 uint32_t index1 = wc >> (t->q + t->p);
3682 uint32_t index2 = (wc >> t->p) & ((1 << t->q) - 1);
3683 uint32_t index3 = wc & ((1 << t->p) - 1);
3684 int32_t value = (int32_t) mapped_wc - (int32_t) wc;
3685 size_t i, i1, i2;
3686
3687 if (value == 0)
3688 return;
3689
3690 if (index1 >= t->level1_size)
3691 {
3692 if (index1 >= t->level1_alloc)
3693 {
3694 size_t alloc = 2 * t->level1_alloc;
3695 if (alloc <= index1)
3696 alloc = index1 + 1;
3697 t->level1 = (t->level1_alloc > 0
3698 ? (uint32_t *) xrealloc ((char *) t->level1,
3699 alloc * sizeof (uint32_t))
3700 : (uint32_t *) xmalloc (alloc * sizeof (uint32_t)));
3701 t->level1_alloc = alloc;
3702 }
3703 while (index1 >= t->level1_size)
3704 t->level1[t->level1_size++] = ~((uint32_t) 0);
3705 }
3706
3707 if (t->level1[index1] == ~((uint32_t) 0))
3708 {
3709 if (t->level2_size == t->level2_alloc)
3710 {
3711 size_t alloc = 2 * t->level2_alloc + 1;
3712 t->level2 = (t->level2_alloc > 0
3713 ? (uint32_t *) xrealloc ((char *) t->level2,
3714 (alloc << t->q) * sizeof (uint32_t))
3715 : (uint32_t *) xmalloc ((alloc << t->q) * sizeof (uint32_t)));
3716 t->level2_alloc = alloc;
3717 }
3718 i1 = t->level2_size << t->q;
3719 i2 = (t->level2_size + 1) << t->q;
3720 for (i = i1; i < i2; i++)
3721 t->level2[i] = ~((uint32_t) 0);
3722 t->level1[index1] = t->level2_size++;
3723 }
3724
3725 index2 += t->level1[index1] << t->q;
3726
3727 if (t->level2[index2] == ~((uint32_t) 0))
3728 {
3729 if (t->level3_size == t->level3_alloc)
3730 {
3731 size_t alloc = 2 * t->level3_alloc + 1;
3732 t->level3 = (t->level3_alloc > 0
3733 ? (int32_t *) xrealloc ((char *) t->level3,
3734 (alloc << t->p) * sizeof (int32_t))
3735 : (int32_t *) xmalloc ((alloc << t->p) * sizeof (int32_t)));
3736 t->level3_alloc = alloc;
3737 }
3738 i1 = t->level3_size << t->p;
3739 i2 = (t->level3_size + 1) << t->p;
3740 for (i = i1; i < i2; i++)
3741 t->level3[i] = 0;
3742 t->level2[index2] = t->level3_size++;
3743 }
3744
3745 index3 += t->level2[index2] << t->p;
3746
3747 t->level3[index3] = value;
3748 }
3749
3750 /* Finalize and shrink. */
3751 static void
3752 wctrans_table_finalize (struct wctrans_table *t)
3753 {
3754 size_t i, j, k;
3755 uint32_t reorder3[t->level3_size];
3756 uint32_t reorder2[t->level2_size];
3757 uint32_t level1_offset, level2_offset, level3_offset;
3758
3759 /* Uniquify level3 blocks. */
3760 k = 0;
3761 for (j = 0; j < t->level3_size; j++)
3762 {
3763 for (i = 0; i < k; i++)
3764 if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3765 (1 << t->p) * sizeof (int32_t)) == 0)
3766 break;
3767 /* Relocate block j to block i. */
3768 reorder3[j] = i;
3769 if (i == k)
3770 {
3771 if (i != j)
3772 memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3773 (1 << t->p) * sizeof (int32_t));
3774 k++;
3775 }
3776 }
3777 t->level3_size = k;
3778
3779 for (i = 0; i < (t->level2_size << t->q); i++)
3780 if (t->level2[i] != ~((uint32_t) 0))
3781 t->level2[i] = reorder3[t->level2[i]];
3782
3783 /* Uniquify level2 blocks. */
3784 k = 0;
3785 for (j = 0; j < t->level2_size; j++)
3786 {
3787 for (i = 0; i < k; i++)
3788 if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3789 (1 << t->q) * sizeof (uint32_t)) == 0)
3790 break;
3791 /* Relocate block j to block i. */
3792 reorder2[j] = i;
3793 if (i == k)
3794 {
3795 if (i != j)
3796 memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3797 (1 << t->q) * sizeof (uint32_t));
3798 k++;
3799 }
3800 }
3801 t->level2_size = k;
3802
3803 for (i = 0; i < t->level1_size; i++)
3804 if (t->level1[i] != ~((uint32_t) 0))
3805 t->level1[i] = reorder2[t->level1[i]];
3806
3807 /* Create and fill the resulting compressed representation. */
3808 t->result_size =
3809 5 * sizeof (uint32_t)
3810 + t->level1_size * sizeof (uint32_t)
3811 + (t->level2_size << t->q) * sizeof (uint32_t)
3812 + (t->level3_size << t->p) * sizeof (int32_t);
3813 t->result = (char *) xmalloc (t->result_size);
3814
3815 level1_offset =
3816 5 * sizeof (uint32_t);
3817 level2_offset =
3818 5 * sizeof (uint32_t)
3819 + t->level1_size * sizeof (uint32_t);
3820 level3_offset =
3821 5 * sizeof (uint32_t)
3822 + t->level1_size * sizeof (uint32_t)
3823 + (t->level2_size << t->q) * sizeof (uint32_t);
3824
3825 ((uint32_t *) t->result)[0] = t->q + t->p;
3826 ((uint32_t *) t->result)[1] = t->level1_size;
3827 ((uint32_t *) t->result)[2] = t->p;
3828 ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3829 ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3830
3831 for (i = 0; i < t->level1_size; i++)
3832 ((uint32_t *) (t->result + level1_offset))[i] =
3833 (t->level1[i] == ~((uint32_t) 0)
3834 ? 0
3835 : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3836
3837 for (i = 0; i < (t->level2_size << t->q); i++)
3838 ((uint32_t *) (t->result + level2_offset))[i] =
3839 (t->level2[i] == ~((uint32_t) 0)
3840 ? 0
3841 : (t->level2[i] << t->p) * sizeof (int32_t) + level3_offset);
3842
3843 for (i = 0; i < (t->level3_size << t->p); i++)
3844 ((int32_t *) (t->result + level3_offset))[i] = t->level3[i];
3845
3846 if (t->level1_alloc > 0)
3847 free (t->level1);
3848 if (t->level2_alloc > 0)
3849 free (t->level2);
3850 if (t->level3_alloc > 0)
3851 free (t->level3);
3852 }
3853
3854
3855 static void
3856 allocate_arrays (struct locale_ctype_t *ctype, struct charmap_t *charmap,
3857 struct repertoire_t *repertoire)
3858 {
3859 size_t idx;
3860 size_t width_table_size;
3861 const void *key;
3862 size_t len;
3863 void *vdata;
3864 void *curs;
3865
3866 /* First we have to decide how we organize the arrays. It is easy
3867 for a one-byte character set. But multi-byte character set
3868 cannot be stored flat because the chars might be sparsely used.
3869 So we determine an optimal hashing function for the used
3870 characters.
3871
3872 We use a very trivial hashing function to store the sparse
3873 table. CH % TABSIZE is used as an index. To solve multiple hits
3874 we have N planes. This guarantees a fixed search time for a
3875 character [N / 2]. In the following code we determine the minimum
3876 value for TABSIZE * N, where TABSIZE >= 256.
3877
3878 Some people complained that this algorithm takes too long. Well,
3879 go on, improve it. But changing the step size is *not* an
3880 option. Some people changed this to use only sizes of prime
3881 numbers. Think again, do some math. We are looking for the
3882 optimal solution, not something which works in general. Unless
3883 somebody can provide a dynamic programming solution I think this
3884 implementation is as good as it can get. */
3885 size_t min_total = UINT_MAX;
3886 size_t act_size = 256;
3887
3888 if (oldstyle_tables)
3889 {
3890 if (!be_quiet && ctype->charnames_act > 512)
3891 fputs (_("\
3892 Computing table size for character classes might take a while..."),
3893 stderr);
3894
3895 /* While we want to have a small total size we are willing to use a
3896 little bit larger table if this reduces the number of layers.
3897 Therefore we add a little penalty to the number of planes.
3898 Maybe this constant has to be adjusted a bit. */
3899 #define PENALTY 128
3900 do
3901 {
3902 size_t cnt[act_size];
3903 size_t act_planes = 1;
3904
3905 memset (cnt, '\0', sizeof cnt);
3906
3907 for (idx = 0; idx < 256; ++idx)
3908 cnt[idx] = 1;
3909
3910 for (idx = 0; idx < ctype->charnames_act; ++idx)
3911 if (ctype->charnames[idx] >= 256)
3912 {
3913 size_t nr = ctype->charnames[idx] % act_size;
3914
3915 if (++cnt[nr] > act_planes)
3916 {
3917 act_planes = cnt[nr];
3918 if ((act_size + PENALTY) * act_planes >= min_total)
3919 break;
3920 }
3921 }
3922
3923 if ((act_size + PENALTY) * act_planes < min_total)
3924 {
3925 min_total = (act_size + PENALTY) * act_planes;
3926 ctype->plane_size = act_size;
3927 ctype->plane_cnt = act_planes;
3928 }
3929
3930 ++act_size;
3931 }
3932 while (act_size < min_total);
3933
3934 if (!be_quiet && ctype->charnames_act > 512)
3935 fputs (_(" done\n"), stderr);
3936
3937
3938 ctype->names = (uint32_t *) xcalloc (ctype->plane_size
3939 * ctype->plane_cnt,
3940 sizeof (uint32_t));
3941
3942 for (idx = 1; idx < 256; ++idx)
3943 ctype->names[idx] = idx;
3944
3945 /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */
3946 ctype->names[0] = 1;
3947
3948 for (idx = 256; idx < ctype->charnames_act; ++idx)
3949 {
3950 size_t nr = (ctype->charnames[idx] % ctype->plane_size);
3951 size_t depth = 0;
3952
3953 while (ctype->names[nr + depth * ctype->plane_size])
3954 ++depth;
3955 assert (depth < ctype->plane_cnt);
3956
3957 ctype->names[nr + depth * ctype->plane_size] = ctype->charnames[idx];
3958
3959 /* Now for faster access remember the index in the NAMES_B array. */
3960 ctype->charnames[idx] = nr + depth * ctype->plane_size;
3961 }
3962 ctype->names[0] = 0;
3963 }
3964 else
3965 {
3966 ctype->plane_size = 0;
3967 ctype->plane_cnt = 0;
3968 ctype->names = NULL;
3969 }
3970
3971 /* You wonder about this amount of memory? This is only because some
3972 users do not manage to address the array with unsigned values or
3973 data types with range >= 256. '\200' would result in the array
3974 index -128. To help these poor people we duplicate the entries for
3975 128 up to 255 below the entry for \0. */
3976 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
3977 sizeof (char_class_t));
3978 ctype->ctype32_b = (char_class32_t *)
3979 xcalloc ((oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256),
3980 sizeof (char_class32_t));
3981 if (!oldstyle_tables)
3982 ctype->class_3level = (struct iovec *)
3983 xmalloc (ctype->nr_charclass * sizeof (struct iovec));
3984
3985 /* This is the array accessed using the multibyte string elements. */
3986 for (idx = 0; idx < 256; ++idx)
3987 ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3988
3989 /* Mirror first 127 entries. We must take care that entry -1 is not
3990 mirrored because EOF == -1. */
3991 for (idx = 0; idx < 127; ++idx)
3992 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3993
3994 if (oldstyle_tables)
3995 {
3996 /* The 32 bit array contains all characters. */
3997 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3998 ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3999 }
4000 else
4001 {
4002 /* The 32 bit array contains all characters < 0x100. */
4003 for (idx = 0; idx < ctype->class_collection_act; ++idx)
4004 if (ctype->charnames[idx] < 0x100)
4005 ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
4006 }
4007
4008 if (!oldstyle_tables)
4009 {
4010 size_t nr;
4011
4012 for (nr = 0; nr < ctype->nr_charclass; nr++)
4013 {
4014 struct wctype_table t;
4015
4016 t.p = 4; /* or: 5 */
4017 t.q = 7; /* or: 6 */
4018 wctype_table_init (&t);
4019
4020 for (idx = 0; idx < ctype->class_collection_act; ++idx)
4021 if (ctype->class_collection[idx] & _ISwbit (nr))
4022 wctype_table_add (&t, ctype->charnames[idx]);
4023
4024 wctype_table_finalize (&t);
4025
4026 if (verbose)
4027 fprintf (stderr, _("%s: table for class \"%s\": %lu bytes\n"),
4028 "LC_CTYPE", ctype->classnames[nr],
4029 (unsigned long int) t.result_size);
4030
4031 ctype->class_3level[nr].iov_base = t.result;
4032 ctype->class_3level[nr].iov_len = t.result_size;
4033 }
4034 }
4035
4036 /* Room for table of mappings. */
4037 ctype->map = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
4038 ctype->map32 = (uint32_t **) xmalloc (ctype->map_collection_nr
4039 * sizeof (uint32_t *));
4040 if (!oldstyle_tables)
4041 ctype->map_3level = (struct iovec *)
4042 xmalloc (ctype->map_collection_nr * sizeof (struct iovec));
4043
4044 /* Fill in all mappings. */
4045 for (idx = 0; idx < 2; ++idx)
4046 {
4047 unsigned int idx2;
4048
4049 /* Allocate table. */
4050 ctype->map[idx] = (uint32_t *) xmalloc ((256 + 128) * sizeof (uint32_t));
4051
4052 /* Copy values from collection. */
4053 for (idx2 = 0; idx2 < 256; ++idx2)
4054 ctype->map[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
4055
4056 /* Mirror first 127 entries. We must take care not to map entry
4057 -1 because EOF == -1. */
4058 for (idx2 = 0; idx2 < 127; ++idx2)
4059 ctype->map[idx][idx2] = ctype->map[idx][256 + idx2];
4060
4061 /* EOF must map to EOF. */
4062 ctype->map[idx][127] = EOF;
4063 }
4064
4065 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
4066 {
4067 unsigned int idx2;
4068
4069 /* Allocate table. */
4070 ctype->map32[idx] = (uint32_t *)
4071 xmalloc ((oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256)
4072 * sizeof (uint32_t));
4073
4074 /* Copy default value (identity mapping). */
4075 if (oldstyle_tables)
4076 memcpy (ctype->map32[idx], ctype->names,
4077 ctype->plane_size * ctype->plane_cnt * sizeof (uint32_t));
4078 else
4079 for (idx2 = 0; idx2 < 256; ++idx2)
4080 ctype->map32[idx][idx2] = idx2;
4081
4082 /* Copy values from collection. */
4083 for (idx2 = 0; idx2 < 256; ++idx2)
4084 if (ctype->map_collection[idx][idx2] != 0)
4085 ctype->map32[idx][idx2] = ctype->map_collection[idx][idx2];
4086
4087 if (oldstyle_tables)
4088 while (idx2 < ctype->map_collection_act[idx])
4089 {
4090 if (ctype->map_collection[idx][idx2] != 0)
4091 ctype->map32[idx][ctype->charnames[idx2]] =
4092 ctype->map_collection[idx][idx2];
4093 ++idx2;
4094 }
4095 }
4096
4097 if (!oldstyle_tables)
4098 {
4099 size_t nr;
4100
4101 for (nr = 0; nr < ctype->map_collection_nr; nr++)
4102 {
4103 struct wctrans_table t;
4104
4105 t.p = 7;
4106 t.q = 9;
4107 wctrans_table_init (&t);
4108
4109 for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
4110 if (ctype->map_collection[nr][idx] != 0)
4111 wctrans_table_add (&t, ctype->charnames[idx],
4112 ctype->map_collection[nr][idx]);
4113
4114 wctrans_table_finalize (&t);
4115
4116 if (verbose)
4117 fprintf (stderr, _("%s: table for map \"%s\": %lu bytes\n"),
4118 "LC_CTYPE", ctype->mapnames[nr],
4119 (unsigned long int) t.result_size);
4120
4121 ctype->map_3level[nr].iov_base = t.result;
4122 ctype->map_3level[nr].iov_len = t.result_size;
4123 }
4124 }
4125
4126 /* Extra array for class and map names. */
4127 ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
4128 * sizeof (uint32_t));
4129 ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
4130 * sizeof (uint32_t));
4131
4132 if (oldstyle_tables)
4133 {
4134 ctype->class_offset = 0; /* not really used */
4135 ctype->map_offset = 0; /* not really used */
4136 }
4137 else
4138 {
4139 ctype->class_offset = _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE);
4140 ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
4141 }
4142
4143 /* Array for width information. Because the expected width are very
4144 small we use only one single byte. This saves space. */
4145 if (oldstyle_tables)
4146 {
4147 width_table_size = (ctype->plane_size * ctype->plane_cnt + 3) & ~3ul;
4148 ctype->width = (unsigned char *) xmalloc (width_table_size);
4149
4150 /* Initialize with -1. */
4151 memset (ctype->width, '\xff', width_table_size);
4152 if (charmap->width_rules != NULL)
4153 {
4154 size_t cnt;
4155
4156 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
4157 {
4158 unsigned char bytes[charmap->mb_cur_max];
4159 int nbytes = charmap->width_rules[cnt].from->nbytes;
4160
4161 /* We have the range of character for which the width is
4162 specified described using byte sequences of the multibyte
4163 charset. We have to convert this to UCS4 now. And we
4164 cannot simply convert the beginning and the end of the
4165 sequence, we have to iterate over the byte sequence and
4166 convert it for every single character. */
4167 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
4168
4169 while (nbytes < charmap->width_rules[cnt].to->nbytes
4170 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
4171 nbytes) <= 0)
4172 {
4173 /* Find the UCS value for `bytes'. */
4174 int inner;
4175 uint32_t wch;
4176 struct charseq *seq =
4177 charmap_find_symbol (charmap, bytes, nbytes);
4178
4179 if (seq == NULL)
4180 wch = ILLEGAL_CHAR_VALUE;
4181 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
4182 wch = seq->ucs4;
4183 else
4184 wch = repertoire_find_value (ctype->repertoire, seq->name,
4185 strlen (seq->name));
4186
4187 if (wch != ILLEGAL_CHAR_VALUE)
4188 {
4189 /* Store the value. */
4190 size_t nr = wch % ctype->plane_size;
4191 size_t depth = 0;
4192
4193 while (ctype->names[nr + depth * ctype->plane_size] != wch)
4194 {
4195 ++depth;
4196 assert (depth < ctype->plane_cnt);
4197 }
4198
4199 ctype->width[nr + depth * ctype->plane_size]
4200 = charmap->width_rules[cnt].width;
4201 }
4202
4203 /* "Increment" the bytes sequence. */
4204 inner = nbytes - 1;
4205 while (inner >= 0 && bytes[inner] == 0xff)
4206 --inner;
4207
4208 if (inner < 0)
4209 {
4210 /* We have to extend the byte sequence. */
4211 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
4212 break;
4213
4214 bytes[0] = 1;
4215 memset (&bytes[1], 0, nbytes);
4216 ++nbytes;
4217 }
4218 else
4219 {
4220 ++bytes[inner];
4221 while (++inner < nbytes)
4222 bytes[inner] = 0;
4223 }
4224 }
4225 }
4226 }
4227
4228 /* Now set all the other characters of the character set to the
4229 default width. */
4230 curs = NULL;
4231 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
4232 {
4233 struct charseq *data = (struct charseq *) vdata;
4234 size_t nr;
4235 size_t depth;
4236
4237 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
4238 data->ucs4 = repertoire_find_value (ctype->repertoire,
4239 data->name, len);
4240
4241 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
4242 {
4243 nr = data->ucs4 % ctype->plane_size;
4244 depth = 0;
4245
4246 while (ctype->names[nr + depth * ctype->plane_size] != data->ucs4)
4247 {
4248 ++depth;
4249 assert (depth < ctype->plane_cnt);
4250 }
4251
4252 if (ctype->width[nr + depth * ctype->plane_size]
4253 == (unsigned char) '\xff')
4254 ctype->width[nr + depth * ctype->plane_size] =
4255 charmap->width_default;
4256 }
4257 }
4258 }
4259 else
4260 {
4261 struct wcwidth_table t;
4262
4263 t.p = 7;
4264 t.q = 9;
4265 wcwidth_table_init (&t);
4266
4267 /* First set all the characters of the character set to the default width. */
4268 curs = NULL;
4269 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
4270 {
4271 struct charseq *data = (struct charseq *) vdata;
4272
4273 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
4274 data->ucs4 = repertoire_find_value (ctype->repertoire,
4275 data->name, len);
4276
4277 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
4278 wcwidth_table_add (&t, data->ucs4, charmap->width_default);
4279 }
4280
4281 /* Now add the explicitly specified widths. */
4282 if (charmap->width_rules != NULL)
4283 {
4284 size_t cnt;
4285
4286 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
4287 {
4288 unsigned char bytes[charmap->mb_cur_max];
4289 int nbytes = charmap->width_rules[cnt].from->nbytes;
4290
4291 /* We have the range of characters for which the width is
4292 specified described using byte sequences of the multibyte
4293 charset. We have to convert this to UCS4 now. And we
4294 cannot simply convert the beginning and the end of the
4295 sequence, we have to iterate over the byte sequence and
4296 convert it for every single character. */
4297 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
4298
4299 while (nbytes < charmap->width_rules[cnt].to->nbytes
4300 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
4301 nbytes) <= 0)
4302 {
4303 /* Find the UCS value for `bytes'. */
4304 int inner;
4305 uint32_t wch;
4306 struct charseq *seq =
4307 charmap_find_symbol (charmap, bytes, nbytes);
4308
4309 if (seq == NULL)
4310 wch = ILLEGAL_CHAR_VALUE;
4311 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
4312 wch = seq->ucs4;
4313 else
4314 wch = repertoire_find_value (ctype->repertoire, seq->name,
4315 strlen (seq->name));
4316
4317 if (wch != ILLEGAL_CHAR_VALUE)
4318 /* Store the value. */
4319 wcwidth_table_add (&t, wch, charmap->width_rules[cnt].width);
4320
4321 /* "Increment" the bytes sequence. */
4322 inner = nbytes - 1;
4323 while (inner >= 0 && bytes[inner] == 0xff)
4324 --inner;
4325
4326 if (inner < 0)
4327 {
4328 /* We have to extend the byte sequence. */
4329 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
4330 break;
4331
4332 bytes[0] = 1;
4333 memset (&bytes[1], 0, nbytes);
4334 ++nbytes;
4335 }
4336 else
4337 {
4338 ++bytes[inner];
4339 while (++inner < nbytes)
4340 bytes[inner] = 0;
4341 }
4342 }
4343 }
4344 }
4345
4346 wcwidth_table_finalize (&t);
4347
4348 if (verbose)
4349 fprintf (stderr, _("%s: table for width: %lu bytes\n"),
4350 "LC_CTYPE", (unsigned long int) t.result_size);
4351
4352 ctype->width_3level.iov_base = t.result;
4353 ctype->width_3level.iov_len = t.result_size;
4354 }
4355
4356 /* Set MB_CUR_MAX. */
4357 ctype->mb_cur_max = charmap->mb_cur_max;
4358
4359 /* Now determine the table for the transliteration information.
4360
4361 XXX It is not yet clear to me whether it is worth implementing a
4362 complicated algorithm which uses a hash table to locate the entries.
4363 For now I'll use a simple array which can be searched using binary
4364 search. */
4365 if (ctype->translit_copy_locale != NULL)
4366 {
4367 /* Fold in the transliteration information from the locale mentioned
4368 in the `include' statement. */
4369 struct locale_ctype_t *here = ctype;
4370
4371 do
4372 {
4373 struct localedef_t *other = find_locale (LC_CTYPE,
4374 here->translit_copy_locale,
4375 repertoire->name, charmap);
4376
4377 if (other == NULL)
4378 {
4379 error (0, 0, _("\
4380 %s: transliteration data from locale `%s' not available"),
4381 "LC_CTYPE", here->translit_copy_locale);
4382 break;
4383 }
4384
4385 here = other->categories[LC_CTYPE].ctype;
4386
4387 /* Enqueue the information if necessary. */
4388 if (here->translit != NULL)
4389 {
4390 struct translit_t *endp = here->translit;
4391 while (endp->next != NULL)
4392 endp = endp->next;
4393
4394 endp->next = ctype->translit;
4395 ctype->translit = here->translit;
4396 }
4397 }
4398 while (here->translit_copy_locale != NULL);
4399 }
4400
4401 if (ctype->translit != NULL)
4402 {
4403 /* First count how many entries we have. This is the upper limit
4404 since some entries from the included files might be overwritten. */
4405 size_t number = 0;
4406 size_t cnt;
4407 struct translit_t *runp = ctype->translit;
4408 struct translit_t **sorted;
4409 size_t from_len, to_len;
4410
4411 while (runp != NULL)
4412 {
4413 ++number;
4414 runp = runp->next;
4415 }
4416
4417 /* Next we allocate an array large enough and fill in the values. */
4418 sorted = (struct translit_t **) alloca (number
4419 * sizeof (struct translit_t **));
4420 runp = ctype->translit;
4421 number = 0;
4422 do
4423 {
4424 /* Search for the place where to insert this string.
4425 XXX Better use a real sorting algorithm later. */
4426 size_t idx = 0;
4427 int replace = 0;
4428
4429 while (idx < number)
4430 {
4431 int res = wcscmp ((const wchar_t *) sorted[idx]->from,
4432 (const wchar_t *) runp->from);
4433 if (res == 0)
4434 {
4435 replace = 1;
4436 break;
4437 }
4438 if (res > 0)
4439 break;
4440 ++idx;
4441 }
4442
4443 if (replace)
4444 sorted[idx] = runp;
4445 else
4446 {
4447 memmove (&sorted[idx + 1], &sorted[idx],
4448 (number - idx) * sizeof (struct translit_t *));
4449 sorted[idx] = runp;
4450 ++number;
4451 }
4452
4453 runp = runp->next;
4454 }
4455 while (runp != NULL);
4456
4457 /* The next step is putting all the possible transliteration
4458 strings in one memory block so that we can write it out.
4459 We need several different blocks:
4460 - index to the from-string array
4461 - from-string array
4462 - index to the to-string array
4463 - to-string array.
4464 */
4465 from_len = to_len = 0;
4466 for (cnt = 0; cnt < number; ++cnt)
4467 {
4468 struct translit_to_t *srunp;
4469 from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4470 srunp = sorted[cnt]->to;
4471 while (srunp != NULL)
4472 {
4473 to_len += wcslen ((const wchar_t *) srunp->str) + 1;
4474 srunp = srunp->next;
4475 }
4476 /* Plus one for the extra NUL character marking the end of
4477 the list for the current entry. */
4478 ++to_len;
4479 }
4480
4481 /* We can allocate the arrays for the results. */
4482 ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
4483 ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
4484 ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
4485 ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
4486
4487 from_len = 0;
4488 to_len = 0;
4489 for (cnt = 0; cnt < number; ++cnt)
4490 {
4491 size_t len;
4492 struct translit_to_t *srunp;
4493
4494 ctype->translit_from_idx[cnt] = from_len;
4495 ctype->translit_to_idx[cnt] = to_len;
4496
4497 len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4498 wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4499 (const wchar_t *) sorted[cnt]->from, len);
4500 from_len += len;
4501
4502 ctype->translit_to_idx[cnt] = to_len;
4503 srunp = sorted[cnt]->to;
4504 while (srunp != NULL)
4505 {
4506 len = wcslen ((const wchar_t *) srunp->str) + 1;
4507 wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4508 (const wchar_t *) srunp->str, len);
4509 to_len += len;
4510 srunp = srunp->next;
4511 }
4512 ctype->translit_to_tbl[to_len++] = L'\0';
4513 }
4514
4515 /* Store the information about the length. */
4516 ctype->translit_idx_size = number;
4517 ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
4518 ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
4519 }
4520 else
4521 {
4522 /* Provide some dummy pointers since we have nothing to write out. */
4523 static uint32_t no_str = { 0 };
4524
4525 ctype->translit_from_idx = &no_str;
4526 ctype->translit_from_tbl = &no_str;
4527 ctype->translit_to_tbl = &no_str;
4528 ctype->translit_idx_size = 0;
4529 ctype->translit_from_tbl_size = 0;
4530 ctype->translit_to_tbl_size = 0;
4531 }
4532 }