]> git.ipfire.org Git - thirdparty/glibc.git/blob - locale/programs/ld-ctype.c
Update.
[thirdparty/glibc.git] / locale / programs / ld-ctype.c
1 /* Copyright (C) 1995-1999, 2000 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #include <alloca.h>
25 #include <byteswap.h>
26 #include <endian.h>
27 #include <errno.h>
28 #include <limits.h>
29 #include <obstack.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <wchar.h>
33 #include <wctype.h>
34 #include <sys/uio.h>
35
36 #include "charmap.h"
37 #include "localeinfo.h"
38 #include "langinfo.h"
39 #include "linereader.h"
40 #include "locfile-token.h"
41 #include "locfile.h"
42 #include "localedef.h"
43
44 #include <assert.h>
45
46
#ifdef PREDEFINED_CLASSES
/* Extra class bits which wctype.h does not provide because the
   corresponding classes are not preallocated.  The masks are built
   from unsigned constants: shifting a signed 1 into bit 31 would
   overflow `int', which is undefined behavior.  */
# define _ISwspecial1 (1u << 29)
# define _ISwspecial2 (1u << 30)
# define _ISwspecial3 (1u << 31)
#endif
54
55
/* Bit handling for the character classes.  BITPOS yields the position
   of the class token TOK relative to tok_upper; BIT and BITw turn that
   position into a mask by means of _ISbit and _ISwbit respectively.  */
#define BITPOS(tok) ((tok) - tok_upper)
#define BIT(tok) (_ISbit (BITPOS (tok)))
#define BITw(tok) (_ISwbit (BITPOS (tok)))

/* Expand to the lvalue find_idx returns for VAL in the table named
   COLL of the ctype structure CT.  The matching COLL_max and COLL_act
   members are passed along.  SUB is pasted after each member name, so
   it is either empty or an array subscript.  */
#define ELEM(ct, coll, sub, val) \
  *find_idx (ct, &ct->coll sub, &ct->coll##_max sub, \
             &ct->coll##_act sub, val)
64
65
/* For compatibility with former implementations the number of bits
   available for character classes is restricted to 16 for now.  Once
   that compatibility is no longer needed the number can be raised
   to 32.  */
#define char_class_t   uint16_t
#define char_class32_t uint32_t
71
72
/* A transliteration action consists of a from-string, which may be
   several characters long, and a set of to-strings, which may also be
   several characters long each.  All strings hold 32-bit values since
   this is what the gconv functions use.

   One to-string; the to-strings of an action are chained via NEXT.  */
struct translit_to_t
{
  uint32_t *str;

  struct translit_to_t *next;
};
83
/* One transliteration entry: the 32-bit from-string together with the
   list of its to-strings.  Entries are chained via NEXT.  */
struct translit_t
{
  uint32_t *from;

  /* Presumably the place in the input where the entry was defined --
     TODO confirm against the code filling these members.  */
  const char *fname;
  size_t lineno;

  struct translit_to_t *to;

  struct translit_t *next;
};
95
/* One entry of the list of characters to ignore in transliteration.
   FROM, TO and STEP presumably describe a range of 32-bit character
   values -- TODO confirm against the code filling these members.
   Entries are chained via NEXT.  */
struct translit_ignore_t
{
  uint32_t from;
  uint32_t to;
  uint32_t step;

  /* Presumably the place in the input where the entry was defined.  */
  const char *fname;
  size_t lineno;

  struct translit_ignore_t *next;
};
107
108
109 /* The real definition of the struct for the LC_CTYPE locale. */
110 struct locale_ctype_t
111 {
112 uint32_t *charnames;
113 size_t charnames_max;
114 size_t charnames_act;
115 /* An index lookup table, to speedup find_idx. */
116 #define MAX_CHARNAMES_IDX 0x10000
117 uint32_t *charnames_idx;
118
119 struct repertoire_t *repertoire;
120
121 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
122 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
123 size_t nr_charclass;
124 const char *classnames[MAX_NR_CHARCLASS];
125 uint32_t last_class_char;
126 uint32_t class256_collection[256];
127 uint32_t *class_collection;
128 size_t class_collection_max;
129 size_t class_collection_act;
130 uint32_t class_done;
131 uint32_t class_offset;
132
133 struct charseq **mbdigits;
134 size_t mbdigits_act;
135 size_t mbdigits_max;
136 uint32_t *wcdigits;
137 size_t wcdigits_act;
138 size_t wcdigits_max;
139
140 struct charseq *mboutdigits[10];
141 uint32_t wcoutdigits[10];
142 size_t outdigits_act;
143
144 /* If the following number ever turns out to be too small simply
145 increase it. But I doubt it will. --drepper@gnu */
146 #define MAX_NR_CHARMAP 16
147 const char *mapnames[MAX_NR_CHARMAP];
148 uint32_t *map_collection[MAX_NR_CHARMAP];
149 uint32_t map256_collection[2][256];
150 size_t map_collection_max[MAX_NR_CHARMAP];
151 size_t map_collection_act[MAX_NR_CHARMAP];
152 size_t map_collection_nr;
153 size_t last_map_idx;
154 int tomap_done[MAX_NR_CHARMAP];
155 uint32_t map_offset;
156
157 /* Transliteration information. */
158 const char *translit_copy_locale;
159 const char *translit_copy_repertoire;
160 struct translit_t *translit;
161 struct translit_ignore_t *translit_ignore;
162 uint32_t ntranslit_ignore;
163
164 uint32_t *default_missing;
165 const char *default_missing_file;
166 size_t default_missing_lineno;
167
168 /* The arrays for the binary representation. */
169 char_class_t *ctype_b;
170 char_class32_t *ctype32_b;
171 uint32_t **map_b;
172 uint32_t **map32_b;
173 uint32_t **class_b;
174 struct iovec *class_3level;
175 struct iovec *map_3level;
176 uint32_t *class_name_ptr;
177 uint32_t *map_name_ptr;
178 struct iovec width;
179 uint32_t mb_cur_max;
180 const char *codeset_name;
181 uint32_t *translit_from_idx;
182 uint32_t *translit_from_tbl;
183 uint32_t *translit_to_idx;
184 uint32_t *translit_to_tbl;
185 uint32_t translit_idx_size;
186 size_t translit_from_tbl_size;
187 size_t translit_to_tbl_size;
188
189 struct obstack mempool;
190 };
191
192
/* Allocation and release functions for the obstack chunks.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free  free


/* Prototypes for local functions.  */
static void ctype_startup (struct linereader *lr,
                           struct localedef_t *locale,
                           struct charmap_t *charmap,
                           struct localedef_t *copy_locale,
                           int ignore_content);

static void ctype_class_new (struct linereader *lr,
                             struct locale_ctype_t *ctype,
                             const char *name);

static void ctype_map_new (struct linereader *lr,
                           struct locale_ctype_t *ctype,
                           const char *name,
                           struct charmap_t *charmap);

static uint32_t *find_idx (struct locale_ctype_t *ctype,
                           uint32_t **table,
                           size_t *max,
                           size_t *act,
                           unsigned int idx);

static void set_class_defaults (struct locale_ctype_t *ctype,
                                struct charmap_t *charmap,
                                struct repertoire_t *repertoire);

static void allocate_arrays (struct locale_ctype_t *ctype,
                             struct charmap_t *charmap,
                             struct repertoire_t *repertoire);
215
216
/* Names under which the ten decimal digits are looked up: the long
   names, the names derived from the UCS values, and the plain
   characters themselves.  Each table is indexed by the digit value.  */
static const char *longnames[] =
{
  "zero",
  "one",
  "two",
  "three",
  "four",
  "five",
  "six",
  "seven",
  "eight",
  "nine"
};
static const char *uninames[] =
{
  "U00000030",
  "U00000031",
  "U00000032",
  "U00000033",
  "U00000034",
  "U00000035",
  "U00000036",
  "U00000037",
  "U00000038",
  "U00000039"
};
static const unsigned char digits[] = "0123456789";
228
229
230 static void
231 ctype_startup (struct linereader *lr, struct localedef_t *locale,
232 struct charmap_t *charmap, struct localedef_t *copy_locale,
233 int ignore_content)
234 {
235 unsigned int cnt;
236 struct locale_ctype_t *ctype;
237
238 if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
239 {
240 if (copy_locale == NULL)
241 {
242 /* Allocate the needed room. */
243 locale->categories[LC_CTYPE].ctype = ctype =
244 (struct locale_ctype_t *) xcalloc (1,
245 sizeof (struct locale_ctype_t));
246
247 /* We have seen no names yet. */
248 ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
249 ctype->charnames =
250 (unsigned int *) xmalloc (ctype->charnames_max
251 * sizeof (unsigned int));
252 for (cnt = 0; cnt < 256; ++cnt)
253 ctype->charnames[cnt] = cnt;
254 ctype->charnames_act = 256;
255 ctype->charnames_idx =
256 (uint32_t *) xmalloc (MAX_CHARNAMES_IDX * sizeof (uint32_t));
257 for (cnt = 0; cnt < MAX_CHARNAMES_IDX; ++cnt)
258 ctype->charnames_idx[cnt] = ~((uint32_t) 0);
259
260 /* Fill character class information. */
261 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
262 /* The order of the following instructions determines the bit
263 positions! */
264 ctype_class_new (lr, ctype, "upper");
265 ctype_class_new (lr, ctype, "lower");
266 ctype_class_new (lr, ctype, "alpha");
267 ctype_class_new (lr, ctype, "digit");
268 ctype_class_new (lr, ctype, "xdigit");
269 ctype_class_new (lr, ctype, "space");
270 ctype_class_new (lr, ctype, "print");
271 ctype_class_new (lr, ctype, "graph");
272 ctype_class_new (lr, ctype, "blank");
273 ctype_class_new (lr, ctype, "cntrl");
274 ctype_class_new (lr, ctype, "punct");
275 ctype_class_new (lr, ctype, "alnum");
276 #ifdef PREDEFINED_CLASSES
277 /* The following are extensions from ISO 14652. */
278 ctype_class_new (lr, ctype, "left_to_right");
279 ctype_class_new (lr, ctype, "right_to_left");
280 ctype_class_new (lr, ctype, "num_terminator");
281 ctype_class_new (lr, ctype, "num_separator");
282 ctype_class_new (lr, ctype, "segment_separator");
283 ctype_class_new (lr, ctype, "block_separator");
284 ctype_class_new (lr, ctype, "direction_control");
285 ctype_class_new (lr, ctype, "sym_swap_layout");
286 ctype_class_new (lr, ctype, "char_shape_selector");
287 ctype_class_new (lr, ctype, "num_shape_selector");
288 ctype_class_new (lr, ctype, "non_spacing");
289 ctype_class_new (lr, ctype, "non_spacing_level3");
290 ctype_class_new (lr, ctype, "normal_connect");
291 ctype_class_new (lr, ctype, "r_connect");
292 ctype_class_new (lr, ctype, "no_connect");
293 ctype_class_new (lr, ctype, "no_connect-space");
294 ctype_class_new (lr, ctype, "vowel_connect");
295 #endif
296
297 ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
298 ctype->class_collection
299 = (uint32_t *) xcalloc (sizeof (unsigned long int),
300 ctype->class_collection_max);
301 ctype->class_collection_act = 256;
302
303 /* Fill character map information. */
304 ctype->last_map_idx = MAX_NR_CHARMAP;
305 ctype_map_new (lr, ctype, "toupper", charmap);
306 ctype_map_new (lr, ctype, "tolower", charmap);
307 #ifdef PREDEFINED_CLASSES
308 ctype_map_new (lr, ctype, "tosymmetric", charmap);
309 #endif
310
311 /* Fill first 256 entries in `toXXX' arrays. */
312 for (cnt = 0; cnt < 256; ++cnt)
313 {
314 ctype->map_collection[0][cnt] = cnt;
315 ctype->map_collection[1][cnt] = cnt;
316 #ifdef PREDEFINED_CLASSES
317 ctype->map_collection[2][cnt] = cnt;
318 #endif
319 ctype->map256_collection[0][cnt] = cnt;
320 ctype->map256_collection[1][cnt] = cnt;
321 }
322
323 obstack_init (&ctype->mempool);
324 }
325 else
326 ctype = locale->categories[LC_CTYPE].ctype =
327 copy_locale->categories[LC_CTYPE].ctype;
328 }
329 }
330
331
332 void
333 ctype_finish (struct localedef_t *locale, struct charmap_t *charmap)
334 {
335 /* See POSIX.2, table 2-6 for the meaning of the following table. */
336 #define NCLASS 12
337 static const struct
338 {
339 const char *name;
340 const char allow[NCLASS];
341 }
342 valid_table[NCLASS] =
343 {
344 /* The order is important. See token.h for more information.
345 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
346 { "upper", "--MX-XDDXXX-" },
347 { "lower", "--MX-XDDXXX-" },
348 { "alpha", "---X-XDDXXX-" },
349 { "digit", "XXX--XDDXXX-" },
350 { "xdigit", "-----XDDXXX-" },
351 { "space", "XXXXX------X" },
352 { "print", "---------X--" },
353 { "graph", "---------X--" },
354 { "blank", "XXXXXM-----X" },
355 { "cntrl", "XXXXX-XX--XX" },
356 { "punct", "XXXXX-DD-X-X" },
357 { "alnum", "-----XDDXXX-" }
358 };
359 size_t cnt;
360 int cls1, cls2;
361 uint32_t space_value;
362 struct charseq *space_seq;
363 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
364 int warned;
365 const void *key;
366 size_t len;
367 void *vdata;
368 void *curs;
369
370 /* Now resolve copying and also handle completely missing definitions. */
371 if (ctype == NULL)
372 {
373 const char *repertoire_name;
374
375 /* First see whether we were supposed to copy. If yes, find the
376 actual definition. */
377 if (locale->copy_name[LC_CTYPE] != NULL)
378 {
379 /* Find the copying locale. This has to happen transitively since
380 the locale we are copying from might also copying another one. */
381 struct localedef_t *from = locale;
382
383 do
384 from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
385 from->repertoire_name, charmap);
386 while (from->categories[LC_CTYPE].ctype == NULL
387 && from->copy_name[LC_CTYPE] != NULL);
388
389 ctype = locale->categories[LC_CTYPE].ctype
390 = from->categories[LC_CTYPE].ctype;
391 }
392
393 /* If there is still no definition issue an warning and create an
394 empty one. */
395 if (ctype == NULL)
396 {
397 if (! be_quiet)
398 error (0, 0, _("No definition for %s category found"), "LC_CTYPE");
399 ctype_startup (NULL, locale, charmap, NULL, 0);
400 ctype = locale->categories[LC_CTYPE].ctype;
401 }
402
403 /* Get the repertoire we have to use. */
404 repertoire_name = locale->repertoire_name ?: repertoire_global;
405 if (repertoire_name != NULL)
406 ctype->repertoire = repertoire_read (repertoire_name);
407 }
408
409 /* We need the name of the currently used 8-bit character set to
410 make correct conversion between this 8-bit representation and the
411 ISO 10646 character set used internally for wide characters. */
412 ctype->codeset_name = charmap->code_set_name;
413 if (ctype->codeset_name == NULL)
414 {
415 if (! be_quiet)
416 error (0, 0, _("No character set name specified in charmap"));
417 ctype->codeset_name = "//UNKNOWN//";
418 }
419
420 /* Set default value for classes not specified. */
421 set_class_defaults (ctype, charmap, ctype->repertoire);
422
423 /* Check according to table. */
424 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
425 {
426 uint32_t tmp = ctype->class_collection[cnt];
427
428 if (tmp != 0)
429 {
430 for (cls1 = 0; cls1 < NCLASS; ++cls1)
431 if ((tmp & _ISwbit (cls1)) != 0)
432 for (cls2 = 0; cls2 < NCLASS; ++cls2)
433 if (valid_table[cls1].allow[cls2] != '-')
434 {
435 int eq = (tmp & _ISwbit (cls2)) != 0;
436 switch (valid_table[cls1].allow[cls2])
437 {
438 case 'M':
439 if (!eq)
440 {
441 uint32_t value = ctype->charnames[cnt];
442
443 if (!be_quiet)
444 error (0, 0, _("\
445 character L'\\u%0*x' in class `%s' must be in class `%s'"),
446 value > 0xffff ? 8 : 4, value,
447 valid_table[cls1].name,
448 valid_table[cls2].name);
449 }
450 break;
451
452 case 'X':
453 if (eq)
454 {
455 uint32_t value = ctype->charnames[cnt];
456
457 if (!be_quiet)
458 error (0, 0, _("\
459 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
460 value > 0xffff ? 8 : 4, value,
461 valid_table[cls1].name,
462 valid_table[cls2].name);
463 }
464 break;
465
466 case 'D':
467 ctype->class_collection[cnt] |= _ISwbit (cls2);
468 break;
469
470 default:
471 error (5, 0, _("internal error in %s, line %u"),
472 __FUNCTION__, __LINE__);
473 }
474 }
475 }
476 }
477
478 for (cnt = 0; cnt < 256; ++cnt)
479 {
480 uint32_t tmp = ctype->class256_collection[cnt];
481
482 if (tmp != 0)
483 {
484 for (cls1 = 0; cls1 < NCLASS; ++cls1)
485 if ((tmp & _ISbit (cls1)) != 0)
486 for (cls2 = 0; cls2 < NCLASS; ++cls2)
487 if (valid_table[cls1].allow[cls2] != '-')
488 {
489 int eq = (tmp & _ISbit (cls2)) != 0;
490 switch (valid_table[cls1].allow[cls2])
491 {
492 case 'M':
493 if (!eq)
494 {
495 char buf[17];
496
497 snprintf (buf, sizeof buf, "\\%Zo", cnt);
498
499 if (!be_quiet)
500 error (0, 0, _("\
501 character '%s' in class `%s' must be in class `%s'"),
502 buf, valid_table[cls1].name,
503 valid_table[cls2].name);
504 }
505 break;
506
507 case 'X':
508 if (eq)
509 {
510 char buf[17];
511
512 snprintf (buf, sizeof buf, "\\%Zo", cnt);
513
514 if (!be_quiet)
515 error (0, 0, _("\
516 character '%s' in class `%s' must not be in class `%s'"),
517 buf, valid_table[cls1].name,
518 valid_table[cls2].name);
519 }
520 break;
521
522 case 'D':
523 ctype->class256_collection[cnt] |= _ISbit (cls2);
524 break;
525
526 default:
527 error (5, 0, _("internal error in %s, line %u"),
528 __FUNCTION__, __LINE__);
529 }
530 }
531 }
532 }
533
534 /* ... and now test <SP> as a special case. */
535 space_value = 32;
536 if (((cnt = BITPOS (tok_space),
537 (ELEM (ctype, class_collection, , space_value)
538 & BITw (tok_space)) == 0)
539 || (cnt = BITPOS (tok_blank),
540 (ELEM (ctype, class_collection, , space_value)
541 & BITw (tok_blank)) == 0)))
542 {
543 if (!be_quiet)
544 error (0, 0, _("<SP> character not in class `%s'"),
545 valid_table[cnt].name);
546 }
547 else if (((cnt = BITPOS (tok_punct),
548 (ELEM (ctype, class_collection, , space_value)
549 & BITw (tok_punct)) != 0)
550 || (cnt = BITPOS (tok_graph),
551 (ELEM (ctype, class_collection, , space_value)
552 & BITw (tok_graph))
553 != 0)))
554 {
555 if (!be_quiet)
556 error (0, 0, _("<SP> character must not be in class `%s'"),
557 valid_table[cnt].name);
558 }
559 else
560 ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
561
562 space_seq = charmap_find_value (charmap, "SP", 2);
563 if (space_seq == NULL)
564 space_seq = charmap_find_value (charmap, "space", 5);
565 if (space_seq == NULL)
566 space_seq = charmap_find_value (charmap, "U00000020", 9);
567 if (space_seq == NULL || space_seq->nbytes != 1)
568 {
569 if (!be_quiet)
570 error (0, 0, _("character <SP> not defined in character map"));
571 }
572 else if (((cnt = BITPOS (tok_space),
573 (ctype->class256_collection[space_seq->bytes[0]]
574 & BIT (tok_space)) == 0)
575 || (cnt = BITPOS (tok_blank),
576 (ctype->class256_collection[space_seq->bytes[0]]
577 & BIT (tok_blank)) == 0)))
578 {
579 if (!be_quiet)
580 error (0, 0, _("<SP> character not in class `%s'"),
581 valid_table[cnt].name);
582 }
583 else if (((cnt = BITPOS (tok_punct),
584 (ctype->class256_collection[space_seq->bytes[0]]
585 & BIT (tok_punct)) != 0)
586 || (cnt = BITPOS (tok_graph),
587 (ctype->class256_collection[space_seq->bytes[0]]
588 & BIT (tok_graph)) != 0)))
589 {
590 if (!be_quiet)
591 error (0, 0, _("<SP> character must not be in class `%s'"),
592 valid_table[cnt].name);
593 }
594 else
595 ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
596
597 /* Now that the tests are done make sure the name array contains all
598 characters which are handled in the WIDTH section of the
599 character set definition file. */
600 if (charmap->width_rules != NULL)
601 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
602 {
603 unsigned char bytes[charmap->mb_cur_max];
604 int nbytes = charmap->width_rules[cnt].from->nbytes;
605
606 /* We have the range of character for which the width is
607 specified described using byte sequences of the multibyte
608 charset. We have to convert this to UCS4 now. And we
609 cannot simply convert the beginning and the end of the
610 sequence, we have to iterate over the byte sequence and
611 convert it for every single character. */
612 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
613
614 while (nbytes < charmap->width_rules[cnt].to->nbytes
615 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
616 nbytes) <= 0)
617 {
618 /* Find the UCS value for `bytes'. */
619 int inner;
620 uint32_t wch;
621 struct charseq *seq = charmap_find_symbol (charmap, bytes, nbytes);
622
623 if (seq == NULL)
624 wch = ILLEGAL_CHAR_VALUE;
625 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
626 wch = seq->ucs4;
627 else
628 wch = repertoire_find_value (ctype->repertoire, seq->name,
629 strlen (seq->name));
630
631 if (wch != ILLEGAL_CHAR_VALUE)
632 /* We are only interested in the side-effects of the
633 `find_idx' call. It will add appropriate entries in
634 the name array if this is necessary. */
635 (void) find_idx (ctype, NULL, NULL, NULL, wch);
636
637 /* "Increment" the bytes sequence. */
638 inner = nbytes - 1;
639 while (inner >= 0 && bytes[inner] == 0xff)
640 --inner;
641
642 if (inner < 0)
643 {
644 /* We have to extend the byte sequence. */
645 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
646 break;
647
648 bytes[0] = 1;
649 memset (&bytes[1], 0, nbytes);
650 ++nbytes;
651 }
652 else
653 {
654 ++bytes[inner];
655 while (++inner < nbytes)
656 bytes[inner] = 0;
657 }
658 }
659 }
660
661 /* Now set all the other characters of the character set to the
662 default width. */
663 curs = NULL;
664 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
665 {
666 struct charseq *data = (struct charseq *) vdata;
667
668 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
669 data->ucs4 = repertoire_find_value (ctype->repertoire,
670 data->name, len);
671
672 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
673 (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
674 }
675
676 /* There must be a multiple of 10 digits. */
677 if (ctype->mbdigits_act % 10 != 0)
678 {
679 assert (ctype->mbdigits_act == ctype->wcdigits_act);
680 ctype->wcdigits_act -= ctype->mbdigits_act % 10;
681 ctype->mbdigits_act -= ctype->mbdigits_act % 10;
682 error (0, 0, _("`digit' category has not entries in groups of ten"));
683 }
684
685 /* Check the input digits. There must be a multiple of ten available.
686 In each group it could be that one or the other character is missing.
687 In this case the whole group must be removed. */
688 cnt = 0;
689 while (cnt < ctype->mbdigits_act)
690 {
691 size_t inner;
692 for (inner = 0; inner < 10; ++inner)
693 if (ctype->mbdigits[cnt + inner] == NULL)
694 break;
695
696 if (inner == 10)
697 cnt += 10;
698 else
699 {
700 /* Remove the group. */
701 memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
702 ((ctype->wcdigits_act - cnt - 10)
703 * sizeof (ctype->mbdigits[0])));
704 ctype->mbdigits_act -= 10;
705 }
706 }
707
708 /* If no input digits are given use the default. */
709 if (ctype->mbdigits_act == 0)
710 {
711 if (ctype->mbdigits_max == 0)
712 {
713 ctype->mbdigits = obstack_alloc (&charmap->mem_pool,
714 10 * sizeof (struct charseq *));
715 ctype->mbdigits_max = 10;
716 }
717
718 for (cnt = 0; cnt < 10; ++cnt)
719 {
720 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
721 digits + cnt, 1);
722 if (ctype->mbdigits[cnt] == NULL)
723 {
724 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
725 longnames[cnt],
726 strlen (longnames[cnt]));
727 if (ctype->mbdigits[cnt] == NULL)
728 {
729 /* Hum, this ain't good. */
730 error (0, 0, _("\
731 no input digits defined and none of the standard names in the charmap"));
732
733 ctype->mbdigits[cnt] = obstack_alloc (&charmap->mem_pool,
734 sizeof (struct charseq) + 1);
735
736 /* This is better than nothing. */
737 ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
738 ctype->mbdigits[cnt]->nbytes = 1;
739 }
740 }
741 }
742
743 ctype->mbdigits_act = 10;
744 }
745
746 /* Check the wide character input digits. There must be a multiple
747 of ten available. In each group it could be that one or the other
748 character is missing. In this case the whole group must be
749 removed. */
750 cnt = 0;
751 while (cnt < ctype->wcdigits_act)
752 {
753 size_t inner;
754 for (inner = 0; inner < 10; ++inner)
755 if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
756 break;
757
758 if (inner == 10)
759 cnt += 10;
760 else
761 {
762 /* Remove the group. */
763 memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
764 ((ctype->wcdigits_act - cnt - 10)
765 * sizeof (ctype->wcdigits[0])));
766 ctype->wcdigits_act -= 10;
767 }
768 }
769
770 /* If no input digits are given use the default. */
771 if (ctype->wcdigits_act == 0)
772 {
773 if (ctype->wcdigits_max == 0)
774 {
775 ctype->wcdigits = obstack_alloc (&charmap->mem_pool,
776 10 * sizeof (uint32_t));
777 ctype->wcdigits_max = 10;
778 }
779
780 for (cnt = 0; cnt < 10; ++cnt)
781 ctype->wcdigits[cnt] = L'0' + cnt;
782
783 ctype->mbdigits_act = 10;
784 }
785
786 /* Check the outdigits. */
787 warned = 0;
788 for (cnt = 0; cnt < 10; ++cnt)
789 if (ctype->mboutdigits[cnt] == NULL)
790 {
791 static struct charseq replace[2];
792
793 if (!warned)
794 {
795 error (0, 0, _("\
796 not all characters used in `outdigit' are available in the charmap"));
797 warned = 1;
798 }
799
800 replace[0].nbytes = 1;
801 replace[0].bytes[0] = '?';
802 replace[0].bytes[1] = '\0';
803 ctype->mboutdigits[cnt] = &replace[0];
804 }
805
806 warned = 0;
807 for (cnt = 0; cnt < 10; ++cnt)
808 if (ctype->wcoutdigits[cnt] == 0)
809 {
810 if (!warned)
811 {
812 error (0, 0, _("\
813 not all characters used in `outdigit' are available in the repertoire"));
814 warned = 1;
815 }
816
817 ctype->wcoutdigits[cnt] = L'?';
818 }
819
820 /* Sort the entries in the translit_ignore list. */
821 if (ctype->translit_ignore != NULL)
822 {
823 struct translit_ignore_t *firstp = ctype->translit_ignore;
824 struct translit_ignore_t *runp;
825
826 ctype->ntranslit_ignore = 1;
827
828 for (runp = firstp->next; runp != NULL; runp = runp->next)
829 {
830 struct translit_ignore_t *lastp = NULL;
831 struct translit_ignore_t *cmpp;
832
833 ++ctype->ntranslit_ignore;
834
835 for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
836 if (runp->from < cmpp->from)
837 break;
838
839 runp->next = lastp;
840 if (lastp == NULL)
841 firstp = runp;
842 }
843
844 ctype->translit_ignore = firstp;
845 }
846 }
847
848
849 void
850 ctype_output (struct localedef_t *locale, struct charmap_t *charmap,
851 const char *output_path)
852 {
853 static const char nulbytes[4] = { 0, 0, 0, 0 };
854 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
855 const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
856 + ctype->nr_charclass + ctype->map_collection_nr);
857 struct iovec iov[2 + nelems + 2 * ctype->nr_charclass
858 + ctype->map_collection_nr + 4];
859 struct locale_file data;
860 uint32_t idx[nelems + 1];
861 uint32_t default_missing_len;
862 size_t elem, cnt, offset, total;
863 char *cp;
864
865 /* Now prepare the output: Find the sizes of the table we can use. */
866 allocate_arrays (ctype, charmap, ctype->repertoire);
867
868 data.magic = LIMAGIC (LC_CTYPE);
869 data.n = nelems;
870 iov[0].iov_base = (void *) &data;
871 iov[0].iov_len = sizeof (data);
872
873 iov[1].iov_base = (void *) idx;
874 iov[1].iov_len = nelems * sizeof (uint32_t);
875
876 idx[0] = iov[0].iov_len + iov[1].iov_len;
877 offset = 0;
878
879 for (elem = 0; elem < nelems; ++elem)
880 {
881 if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
882 switch (elem)
883 {
884 #define CTYPE_EMPTY(name) \
885 case name: \
886 iov[2 + elem + offset].iov_base = NULL; \
887 iov[2 + elem + offset].iov_len = 0; \
888 idx[elem + 1] = idx[elem]; \
889 break
890
891 CTYPE_EMPTY(_NL_CTYPE_GAP1);
892 CTYPE_EMPTY(_NL_CTYPE_GAP2);
893 CTYPE_EMPTY(_NL_CTYPE_GAP3);
894 CTYPE_EMPTY(_NL_CTYPE_GAP4);
895 CTYPE_EMPTY(_NL_CTYPE_GAP5);
896 CTYPE_EMPTY(_NL_CTYPE_GAP6);
897
898 #define CTYPE_DATA(name, base, len) \
899 case _NL_ITEM_INDEX (name): \
900 iov[2 + elem + offset].iov_base = (base); \
901 iov[2 + elem + offset].iov_len = (len); \
902 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
903 break
904
905 CTYPE_DATA (_NL_CTYPE_CLASS,
906 ctype->ctype_b,
907 (256 + 128) * sizeof (char_class_t));
908
909 CTYPE_DATA (_NL_CTYPE_TOUPPER,
910 ctype->map_b[0],
911 (256 + 128) * sizeof (uint32_t));
912 CTYPE_DATA (_NL_CTYPE_TOLOWER,
913 ctype->map_b[1],
914 (256 + 128) * sizeof (uint32_t));
915
916 CTYPE_DATA (_NL_CTYPE_TOUPPER32,
917 ctype->map32_b[0],
918 256 * sizeof (uint32_t));
919 CTYPE_DATA (_NL_CTYPE_TOLOWER32,
920 ctype->map32_b[1],
921 256 * sizeof (uint32_t));
922
923 CTYPE_DATA (_NL_CTYPE_CLASS32,
924 ctype->ctype32_b,
925 256 * sizeof (char_class32_t));
926
927 CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET,
928 &ctype->class_offset, sizeof (uint32_t));
929
930 CTYPE_DATA (_NL_CTYPE_MAP_OFFSET,
931 &ctype->map_offset, sizeof (uint32_t));
932
933 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
934 &ctype->translit_idx_size, sizeof (uint32_t));
935
936 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
937 ctype->translit_from_idx,
938 ctype->translit_idx_size * sizeof (uint32_t));
939
940 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
941 ctype->translit_from_tbl,
942 ctype->translit_from_tbl_size);
943
944 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
945 ctype->translit_to_idx,
946 ctype->translit_idx_size * sizeof (uint32_t));
947
948 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
949 ctype->translit_to_tbl, ctype->translit_to_tbl_size);
950
951 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
952 /* The class name array. */
953 total = 0;
954 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
955 {
956 iov[2 + elem + offset].iov_base
957 = (void *) ctype->classnames[cnt];
958 iov[2 + elem + offset].iov_len
959 = strlen (ctype->classnames[cnt]) + 1;
960 total += iov[2 + elem + offset].iov_len;
961 }
962 iov[2 + elem + offset].iov_base = (void *) nulbytes;
963 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
964 total += 1 + (4 - ((total + 1) % 4));
965
966 idx[elem + 1] = idx[elem] + total;
967 break;
968
969 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
970 /* The class name array. */
971 total = 0;
972 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
973 {
974 iov[2 + elem + offset].iov_base
975 = (void *) ctype->mapnames[cnt];
976 iov[2 + elem + offset].iov_len
977 = strlen (ctype->mapnames[cnt]) + 1;
978 total += iov[2 + elem + offset].iov_len;
979 }
980 iov[2 + elem + offset].iov_base = (void *) nulbytes;
981 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
982 total += 1 + (4 - ((total + 1) % 4));
983
984 idx[elem + 1] = idx[elem] + total;
985 break;
986
987 CTYPE_DATA (_NL_CTYPE_WIDTH,
988 ctype->width.iov_base,
989 ctype->width.iov_len);
990
991 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
992 &ctype->mb_cur_max, sizeof (uint32_t));
993
994 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
995 total = strlen (ctype->codeset_name) + 1;
996 if (total % 4 == 0)
997 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
998 else
999 {
1000 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
1001 memset (mempcpy (iov[2 + elem + offset].iov_base,
1002 ctype->codeset_name, total),
1003 '\0', 4 - (total & 3));
1004 total = (total + 3) & ~3;
1005 }
1006 iov[2 + elem + offset].iov_len = total;
1007 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1008 break;
1009
1010 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1011 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1012 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1013 *(uint32_t *) iov[2 + elem + offset].iov_base =
1014 ctype->mbdigits_act / 10;
1015 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1016 break;
1017
1018 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1019 /* Align entries. */
1020 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1021 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1022 idx[elem] += iov[2 + elem + offset].iov_len;
1023 ++offset;
1024
1025 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1026 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1027 *(uint32_t *) iov[2 + elem + offset].iov_base =
1028 ctype->wcdigits_act / 10;
1029 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1030 break;
1031
1032 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1033 /* Compute the length of all possible characters. For INDIGITS
1034 there might be more than one. We simply concatenate all of
1035 them with a NUL byte following. The NUL byte wouldn't be
1036 necessary but it makes it easier for the user. */
1037 total = 0;
1038
1039 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1040 cnt < ctype->mbdigits_act; cnt += 10)
1041 total += ctype->mbdigits[cnt]->nbytes + 1;
1042 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1043 iov[2 + elem + offset].iov_len = total;
1044
1045 cp = iov[2 + elem + offset].iov_base;
1046 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1047 cnt < ctype->mbdigits_act; cnt += 10)
1048 {
1049 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
1050 ctype->mbdigits[cnt]->nbytes);
1051 *cp++ = '\0';
1052 }
1053 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1054 break;
1055
1056 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1057 /* Compute the length of all possible characters. For INDIGITS
1058 there might be more than one. We simply concatenate all of
1059 them with a NUL byte following. The NUL byte wouldn't be
1060 necessary but it makes it easier for the user. */
1061 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1062 total = ctype->mboutdigits[cnt]->nbytes + 1;
1063 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1064 iov[2 + elem + offset].iov_len = total;
1065
1066 *(char *) mempcpy (iov[2 + elem + offset].iov_base,
1067 ctype->mboutdigits[cnt]->bytes,
1068 ctype->mboutdigits[cnt]->nbytes) = '\0';
1069 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1070 break;
1071
1072 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1073 total = ctype->wcdigits_act / 10;
1074
1075 iov[2 + elem + offset].iov_base =
1076 (uint32_t *) alloca (total * sizeof (uint32_t));
1077 iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
1078
1079 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1080 cnt < ctype->wcdigits_act; cnt += 10)
1081 ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
1082 = ctype->wcdigits[cnt];
1083 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1084 break;
1085
1086 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC):
1087 /* Align entries. */
1088 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1089 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1090 idx[elem] += iov[2 + elem + offset].iov_len;
1091 ++offset;
1092 /* FALLTHROUGH */
1093
1094 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1095 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1096 iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
1097 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1098 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1099 break;
1100
1101 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1102 /* Align entries. */
1103 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1104 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1105 idx[elem] += iov[2 + elem + offset].iov_len;
1106 ++offset;
1107
1108 default_missing_len = (ctype->default_missing
1109 ? wcslen ((wchar_t *)ctype->default_missing)
1110 : 0);
1111 iov[2 + elem + offset].iov_base = &default_missing_len;
1112 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1113 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1114 break;
1115
1116 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1117 iov[2 + elem + offset].iov_base =
1118 ctype->default_missing ?: (uint32_t *) L"";
1119 iov[2 + elem + offset].iov_len =
1120 wcslen (iov[2 + elem + offset].iov_base);
1121 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1122 break;
1123
1124 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1125 /* Align entries. */
1126 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1127 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1128 idx[elem] += iov[2 + elem + offset].iov_len;
1129 ++offset;
1130
1131 iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore;
1132 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1133 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1134 break;
1135
1136 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1137 {
1138 uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore
1139 * 3 * sizeof (uint32_t));
1140 struct translit_ignore_t *runp;
1141
1142 iov[2 + elem + offset].iov_base = ranges;
1143 iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore
1144 * 3 * sizeof (uint32_t));
1145
1146 for (runp = ctype->translit_ignore; runp != NULL;
1147 runp = runp->next)
1148 {
1149 *ranges++ = runp->from;
1150 *ranges++ = runp->to;
1151 *ranges++ = runp->step;
1152 }
1153 }
1154 /* Remove the following line in case a new entry is added
1155 after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN. */
1156 if (elem < nelems)
1157 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1158 break;
1159
1160 default:
1161 assert (! "unknown CTYPE element");
1162 }
1163 else
1164 {
1165 /* Handle extra maps. */
1166 size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1167 if (nr < ctype->nr_charclass)
1168 {
1169 iov[2 + elem + offset].iov_base = ctype->class_b[nr];
1170 iov[2 + elem + offset].iov_len = 256 / 32 * sizeof (uint32_t);
1171 idx[elem] += iov[2 + elem + offset].iov_len;
1172 ++offset;
1173
1174 iov[2 + elem + offset] = ctype->class_3level[nr];
1175 }
1176 else
1177 {
1178 nr -= ctype->nr_charclass;
1179 assert (nr < ctype->map_collection_nr);
1180 iov[2 + elem + offset] = ctype->map_3level[nr];
1181 }
1182 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1183 }
1184 }
1185
1186 assert (2 + elem + offset == (nelems + 2 * ctype->nr_charclass
1187 + ctype->map_collection_nr + 4 + 2));
1188
1189 write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
1190 }
1191
1192
1193 /* Local functions. */
1194 static void
1195 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1196 const char *name)
1197 {
1198 size_t cnt;
1199
1200 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1201 if (strcmp (ctype->classnames[cnt], name) == 0)
1202 break;
1203
1204 if (cnt < ctype->nr_charclass)
1205 {
1206 lr_error (lr, _("character class `%s' already defined"), name);
1207 return;
1208 }
1209
1210 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1211 /* Exit code 2 is prescribed in P1003.2b. */
1212 error (2, 0, _("\
1213 implementation limit: no more than %Zd character classes allowed"),
1214 MAX_NR_CHARCLASS);
1215
1216 ctype->classnames[ctype->nr_charclass++] = name;
1217 }
1218
1219
1220 static void
1221 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1222 const char *name, struct charmap_t *charmap)
1223 {
1224 size_t max_chars = 0;
1225 size_t cnt;
1226
1227 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1228 {
1229 if (strcmp (ctype->mapnames[cnt], name) == 0)
1230 break;
1231
1232 if (max_chars < ctype->map_collection_max[cnt])
1233 max_chars = ctype->map_collection_max[cnt];
1234 }
1235
1236 if (cnt < ctype->map_collection_nr)
1237 {
1238 lr_error (lr, _("character map `%s' already defined"), name);
1239 return;
1240 }
1241
1242 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1243 /* Exit code 2 is prescribed in P1003.2b. */
1244 error (2, 0, _("\
1245 implementation limit: no more than %d character maps allowed"),
1246 MAX_NR_CHARMAP);
1247
1248 ctype->mapnames[cnt] = name;
1249
1250 if (max_chars == 0)
1251 ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1252 else
1253 ctype->map_collection_max[cnt] = max_chars;
1254
1255 ctype->map_collection[cnt] = (uint32_t *)
1256 xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1257 ctype->map_collection_act[cnt] = 256;
1258
1259 ++ctype->map_collection_nr;
1260 }
1261
1262
1263 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1264 is possible if we only want to extend the name array. */
1265 static uint32_t *
1266 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1267 size_t *act, uint32_t idx)
1268 {
1269 size_t cnt;
1270
1271 if (idx < 256)
1272 return table == NULL ? NULL : &(*table)[idx];
1273
1274 /* If idx is in the usual range, use the charnames_idx lookup table
1275 instead of the slow search loop. */
1276 if (idx < MAX_CHARNAMES_IDX)
1277 {
1278 if (ctype->charnames_idx[idx] != ~((uint32_t) 0))
1279 /* Found. */
1280 cnt = ctype->charnames_idx[idx];
1281 else
1282 /* Not found. */
1283 cnt = ctype->charnames_act;
1284 }
1285 else
1286 {
1287 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1288 if (ctype->charnames[cnt] == idx)
1289 break;
1290 }
1291
1292 /* We have to distinguish two cases: the name is found or not. */
1293 if (cnt == ctype->charnames_act)
1294 {
1295 /* Extend the name array. */
1296 if (ctype->charnames_act == ctype->charnames_max)
1297 {
1298 ctype->charnames_max *= 2;
1299 ctype->charnames = (uint32_t *)
1300 xrealloc (ctype->charnames,
1301 sizeof (uint32_t) * ctype->charnames_max);
1302 }
1303 ctype->charnames[ctype->charnames_act++] = idx;
1304 if (idx < MAX_CHARNAMES_IDX)
1305 ctype->charnames_idx[idx] = cnt;
1306 }
1307
1308 if (table == NULL)
1309 /* We have done everything we are asked to do. */
1310 return NULL;
1311
1312 if (cnt >= *act)
1313 {
1314 if (cnt >= *max)
1315 {
1316 size_t old_max = *max;
1317 do
1318 *max *= 2;
1319 while (*max <= cnt);
1320
1321 *table =
1322 (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1323 memset (&(*table)[old_max], '\0',
1324 (*max - old_max) * sizeof (uint32_t));
1325 }
1326
1327 *act = cnt + 1;
1328 }
1329
1330 return &(*table)[cnt];
1331 }
1332
1333
1334 static int
1335 get_character (struct token *now, struct charmap_t *charmap,
1336 struct repertoire_t *repertoire,
1337 struct charseq **seqp, uint32_t *wchp)
1338 {
1339 if (now->tok == tok_bsymbol)
1340 {
1341 /* This will hopefully be the normal case. */
1342 *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1343 now->val.str.lenmb);
1344 *seqp = charmap_find_value (charmap, now->val.str.startmb,
1345 now->val.str.lenmb);
1346 }
1347 else if (now->tok == tok_ucs4)
1348 {
1349 char utmp[10];
1350
1351 snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1352 *seqp = charmap_find_value (charmap, utmp, 9);
1353
1354 if (*seqp == NULL)
1355 *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1356
1357 if (*seqp == NULL)
1358 {
1359 /* Compute the value in the charmap from the UCS value. */
1360 const char *symbol = repertoire_find_symbol (repertoire,
1361 now->val.ucs4);
1362
1363 if (symbol == NULL)
1364 *seqp = NULL;
1365 else
1366 *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1367
1368 if (*seqp == NULL)
1369 {
1370 if (repertoire != NULL)
1371 {
1372 /* Insert a negative entry. */
1373 static const struct charseq negative
1374 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1375 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1376 sizeof (uint32_t));
1377 *newp = now->val.ucs4;
1378
1379 insert_entry (&repertoire->seq_table, newp,
1380 sizeof (uint32_t), (void *) &negative);
1381 }
1382 }
1383 else
1384 (*seqp)->ucs4 = now->val.ucs4;
1385 }
1386 else if ((*seqp)->ucs4 != now->val.ucs4)
1387 *seqp = NULL;
1388
1389 *wchp = now->val.ucs4;
1390 }
1391 else if (now->tok == tok_charcode)
1392 {
1393 /* We must map from the byte code to UCS4. */
1394 *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1395 now->val.str.lenmb);
1396
1397 if (*seqp == NULL)
1398 *wchp = ILLEGAL_CHAR_VALUE;
1399 else
1400 {
1401 if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1402 (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1403 strlen ((*seqp)->name));
1404 *wchp = (*seqp)->ucs4;
1405 }
1406 }
1407 else
1408 return 1;
1409
1410 return 0;
1411 }
1412
1413
1414 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1415 the .(2). counterparts. */
1416 static void
1417 charclass_symbolic_ellipsis (struct linereader *ldfile,
1418 struct locale_ctype_t *ctype,
1419 struct charmap_t *charmap,
1420 struct repertoire_t *repertoire,
1421 struct token *now,
1422 const char *last_str,
1423 unsigned long int class256_bit,
1424 unsigned long int class_bit, int base,
1425 int ignore_content, int handle_digits, int step)
1426 {
1427 const char *nowstr = now->val.str.startmb;
1428 char tmp[now->val.str.lenmb + 1];
1429 const char *cp;
1430 char *endp;
1431 unsigned long int from;
1432 unsigned long int to;
1433
1434 /* We have to compute the ellipsis values using the symbolic names. */
1435 assert (last_str != NULL);
1436
1437 if (strlen (last_str) != now->val.str.lenmb)
1438 {
1439 invalid_range:
1440 lr_error (ldfile,
1441 _("`%s' and `%.*s' are no valid names for symbolic range"),
1442 last_str, (int) now->val.str.lenmb, nowstr);
1443 return;
1444 }
1445
1446 if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1447 /* Nothing to do, the names are the same. */
1448 return;
1449
1450 for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1451 ;
1452
1453 errno = 0;
1454 from = strtoul (cp, &endp, base);
1455 if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1456 goto invalid_range;
1457
1458 to = strtoul (nowstr + (cp - last_str), &endp, base);
1459 if ((to == UINT_MAX && errno == ERANGE)
1460 || (endp - nowstr) != now->val.str.lenmb || from >= to)
1461 goto invalid_range;
1462
1463 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1464 if (!ignore_content)
1465 {
1466 now->val.str.startmb = tmp;
1467 while ((from += step) <= to)
1468 {
1469 struct charseq *seq;
1470 uint32_t wch;
1471
1472 sprintf (tmp, (base == 10 ? "%.*s%0*d" : "%.*s%0*X"), cp - last_str,
1473 last_str, now->val.str.lenmb - (cp - last_str), from);
1474
1475 get_character (now, charmap, repertoire, &seq, &wch);
1476
1477 if (seq != NULL && seq->nbytes == 1)
1478 /* Yep, we can store information about this byte sequence. */
1479 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1480
1481 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1482 /* We have the UCS4 position. */
1483 *find_idx (ctype, &ctype->class_collection,
1484 &ctype->class_collection_max,
1485 &ctype->class_collection_act, wch) |= class_bit;
1486
1487 if (handle_digits == 1)
1488 {
1489 /* We must store the digit values. */
1490 if (ctype->mbdigits_act == ctype->mbdigits_max)
1491 {
1492 ctype->mbdigits_max *= 2;
1493 ctype->mbdigits = xrealloc (ctype->mbdigits,
1494 (ctype->mbdigits_max
1495 * sizeof (char *)));
1496 ctype->wcdigits_max *= 2;
1497 ctype->wcdigits = xrealloc (ctype->wcdigits,
1498 (ctype->wcdigits_max
1499 * sizeof (uint32_t)));
1500 }
1501
1502 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1503 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1504 }
1505 else if (handle_digits == 2)
1506 {
1507 /* We must store the digit values. */
1508 if (ctype->outdigits_act >= 10)
1509 {
1510 lr_error (ldfile, _("\
1511 %s: field `%s' does not contain exactly ten entries"),
1512 "LC_CTYPE", "outdigit");
1513 return;
1514 }
1515
1516 ctype->mboutdigits[ctype->outdigits_act] = seq;
1517 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1518 ++ctype->outdigits_act;
1519 }
1520 }
1521 }
1522 }
1523
1524
1525 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
1526 static void
1527 charclass_ucs4_ellipsis (struct linereader *ldfile,
1528 struct locale_ctype_t *ctype,
1529 struct charmap_t *charmap,
1530 struct repertoire_t *repertoire,
1531 struct token *now, uint32_t last_wch,
1532 unsigned long int class256_bit,
1533 unsigned long int class_bit, int ignore_content,
1534 int handle_digits, int step)
1535 {
1536 if (last_wch > now->val.ucs4)
1537 {
1538 lr_error (ldfile, _("\
1539 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1540 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1541 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1542 return;
1543 }
1544
1545 if (!ignore_content)
1546 while ((last_wch += step) <= now->val.ucs4)
1547 {
1548 /* We have to find out whether there is a byte sequence corresponding
1549 to this UCS4 value. */
1550 struct charseq *seq;
1551 char utmp[10];
1552
1553 snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1554 seq = charmap_find_value (charmap, utmp, 9);
1555 if (seq == NULL)
1556 {
1557 snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1558 seq = charmap_find_value (charmap, utmp, 5);
1559 }
1560
1561 if (seq == NULL)
1562 /* Try looking in the repertoire map. */
1563 seq = repertoire_find_seq (repertoire, last_wch);
1564
1565 /* If this is the first time we look for this sequence create a new
1566 entry. */
1567 if (seq == NULL)
1568 {
1569 static const struct charseq negative
1570 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1571
1572 /* Find the symbolic name for this UCS4 value. */
1573 if (repertoire != NULL)
1574 {
1575 const char *symbol = repertoire_find_symbol (repertoire,
1576 last_wch);
1577 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1578 sizeof (uint32_t));
1579 *newp = last_wch;
1580
1581 if (symbol != NULL)
1582 /* We have a name, now search the multibyte value. */
1583 seq = charmap_find_value (charmap, symbol, strlen (symbol));
1584
1585 if (seq == NULL)
1586 /* We have to create a fake entry. */
1587 seq = (struct charseq *) &negative;
1588 else
1589 seq->ucs4 = last_wch;
1590
1591 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1592 seq);
1593 }
1594 else
1595 /* We have to create a fake entry. */
1596 seq = (struct charseq *) &negative;
1597 }
1598
1599 /* We have a name, now search the multibyte value. */
1600 if (seq->ucs4 == last_wch && seq->nbytes == 1)
1601 /* Yep, we can store information about this byte sequence. */
1602 ctype->class256_collection[(size_t) seq->bytes[0]]
1603 |= class256_bit;
1604
1605 /* And of course we have the UCS4 position. */
1606 if (class_bit != 0)
1607 *find_idx (ctype, &ctype->class_collection,
1608 &ctype->class_collection_max,
1609 &ctype->class_collection_act, last_wch) |= class_bit;
1610
1611 if (handle_digits == 1)
1612 {
1613 /* We must store the digit values. */
1614 if (ctype->mbdigits_act == ctype->mbdigits_max)
1615 {
1616 ctype->mbdigits_max *= 2;
1617 ctype->mbdigits = xrealloc (ctype->mbdigits,
1618 (ctype->mbdigits_max
1619 * sizeof (char *)));
1620 ctype->wcdigits_max *= 2;
1621 ctype->wcdigits = xrealloc (ctype->wcdigits,
1622 (ctype->wcdigits_max
1623 * sizeof (uint32_t)));
1624 }
1625
1626 ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1627 ? seq : NULL);
1628 ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1629 }
1630 else if (handle_digits == 2)
1631 {
1632 /* We must store the digit values. */
1633 if (ctype->outdigits_act >= 10)
1634 {
1635 lr_error (ldfile, _("\
1636 %s: field `%s' does not contain exactly ten entries"),
1637 "LC_CTYPE", "outdigit");
1638 return;
1639 }
1640
1641 ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1642 ? seq : NULL);
1643 ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1644 ++ctype->outdigits_act;
1645 }
1646 }
1647 }
1648
1649
1650 /* Ellipsis as in `/xea/x12.../xea/x34'. */
1651 static void
1652 charclass_charcode_ellipsis (struct linereader *ldfile,
1653 struct locale_ctype_t *ctype,
1654 struct charmap_t *charmap,
1655 struct repertoire_t *repertoire,
1656 struct token *now, char *last_charcode,
1657 uint32_t last_charcode_len,
1658 unsigned long int class256_bit,
1659 unsigned long int class_bit, int ignore_content,
1660 int handle_digits)
1661 {
1662 /* First check whether the to-value is larger. */
1663 if (now->val.charcode.nbytes != last_charcode_len)
1664 {
1665 lr_error (ldfile, _("\
1666 start end end character sequence of range must have the same length"));
1667 return;
1668 }
1669
1670 if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1671 {
1672 lr_error (ldfile, _("\
1673 to-value character sequence is smaller than from-value sequence"));
1674 return;
1675 }
1676
1677 if (!ignore_content)
1678 {
1679 do
1680 {
1681 /* Increment the byte sequence value. */
1682 struct charseq *seq;
1683 uint32_t wch;
1684 int i;
1685
1686 for (i = last_charcode_len - 1; i >= 0; --i)
1687 if (++last_charcode[i] != 0)
1688 break;
1689
1690 if (last_charcode_len == 1)
1691 /* Of course we have the charcode value. */
1692 ctype->class256_collection[(size_t) last_charcode[0]]
1693 |= class256_bit;
1694
1695 /* Find the symbolic name. */
1696 seq = charmap_find_symbol (charmap, last_charcode,
1697 last_charcode_len);
1698 if (seq != NULL)
1699 {
1700 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1701 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1702 strlen (seq->name));
1703 wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1704
1705 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1706 *find_idx (ctype, &ctype->class_collection,
1707 &ctype->class_collection_max,
1708 &ctype->class_collection_act, wch) |= class_bit;
1709 }
1710 else
1711 wch = ILLEGAL_CHAR_VALUE;
1712
1713 if (handle_digits == 1)
1714 {
1715 /* We must store the digit values. */
1716 if (ctype->mbdigits_act == ctype->mbdigits_max)
1717 {
1718 ctype->mbdigits_max *= 2;
1719 ctype->mbdigits = xrealloc (ctype->mbdigits,
1720 (ctype->mbdigits_max
1721 * sizeof (char *)));
1722 ctype->wcdigits_max *= 2;
1723 ctype->wcdigits = xrealloc (ctype->wcdigits,
1724 (ctype->wcdigits_max
1725 * sizeof (uint32_t)));
1726 }
1727
1728 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1729 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1730 seq->nbytes = last_charcode_len;
1731
1732 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1733 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1734 }
1735 else if (handle_digits == 2)
1736 {
1737 struct charseq *seq;
1738 /* We must store the digit values. */
1739 if (ctype->outdigits_act >= 10)
1740 {
1741 lr_error (ldfile, _("\
1742 %s: field `%s' does not contain exactly ten entries"),
1743 "LC_CTYPE", "outdigit");
1744 return;
1745 }
1746
1747 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1748 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1749 seq->nbytes = last_charcode_len;
1750
1751 ctype->mboutdigits[ctype->outdigits_act] = seq;
1752 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1753 ++ctype->outdigits_act;
1754 }
1755 }
1756 while (memcmp (last_charcode, now->val.charcode.bytes,
1757 last_charcode_len) != 0);
1758 }
1759 }
1760
1761
1762 /* Read one transliteration entry. */
1763 static uint32_t *
1764 read_widestring (struct linereader *ldfile, struct token *now,
1765 struct charmap_t *charmap, struct repertoire_t *repertoire)
1766 {
1767 uint32_t *wstr;
1768
1769 if (now->tok == tok_default_missing)
1770 /* The special name "" will denote this case. */
1771 wstr = ((uint32_t *) { 0 });
1772 else if (now->tok == tok_bsymbol)
1773 {
1774 /* Get the value from the repertoire. */
1775 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1776 wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1777 now->val.str.lenmb);
1778 if (wstr[0] == ILLEGAL_CHAR_VALUE)
1779 {
1780 /* We cannot proceed, we don't know the UCS4 value. */
1781 free (wstr);
1782 return NULL;
1783 }
1784
1785 wstr[1] = 0;
1786 }
1787 else if (now->tok == tok_ucs4)
1788 {
1789 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1790 wstr[0] = now->val.ucs4;
1791 wstr[1] = 0;
1792 }
1793 else if (now->tok == tok_charcode)
1794 {
1795 /* Argh, we have to convert to the symbol name first and then to the
1796 UCS4 value. */
1797 struct charseq *seq = charmap_find_symbol (charmap,
1798 now->val.str.startmb,
1799 now->val.str.lenmb);
1800 if (seq == NULL)
1801 /* Cannot find the UCS4 value. */
1802 return NULL;
1803
1804 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1805 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1806 strlen (seq->name));
1807 if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1808 /* We cannot proceed, we don't know the UCS4 value. */
1809 return NULL;
1810
1811 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1812 wstr[0] = seq->ucs4;
1813 wstr[1] = 0;
1814 }
1815 else if (now->tok == tok_string)
1816 {
1817 wstr = now->val.str.startwc;
1818 if (wstr == NULL || wstr[0] == 0)
1819 return NULL;
1820 }
1821 else
1822 {
1823 if (now->tok != tok_eol && now->tok != tok_eof)
1824 lr_ignore_rest (ldfile, 0);
1825 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1826 return (uint32_t *) -1l;
1827 }
1828
1829 return wstr;
1830 }
1831
1832
1833 static void
1834 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1835 struct token *now, struct charmap_t *charmap,
1836 struct repertoire_t *repertoire)
1837 {
1838 uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1839 struct translit_t *result;
1840 struct translit_to_t **top;
1841 struct obstack *ob = &ctype->mempool;
1842 int first;
1843 int ignore;
1844
1845 if (from_wstr == NULL)
1846 /* There is no valid from string. */
1847 return;
1848
1849 result = (struct translit_t *) obstack_alloc (ob,
1850 sizeof (struct translit_t));
1851 result->from = from_wstr;
1852 result->fname = ldfile->fname;
1853 result->lineno = ldfile->lineno;
1854 result->next = NULL;
1855 result->to = NULL;
1856 top = &result->to;
1857 first = 1;
1858 ignore = 0;
1859
1860 while (1)
1861 {
1862 uint32_t *to_wstr;
1863
1864 /* Next we have one or more transliterations. They are
1865 separated by semicolons. */
1866 now = lr_token (ldfile, charmap, repertoire);
1867
1868 if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1869 {
1870 /* One string read. */
1871 const uint32_t zero = 0;
1872
1873 if (!ignore)
1874 {
1875 obstack_grow (ob, &zero, 4);
1876 to_wstr = obstack_finish (ob);
1877
1878 *top = obstack_alloc (ob, sizeof (struct translit_to_t));
1879 (*top)->str = to_wstr;
1880 (*top)->next = NULL;
1881 }
1882
1883 if (now->tok == tok_eol)
1884 {
1885 result->next = ctype->translit;
1886 ctype->translit = result;
1887 return;
1888 }
1889
1890 if (!ignore)
1891 top = &(*top)->next;
1892 ignore = 0;
1893 }
1894 else
1895 {
1896 to_wstr = read_widestring (ldfile, now, charmap, repertoire);
1897 if (to_wstr == (uint32_t *) -1l)
1898 {
1899 /* An error occurred. */
1900 obstack_free (ob, result);
1901 return;
1902 }
1903
1904 if (to_wstr == NULL)
1905 ignore = 1;
1906 else
1907 /* This value is usable. */
1908 obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
1909
1910 first = 0;
1911 }
1912 }
1913 }
1914
1915
1916 static void
1917 read_translit_ignore_entry (struct linereader *ldfile,
1918 struct locale_ctype_t *ctype,
1919 struct charmap_t *charmap,
1920 struct repertoire_t *repertoire)
1921 {
1922 /* We expect a semicolon-separated list of characters we ignore. We are
1923 only interested in the wide character definitions. These must be
1924 single characters, possibly defining a range when an ellipsis is used. */
1925 while (1)
1926 {
1927 struct token *now = lr_token (ldfile, charmap, repertoire);
1928 struct translit_ignore_t *newp;
1929 uint32_t from;
1930
1931 if (now->tok == tok_eol || now->tok == tok_eof)
1932 {
1933 lr_error (ldfile,
1934 _("premature end of `translit_ignore' definition"));
1935 return;
1936 }
1937
1938 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1939 {
1940 lr_error (ldfile, _("syntax error"));
1941 lr_ignore_rest (ldfile, 0);
1942 return;
1943 }
1944
1945 if (now->tok == tok_ucs4)
1946 from = now->val.ucs4;
1947 else
1948 /* Try to get the value. */
1949 from = repertoire_find_value (repertoire, now->val.str.startmb,
1950 now->val.str.lenmb);
1951
1952 if (from == ILLEGAL_CHAR_VALUE)
1953 {
1954 lr_error (ldfile, "invalid character name");
1955 newp = NULL;
1956 }
1957 else
1958 {
1959 newp = (struct translit_ignore_t *)
1960 obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
1961 newp->from = from;
1962 newp->to = from;
1963 newp->step = 1;
1964
1965 newp->next = ctype->translit_ignore;
1966 ctype->translit_ignore = newp;
1967 }
1968
1969 /* Now we expect either a semicolon, an ellipsis, or the end of the
1970 line. */
1971 now = lr_token (ldfile, charmap, repertoire);
1972
1973 if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
1974 {
1975 /* XXX Should we bother implementing `....'? `...' certainly
1976 will not be implemented. */
1977 uint32_t to;
1978 int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
1979
1980 now = lr_token (ldfile, charmap, repertoire);
1981
1982 if (now->tok == tok_eol || now->tok == tok_eof)
1983 {
1984 lr_error (ldfile,
1985 _("premature end of `translit_ignore' definition"));
1986 return;
1987 }
1988
1989 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1990 {
1991 lr_error (ldfile, _("syntax error"));
1992 lr_ignore_rest (ldfile, 0);
1993 return;
1994 }
1995
1996 if (now->tok == tok_ucs4)
1997 to = now->val.ucs4;
1998 else
1999 /* Try to get the value. */
2000 to = repertoire_find_value (repertoire, now->val.str.startmb,
2001 now->val.str.lenmb);
2002
2003 if (to == ILLEGAL_CHAR_VALUE)
2004 lr_error (ldfile, "invalid character name");
2005 else
2006 {
2007 /* Make sure the `to'-value is larger. */
2008 if (to >= from)
2009 {
2010 newp->to = to;
2011 newp->step = step;
2012 }
2013 else
2014 lr_error (ldfile, _("\
2015 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2016 (to | from) < 65536 ? 4 : 8, to,
2017 (to | from) < 65536 ? 4 : 8, from);
2018 }
2019
2020 /* And the next token. */
2021 now = lr_token (ldfile, charmap, repertoire);
2022 }
2023
2024 if (now->tok == tok_eol || now->tok == tok_eof)
2025 /* We are done. */
2026 return;
2027
2028 if (now->tok == tok_semicolon)
2029 /* Next round. */
2030 continue;
2031
2032 /* If we come here something is wrong. */
2033 lr_error (ldfile, _("syntax error"));
2034 lr_ignore_rest (ldfile, 0);
2035 return;
2036 }
2037 }
2038
2039
2040 /* The parser for the LC_CTYPE section of the locale definition. */
2041 void
2042 ctype_read (struct linereader *ldfile, struct localedef_t *result,
2043 struct charmap_t *charmap, const char *repertoire_name,
2044 int ignore_content)
2045 {
2046 struct repertoire_t *repertoire = NULL;
2047 struct locale_ctype_t *ctype;
2048 struct token *now;
2049 enum token_t nowtok;
2050 size_t cnt;
2051 struct charseq *last_seq;
2052 uint32_t last_wch = 0;
2053 enum token_t last_token;
2054 enum token_t ellipsis_token;
2055 int step;
2056 char last_charcode[16];
2057 size_t last_charcode_len = 0;
2058 const char *last_str = NULL;
2059 int mapidx;
2060 struct localedef_t *copy_locale = NULL;
2061
2062 /* Get the repertoire we have to use. */
2063 if (repertoire_name != NULL)
2064 repertoire = repertoire_read (repertoire_name);
2065
2066 /* The rest of the line containing `LC_CTYPE' must be free. */
2067 lr_ignore_rest (ldfile, 1);
2068
2069
2070 do
2071 {
2072 now = lr_token (ldfile, charmap, NULL);
2073 nowtok = now->tok;
2074 }
2075 while (nowtok == tok_eol);
2076
2077 /* If we see `copy' now we are almost done. */
2078 if (nowtok == tok_copy)
2079 {
2080 now = lr_token (ldfile, charmap, NULL);
2081 if (now->tok != tok_string)
2082 {
2083 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2084
2085 skip_category:
2086 do
2087 now = lr_token (ldfile, charmap, NULL);
2088 while (now->tok != tok_eof && now->tok != tok_end);
2089
2090 if (now->tok != tok_eof
2091 || (now = lr_token (ldfile, charmap, NULL), now->tok == tok_eof))
2092 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2093 else if (now->tok != tok_lc_ctype)
2094 {
2095 lr_error (ldfile, _("\
2096 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2097 lr_ignore_rest (ldfile, 0);
2098 }
2099 else
2100 lr_ignore_rest (ldfile, 1);
2101
2102 return;
2103 }
2104
2105 if (! ignore_content)
2106 {
2107 /* Get the locale definition. */
2108 copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2109 repertoire_name, charmap, NULL);
2110 if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2111 {
2112 /* Not yet loaded. So do it now. */
2113 if (locfile_read (copy_locale, charmap) != 0)
2114 goto skip_category;
2115 }
2116 }
2117
2118 lr_ignore_rest (ldfile, 1);
2119
2120 now = lr_token (ldfile, charmap, NULL);
2121 nowtok = now->tok;
2122 }
2123
2124 /* Prepare the data structures. */
2125 ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2126 ctype = result->categories[LC_CTYPE].ctype;
2127
2128 /* Remember the repertoire we use. */
2129 if (!ignore_content)
2130 ctype->repertoire = repertoire;
2131
2132 while (1)
2133 {
2134 unsigned long int class_bit = 0;
2135 unsigned long int class256_bit = 0;
2136 int handle_digits = 0;
2137
2138 /* Of course we don't proceed beyond the end of file. */
2139 if (nowtok == tok_eof)
2140 break;
2141
2142 /* Ignore empty lines. */
2143 if (nowtok == tok_eol)
2144 {
2145 now = lr_token (ldfile, charmap, NULL);
2146 nowtok = now->tok;
2147 continue;
2148 }
2149
2150 switch (nowtok)
2151 {
2152 case tok_charclass:
2153 now = lr_token (ldfile, charmap, NULL);
2154 while (now->tok == tok_ident || now->tok == tok_string)
2155 {
2156 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2157 now = lr_token (ldfile, charmap, NULL);
2158 if (now->tok != tok_semicolon)
2159 break;
2160 now = lr_token (ldfile, charmap, NULL);
2161 }
2162 if (now->tok != tok_eol)
2163 SYNTAX_ERROR (_("\
2164 %s: syntax error in definition of new character class"), "LC_CTYPE");
2165 break;
2166
2167 case tok_charconv:
2168 now = lr_token (ldfile, charmap, NULL);
2169 while (now->tok == tok_ident || now->tok == tok_string)
2170 {
2171 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2172 now = lr_token (ldfile, charmap, NULL);
2173 if (now->tok != tok_semicolon)
2174 break;
2175 now = lr_token (ldfile, charmap, NULL);
2176 }
2177 if (now->tok != tok_eol)
2178 SYNTAX_ERROR (_("\
2179 %s: syntax error in definition of new character map"), "LC_CTYPE");
2180 break;
2181
2182 case tok_class:
2183 /* Ignore the rest of the line if we don't need the input of
2184 this line. */
2185 if (ignore_content)
2186 {
2187 lr_ignore_rest (ldfile, 0);
2188 break;
2189 }
2190
2191 /* We simply forget the `class' keyword and use the following
2192 operand to determine the bit. */
2193 now = lr_token (ldfile, charmap, NULL);
2194 if (now->tok == tok_ident || now->tok == tok_string)
2195 {
2196 /* This can be one of the already defined class names. */
2197 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2198 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2199 break;
2200 if (cnt >= ctype->nr_charclass)
2201 {
2202 #ifdef PREDEFINED_CLASSES
2203 if (now->val.str.lenmb == 8
2204 && memcmp ("special1", now->val.str.startmb, 8) == 0)
2205 class_bit = _ISwspecial1;
2206 else if (now->val.str.lenmb == 8
2207 && memcmp ("special2", now->val.str.startmb, 8) == 0)
2208 class_bit = _ISwspecial2;
2209 else if (now->val.str.lenmb == 8
2210 && memcmp ("special3", now->val.str.startmb, 8) == 0)
2211 class_bit = _ISwspecial3;
2212 else
2213 #endif
2214 {
2215 /* OK, it's a new class. */
2216 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2217
2218 class_bit = _ISwbit (ctype->nr_charclass - 1);
2219 }
2220 }
2221 else
2222 {
2223 class_bit = _ISwbit (cnt);
2224
2225 free (now->val.str.startmb);
2226 }
2227 }
2228 else if (now->tok == tok_digit)
2229 goto handle_tok_digit;
2230 else if (now->tok < tok_upper || now->tok > tok_blank)
2231 goto err_label;
2232 else
2233 {
2234 class_bit = BITw (now->tok);
2235 class256_bit = BIT (now->tok);
2236 }
2237
2238 /* The next character must be a semicolon. */
2239 now = lr_token (ldfile, charmap, NULL);
2240 if (now->tok != tok_semicolon)
2241 goto err_label;
2242 goto read_charclass;
2243
2244 case tok_upper:
2245 case tok_lower:
2246 case tok_alpha:
2247 case tok_alnum:
2248 case tok_space:
2249 case tok_cntrl:
2250 case tok_punct:
2251 case tok_graph:
2252 case tok_print:
2253 case tok_xdigit:
2254 case tok_blank:
2255 /* Ignore the rest of the line if we don't need the input of
2256 this line. */
2257 if (ignore_content)
2258 {
2259 lr_ignore_rest (ldfile, 0);
2260 break;
2261 }
2262
2263 class_bit = BITw (now->tok);
2264 class256_bit = BIT (now->tok);
2265 handle_digits = 0;
2266 read_charclass:
2267 ctype->class_done |= class_bit;
2268 last_token = tok_none;
2269 ellipsis_token = tok_none;
2270 step = 1;
2271 now = lr_token (ldfile, charmap, NULL);
2272 while (now->tok != tok_eol && now->tok != tok_eof)
2273 {
2274 uint32_t wch;
2275 struct charseq *seq;
2276
2277 if (ellipsis_token == tok_none)
2278 {
2279 if (get_character (now, charmap, repertoire, &seq, &wch))
2280 goto err_label;
2281
2282 if (!ignore_content && seq != NULL && seq->nbytes == 1)
2283 /* Yep, we can store information about this byte
2284 sequence. */
2285 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2286
2287 if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2288 && class_bit != 0)
2289 /* We have the UCS4 position. */
2290 *find_idx (ctype, &ctype->class_collection,
2291 &ctype->class_collection_max,
2292 &ctype->class_collection_act, wch) |= class_bit;
2293
2294 last_token = now->tok;
2295 /* Terminate the string. */
2296 if (last_token == tok_bsymbol)
2297 {
2298 now->val.str.startmb[now->val.str.lenmb] = '\0';
2299 last_str = now->val.str.startmb;
2300 }
2301 else
2302 last_str = NULL;
2303 last_seq = seq;
2304 last_wch = wch;
2305 memcpy (last_charcode, now->val.charcode.bytes, 16);
2306 last_charcode_len = now->val.charcode.nbytes;
2307
2308 if (!ignore_content && handle_digits == 1)
2309 {
2310 /* We must store the digit values. */
2311 if (ctype->mbdigits_act == ctype->mbdigits_max)
2312 {
2313 ctype->mbdigits_max += 10;
2314 ctype->mbdigits = xrealloc (ctype->mbdigits,
2315 (ctype->mbdigits_max
2316 * sizeof (char *)));
2317 ctype->wcdigits_max += 10;
2318 ctype->wcdigits = xrealloc (ctype->wcdigits,
2319 (ctype->wcdigits_max
2320 * sizeof (uint32_t)));
2321 }
2322
2323 ctype->mbdigits[ctype->mbdigits_act++] = seq;
2324 ctype->wcdigits[ctype->wcdigits_act++] = wch;
2325 }
2326 else if (!ignore_content && handle_digits == 2)
2327 {
2328 /* We must store the digit values. */
2329 if (ctype->outdigits_act >= 10)
2330 {
2331 lr_error (ldfile, _("\
2332 %s: field `%s' does not contain exactly ten entries"),
2333 "LC_CTYPE", "outdigit");
2334 lr_ignore_rest (ldfile, 0);
2335 break;
2336 }
2337
2338 ctype->mboutdigits[ctype->outdigits_act] = seq;
2339 ctype->wcoutdigits[ctype->outdigits_act] = wch;
2340 ++ctype->outdigits_act;
2341 }
2342 }
2343 else
2344 {
2345 /* Now it gets complicated. We have to resolve the
2346 ellipsis problem. First we must distinguish between
2347 the different kinds of ellipsis and this must match the
2348 tokens we have seen. */
2349 assert (last_token != tok_none);
2350
2351 if (last_token != now->tok)
2352 {
2353 lr_error (ldfile, _("\
2354 ellipsis range must be marked by two operands of same type"));
2355 lr_ignore_rest (ldfile, 0);
2356 break;
2357 }
2358
2359 if (last_token == tok_bsymbol)
2360 {
2361 if (ellipsis_token == tok_ellipsis3)
2362 lr_error (ldfile, _("with symbolic name range values \
2363 the absolute ellipsis `...' must not be used"));
2364
2365 charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2366 repertoire, now, last_str,
2367 class256_bit, class_bit,
2368 (ellipsis_token
2369 == tok_ellipsis4
2370 ? 10 : 16),
2371 ignore_content,
2372 handle_digits, step);
2373 }
2374 else if (last_token == tok_ucs4)
2375 {
2376 if (ellipsis_token != tok_ellipsis2)
2377 lr_error (ldfile, _("\
2378 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2379
2380 charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2381 repertoire, now, last_wch,
2382 class256_bit, class_bit,
2383 ignore_content, handle_digits,
2384 step);
2385 }
2386 else
2387 {
2388 assert (last_token == tok_charcode);
2389
2390 if (ellipsis_token != tok_ellipsis3)
2391 lr_error (ldfile, _("\
2392 with character code range values one must use the absolute ellipsis `...'"));
2393
2394 charclass_charcode_ellipsis (ldfile, ctype, charmap,
2395 repertoire, now,
2396 last_charcode,
2397 last_charcode_len,
2398 class256_bit, class_bit,
2399 ignore_content,
2400 handle_digits);
2401 }
2402
2403 /* Now we have used the last value. */
2404 last_token = tok_none;
2405 }
2406
2407 /* Next we expect a semicolon or the end of the line. */
2408 now = lr_token (ldfile, charmap, NULL);
2409 if (now->tok == tok_eol || now->tok == tok_eof)
2410 break;
2411
2412 if (last_token != tok_none
2413 && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2414 {
2415 if (now->tok == tok_ellipsis2_2)
2416 {
2417 now->tok = tok_ellipsis2;
2418 step = 2;
2419 }
2420 else if (now->tok == tok_ellipsis4_2)
2421 {
2422 now->tok = tok_ellipsis4;
2423 step = 2;
2424 }
2425
2426 ellipsis_token = now->tok;
2427
2428 now = lr_token (ldfile, charmap, NULL);
2429 continue;
2430 }
2431
2432 if (now->tok != tok_semicolon)
2433 goto err_label;
2434
2435 /* And get the next character. */
2436 now = lr_token (ldfile, charmap, NULL);
2437
2438 ellipsis_token = tok_none;
2439 step = 1;
2440 }
2441 break;
2442
2443 case tok_digit:
2444 /* Ignore the rest of the line if we don't need the input of
2445 this line. */
2446 if (ignore_content)
2447 {
2448 lr_ignore_rest (ldfile, 0);
2449 break;
2450 }
2451
2452 handle_tok_digit:
2453 class_bit = _ISwdigit;
2454 class256_bit = _ISdigit;
2455 handle_digits = 1;
2456 goto read_charclass;
2457
2458 case tok_outdigit:
2459 /* Ignore the rest of the line if we don't need the input of
2460 this line. */
2461 if (ignore_content)
2462 {
2463 lr_ignore_rest (ldfile, 0);
2464 break;
2465 }
2466
2467 if (ctype->outdigits_act != 0)
2468 lr_error (ldfile, _("\
2469 %s: field `%s' declared more than once"),
2470 "LC_CTYPE", "outdigit");
2471 class_bit = 0;
2472 class256_bit = 0;
2473 handle_digits = 2;
2474 goto read_charclass;
2475
2476 case tok_toupper:
2477 /* Ignore the rest of the line if we don't need the input of
2478 this line. */
2479 if (ignore_content)
2480 {
2481 lr_ignore_rest (ldfile, 0);
2482 break;
2483 }
2484
2485 mapidx = 0;
2486 goto read_mapping;
2487
2488 case tok_tolower:
2489 /* Ignore the rest of the line if we don't need the input of
2490 this line. */
2491 if (ignore_content)
2492 {
2493 lr_ignore_rest (ldfile, 0);
2494 break;
2495 }
2496
2497 mapidx = 1;
2498 goto read_mapping;
2499
2500 case tok_map:
2501 /* Ignore the rest of the line if we don't need the input of
2502 this line. */
2503 if (ignore_content)
2504 {
2505 lr_ignore_rest (ldfile, 0);
2506 break;
2507 }
2508
2509 /* We simply forget the `map' keyword and use the following
2510 operand to determine the mapping. */
2511 now = lr_token (ldfile, charmap, NULL);
2512 if (now->tok == tok_ident || now->tok == tok_string)
2513 {
2514 size_t cnt;
2515
2516 for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2517 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2518 break;
2519
2520 if (cnt < ctype->map_collection_nr)
2521 free (now->val.str.startmb);
2522 else
2523 /* OK, it's a new map. */
2524 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2525
2526 mapidx = cnt;
2527 }
2528 else if (now->tok < tok_toupper || now->tok > tok_tolower)
2529 goto err_label;
2530 else
2531 mapidx = now->tok - tok_toupper;
2532
2533 now = lr_token (ldfile, charmap, NULL);
2534 /* This better should be a semicolon. */
2535 if (now->tok != tok_semicolon)
2536 goto err_label;
2537
2538 read_mapping:
2539 /* Test whether this mapping was already defined. */
2540 if (ctype->tomap_done[mapidx])
2541 {
2542 lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2543 ctype->mapnames[mapidx]);
2544 lr_ignore_rest (ldfile, 0);
2545 break;
2546 }
2547 ctype->tomap_done[mapidx] = 1;
2548
2549 now = lr_token (ldfile, charmap, NULL);
2550 while (now->tok != tok_eol && now->tok != tok_eof)
2551 {
2552 struct charseq *from_seq;
2553 uint32_t from_wch;
2554 struct charseq *to_seq;
2555 uint32_t to_wch;
2556
2557 /* Every pair starts with an opening brace. */
2558 if (now->tok != tok_open_brace)
2559 goto err_label;
2560
2561 /* Next comes the from-value. */
2562 now = lr_token (ldfile, charmap, NULL);
2563 if (get_character (now, charmap, repertoire, &from_seq,
2564 &from_wch) != 0)
2565 goto err_label;
2566
2567 /* The next is a comma. */
2568 now = lr_token (ldfile, charmap, NULL);
2569 if (now->tok != tok_comma)
2570 goto err_label;
2571
2572 /* And the other value. */
2573 now = lr_token (ldfile, charmap, NULL);
2574 if (get_character (now, charmap, repertoire, &to_seq,
2575 &to_wch) != 0)
2576 goto err_label;
2577
2578 /* And the last thing is the closing brace. */
2579 now = lr_token (ldfile, charmap, NULL);
2580 if (now->tok != tok_close_brace)
2581 goto err_label;
2582
2583 if (!ignore_content)
2584 {
2585 if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2586 && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2587 /* We can use this value. */
2588 ctype->map256_collection[mapidx][from_seq->bytes[0]]
2589 = to_seq->bytes[0];
2590
2591 if (from_wch != ILLEGAL_CHAR_VALUE
2592 && to_wch != ILLEGAL_CHAR_VALUE)
2593 /* Both correct values. */
2594 *find_idx (ctype, &ctype->map_collection[mapidx],
2595 &ctype->map_collection_max[mapidx],
2596 &ctype->map_collection_act[mapidx],
2597 from_wch) = to_wch;
2598 }
2599
2600 /* Now comes a semicolon or the end of the line/file. */
2601 now = lr_token (ldfile, charmap, NULL);
2602 if (now->tok == tok_semicolon)
2603 now = lr_token (ldfile, charmap, NULL);
2604 }
2605 break;
2606
2607 case tok_translit_start:
2608 /* Ignore the rest of the line if we don't need the input of
2609 this line. */
2610 if (ignore_content)
2611 {
2612 lr_ignore_rest (ldfile, 0);
2613 break;
2614 }
2615
2616 /* The rest of the line better should be empty. */
2617 lr_ignore_rest (ldfile, 1);
2618
2619 /* We count here the number of allocated entries in the `translit'
2620 array. */
2621 cnt = 0;
2622
2623 ldfile->translate_strings = 1;
2624 ldfile->return_widestr = 1;
2625
2626 /* We proceed until we see the `translit_end' token. */
2627 while (now = lr_token (ldfile, charmap, repertoire),
2628 now->tok != tok_translit_end && now->tok != tok_eof)
2629 {
2630 if (now->tok == tok_eol)
2631 /* Ignore empty lines. */
2632 continue;
2633
2634 if (now->tok == tok_translit_end)
2635 {
2636 lr_ignore_rest (ldfile, 0);
2637 break;
2638 }
2639
2640 if (now->tok == tok_include)
2641 {
2642 /* We have to include locale. */
2643 const char *locale_name;
2644 const char *repertoire_name;
2645
2646 now = lr_token (ldfile, charmap, NULL);
2647 /* This should be a string or an identifier. In any
2648 case something to name a locale. */
2649 if (now->tok != tok_string && now->tok != tok_ident)
2650 {
2651 translit_syntax:
2652 lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2653 lr_ignore_rest (ldfile, 0);
2654 continue;
2655 }
2656 locale_name = now->val.str.startmb;
2657
2658 /* Next should be a semicolon. */
2659 now = lr_token (ldfile, charmap, NULL);
2660 if (now->tok != tok_semicolon)
2661 goto translit_syntax;
2662
2663 /* Now the repertoire name. */
2664 now = lr_token (ldfile, charmap, NULL);
2665 if ((now->tok != tok_string && now->tok != tok_ident)
2666 || now->val.str.startmb == NULL)
2667 goto translit_syntax;
2668 repertoire_name = now->val.str.startmb;
2669
2670 /* We must not have more than one `include'. */
2671 if (ctype->translit_copy_locale != NULL)
2672 {
2673 lr_error (ldfile, _("\
2674 %s: only one `include' instruction allowed"), "LC_CTYPE");
2675 lr_ignore_rest (ldfile, 0);
2676 continue;
2677 }
2678
2679 ctype->translit_copy_locale = locale_name;
2680 ctype->translit_copy_repertoire = repertoire_name;
2681
2682 /* The rest of the line must be empty. */
2683 lr_ignore_rest (ldfile, 1);
2684
2685 /* Make sure the locale is read. */
2686 add_to_readlist (LC_CTYPE, ctype->translit_copy_locale,
2687 repertoire_name, 1, NULL);
2688 continue;
2689 }
2690 else if (now->tok == tok_default_missing)
2691 {
2692 uint32_t *wstr;
2693
2694 while (1)
2695 {
2696 /* We expect a single character or string as the
2697 argument. */
2698 now = lr_token (ldfile, charmap, NULL);
2699 wstr = read_widestring (ldfile, now, charmap,
2700 repertoire);
2701
2702 if (wstr != NULL)
2703 {
2704 if (ctype->default_missing != NULL)
2705 {
2706 lr_error (ldfile, _("\
2707 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2708 error_at_line (0, 0, ctype->default_missing_file,
2709 ctype->default_missing_lineno,
2710 _("\
2711 previous definition was here"));
2712 }
2713 else
2714 {
2715 ctype->default_missing = wstr;
2716 ctype->default_missing_file = ldfile->fname;
2717 ctype->default_missing_lineno = ldfile->lineno;
2718 }
2719 /* We can have more entries, ignore them. */
2720 lr_ignore_rest (ldfile, 0);
2721 break;
2722 }
2723 else if (wstr == (uint32_t *) -1l)
2724 /* This was a syntax error. */
2725 break;
2726
2727 /* Maybe there is another replacement we can use. */
2728 now = lr_token (ldfile, charmap, NULL);
2729 if (now->tok == tok_eol || now->tok == tok_eof)
2730 {
2731 /* Nothing found. We tell the user. */
2732 lr_error (ldfile, _("\
2733 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2734 break;
2735 }
2736 if (now->tok != tok_semicolon)
2737 goto translit_syntax;
2738 }
2739
2740 continue;
2741 }
2742 else if (now->tok == tok_translit_ignore)
2743 {
2744 read_translit_ignore_entry (ldfile, ctype, charmap,
2745 repertoire);
2746 continue;
2747 }
2748
2749 read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2750 }
2751 ldfile->return_widestr = 0;
2752 break;
2753
2754 case tok_ident:
2755 /* Ignore the rest of the line if we don't need the input of
2756 this line. */
2757 if (ignore_content)
2758 {
2759 lr_ignore_rest (ldfile, 0);
2760 break;
2761 }
2762
2763 /* This could mean one of several things. First test whether
2764 it's a character class name. */
2765 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2766 if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2767 break;
2768 if (cnt < ctype->nr_charclass)
2769 {
2770 class_bit = _ISwbit (cnt);
2771 class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2772 free (now->val.str.startmb);
2773 goto read_charclass;
2774 }
2775 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2776 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2777 break;
2778 if (cnt < ctype->map_collection_nr)
2779 {
2780 mapidx = cnt;
2781 free (now->val.str.startmb);
2782 goto read_mapping;
2783 }
2784 #ifdef PREDEFINED_CLASSES
2785 if (strcmp (now->val.str.startmb, "special1") == 0)
2786 {
2787 class_bit = _ISwspecial1;
2788 free (now->val.str.startmb);
2789 goto read_charclass;
2790 }
2791 if (strcmp (now->val.str.startmb, "special2") == 0)
2792 {
2793 class_bit = _ISwspecial2;
2794 free (now->val.str.startmb);
2795 goto read_charclass;
2796 }
2797 if (strcmp (now->val.str.startmb, "special3") == 0)
2798 {
2799 class_bit = _ISwspecial3;
2800 free (now->val.str.startmb);
2801 goto read_charclass;
2802 }
2803 if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2804 {
2805 mapidx = 2;
2806 goto read_mapping;
2807 }
2808 #endif
2809 break;
2810
2811 case tok_end:
2812 /* Next we assume `LC_CTYPE'. */
2813 now = lr_token (ldfile, charmap, NULL);
2814 if (now->tok == tok_eof)
2815 break;
2816 if (now->tok == tok_eol)
2817 lr_error (ldfile, _("%s: incomplete `END' line"),
2818 "LC_CTYPE");
2819 else if (now->tok != tok_lc_ctype)
2820 lr_error (ldfile, _("\
2821 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2822 lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2823 return;
2824
2825 default:
2826 err_label:
2827 if (now->tok != tok_eof)
2828 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2829 }
2830
2831 /* Prepare for the next round. */
2832 now = lr_token (ldfile, charmap, NULL);
2833 nowtok = now->tok;
2834 }
2835
2836 /* When we come here we reached the end of the file. */
2837 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2838 }
2839
2840
2841 static void
2842 set_class_defaults (struct locale_ctype_t *ctype, struct charmap_t *charmap,
2843 struct repertoire_t *repertoire)
2844 {
2845 size_t cnt;
2846
2847 /* This function defines the default values for the classes and conversions
2848 according to POSIX.2 2.5.2.1.
2849 It may seem that the order of these if-blocks is arbitrary but it is NOT.
2850 Don't move them unless you know what you are doing! */
2851
2852 void set_default (int bitpos, int from, int to)
2853 {
2854 char tmp[2];
2855 int ch;
2856 int bit = _ISbit (bitpos);
2857 int bitw = _ISwbit (bitpos);
2858 /* Define string. */
2859 strcpy (tmp, "?");
2860
2861 for (ch = from; ch <= to; ++ch)
2862 {
2863 struct charseq *seq;
2864 tmp[0] = ch;
2865
2866 seq = charmap_find_value (charmap, tmp, 1);
2867 if (seq == NULL)
2868 {
2869 char buf[10];
2870 sprintf (buf, "U%08X", ch);
2871 seq = charmap_find_value (charmap, buf, 9);
2872 }
2873 if (seq == NULL)
2874 {
2875 if (!be_quiet)
2876 error (0, 0, _("\
2877 %s: character `%s' not defined in charmap while needed as default value"),
2878 "LC_CTYPE", tmp);
2879 }
2880 else if (seq->nbytes != 1)
2881 error (0, 0, _("\
2882 %s: character `%s' in charmap not representable with one byte"),
2883 "LC_CTYPE", tmp);
2884 else
2885 ctype->class256_collection[seq->bytes[0]] |= bit;
2886
2887 /* No need to search here, the ASCII value is also the Unicode
2888 value. */
2889 ELEM (ctype, class_collection, , ch) |= bitw;
2890 }
2891 }
2892
2893 /* Set default values if keyword was not present. */
2894 if ((ctype->class_done & BITw (tok_upper)) == 0)
2895 /* "If this keyword [upper] is not specified, the uppercase letters
2896 `A' through `Z', ..., shall automatically belong to this class,
2897 with implementation defined character values." [P1003.2, 2.5.2.1] */
2898 set_default (BITPOS (tok_upper), 'A', 'Z');
2899
2900 if ((ctype->class_done & BITw (tok_lower)) == 0)
2901 /* "If this keyword [lower] is not specified, the lowercase letters
2902 `a' through `z', ..., shall automatically belong to this class,
2903 with implementation defined character values." [P1003.2, 2.5.2.1] */
2904 set_default (BITPOS (tok_lower), 'a', 'z');
2905
2906 if ((ctype->class_done & BITw (tok_alpha)) == 0)
2907 {
2908 /* Table 2-6 in P1003.2 says that characters in class `upper' or
2909 class `lower' *must* be in class `alpha'. */
2910 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
2911 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
2912
2913 for (cnt = 0; cnt < 256; ++cnt)
2914 if ((ctype->class256_collection[cnt] & mask) != 0)
2915 ctype->class256_collection[cnt] |= BIT (tok_alpha);
2916
2917 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2918 if ((ctype->class_collection[cnt] & maskw) != 0)
2919 ctype->class_collection[cnt] |= BITw (tok_alpha);
2920 }
2921
2922 if ((ctype->class_done & BITw (tok_digit)) == 0)
2923 /* "If this keyword [digit] is not specified, the digits `0' through
2924 `9', ..., shall automatically belong to this class, with
2925 implementation-defined character values." [P1003.2, 2.5.2.1] */
2926 set_default (BITPOS (tok_digit), '0', '9');
2927
2928 /* Class `alnum': "Only characters specified for the `alpha' and `digit'
2929 keyword shall be specified. Characters specified for the keyword `alpha'
2930 and `digit' are automatically included in this class." */
2931 {
2932 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
2933 unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
2934
2935 for (cnt = 0; cnt < 256; ++cnt)
2936 if ((ctype->class256_collection[cnt] & mask) != 0)
2937 ctype->class256_collection[cnt] |= BIT (tok_alnum);
2938
2939 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2940 if ((ctype->class_collection[cnt] & maskw) != 0)
2941 ctype->class_collection[cnt] |= BITw (tok_alnum);
2942 }
2943
2944 if ((ctype->class_done & BITw (tok_space)) == 0)
2945 /* "If this keyword [space] is not specified, the characters <space>,
2946 <form-feed>, <newline>, <carriage-return>, <tab>, and
2947 <vertical-tab>, ..., shall automatically belong to this class,
2948 with implementation-defined character values." [P1003.2, 2.5.2.1] */
2949 {
2950 struct charseq *seq;
2951
2952 seq = charmap_find_value (charmap, "space", 5);
2953 if (seq == NULL)
2954 seq = charmap_find_value (charmap, "SP", 2);
2955 if (seq == NULL)
2956 seq = charmap_find_value (charmap, "U00000020", 9);
2957 if (seq == NULL)
2958 {
2959 if (!be_quiet)
2960 error (0, 0, _("\
2961 %s: character `%s' not defined while needed as default value"),
2962 "LC_CTYPE", "<space>");
2963 }
2964 else if (seq->nbytes != 1)
2965 error (0, 0, _("\
2966 %s: character `%s' in charmap not representable with one byte"),
2967 "LC_CTYPE", "<space>");
2968 else
2969 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2970
2971 /* No need to search. */
2972 ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
2973
2974 seq = charmap_find_value (charmap, "form-feed", 9);
2975 if (seq == NULL)
2976 seq = charmap_find_value (charmap, "U0000000C", 9);
2977 if (seq == NULL)
2978 {
2979 if (!be_quiet)
2980 error (0, 0, _("\
2981 %s: character `%s' not defined while needed as default value"),
2982 "LC_CTYPE", "<form-feed>");
2983 }
2984 else if (seq->nbytes != 1)
2985 error (0, 0, _("\
2986 %s: character `%s' in charmap not representable with one byte"),
2987 "LC_CTYPE", "<form-feed>");
2988 else
2989 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2990
2991 /* No need to search. */
2992 ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
2993
2994
2995 seq = charmap_find_value (charmap, "newline", 7);
2996 if (seq == NULL)
2997 seq = charmap_find_value (charmap, "U0000000A", 9);
2998 if (seq == NULL)
2999 {
3000 if (!be_quiet)
3001 error (0, 0, _("\
3002 character `%s' not defined while needed as default value"),
3003 "<newline>");
3004 }
3005 else if (seq->nbytes != 1)
3006 error (0, 0, _("\
3007 %s: character `%s' in charmap not representable with one byte"),
3008 "LC_CTYPE", "<newline>");
3009 else
3010 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3011
3012 /* No need to search. */
3013 ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
3014
3015
3016 seq = charmap_find_value (charmap, "carriage-return", 15);
3017 if (seq == NULL)
3018 seq = charmap_find_value (charmap, "U0000000D", 9);
3019 if (seq == NULL)
3020 {
3021 if (!be_quiet)
3022 error (0, 0, _("\
3023 %s: character `%s' not defined while needed as default value"),
3024 "LC_CTYPE", "<carriage-return>");
3025 }
3026 else if (seq->nbytes != 1)
3027 error (0, 0, _("\
3028 %s: character `%s' in charmap not representable with one byte"),
3029 "LC_CTYPE", "<carriage-return>");
3030 else
3031 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3032
3033 /* No need to search. */
3034 ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
3035
3036
3037 seq = charmap_find_value (charmap, "tab", 3);
3038 if (seq == NULL)
3039 seq = charmap_find_value (charmap, "U00000009", 9);
3040 if (seq == NULL)
3041 {
3042 if (!be_quiet)
3043 error (0, 0, _("\
3044 %s: character `%s' not defined while needed as default value"),
3045 "LC_CTYPE", "<tab>");
3046 }
3047 else if (seq->nbytes != 1)
3048 error (0, 0, _("\
3049 %s: character `%s' in charmap not representable with one byte"),
3050 "LC_CTYPE", "<tab>");
3051 else
3052 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3053
3054 /* No need to search. */
3055 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
3056
3057
3058 seq = charmap_find_value (charmap, "vertical-tab", 12);
3059 if (seq == NULL)
3060 seq = charmap_find_value (charmap, "U0000000B", 9);
3061 if (seq == NULL)
3062 {
3063 if (!be_quiet)
3064 error (0, 0, _("\
3065 %s: character `%s' not defined while needed as default value"),
3066 "LC_CTYPE", "<vertical-tab>");
3067 }
3068 else if (seq->nbytes != 1)
3069 error (0, 0, _("\
3070 %s: character `%s' in charmap not representable with one byte"),
3071 "LC_CTYPE", "<vertical-tab>");
3072 else
3073 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3074
3075 /* No need to search. */
3076 ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
3077 }
3078
3079 if ((ctype->class_done & BITw (tok_xdigit)) == 0)
3080 /* "If this keyword is not specified, the digits `0' to `9', the
3081 uppercase letters `A' through `F', and the lowercase letters `a'
3082 through `f', ..., shall automatically belong to this class, with
3083 implementation defined character values." [P1003.2, 2.5.2.1] */
3084 {
3085 set_default (BITPOS (tok_xdigit), '0', '9');
3086 set_default (BITPOS (tok_xdigit), 'A', 'F');
3087 set_default (BITPOS (tok_xdigit), 'a', 'f');
3088 }
3089
3090 if ((ctype->class_done & BITw (tok_blank)) == 0)
3091 /* "If this keyword [blank] is unspecified, the characters <space> and
3092 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3093 {
3094 struct charseq *seq;
3095
3096 seq = charmap_find_value (charmap, "space", 5);
3097 if (seq == NULL)
3098 seq = charmap_find_value (charmap, "SP", 2);
3099 if (seq == NULL)
3100 seq = charmap_find_value (charmap, "U00000020", 9);
3101 if (seq == NULL)
3102 {
3103 if (!be_quiet)
3104 error (0, 0, _("\
3105 %s: character `%s' not defined while needed as default value"),
3106 "LC_CTYPE", "<space>");
3107 }
3108 else if (seq->nbytes != 1)
3109 error (0, 0, _("\
3110 %s: character `%s' in charmap not representable with one byte"),
3111 "LC_CTYPE", "<space>");
3112 else
3113 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3114
3115 /* No need to search. */
3116 ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3117
3118
3119 seq = charmap_find_value (charmap, "tab", 3);
3120 if (seq == NULL)
3121 seq = charmap_find_value (charmap, "U00000009", 9);
3122 if (seq == NULL)
3123 {
3124 if (!be_quiet)
3125 error (0, 0, _("\
3126 %s: character `%s' not defined while needed as default value"),
3127 "LC_CTYPE", "<tab>");
3128 }
3129 else if (seq->nbytes != 1)
3130 error (0, 0, _("\
3131 %s: character `%s' in charmap not representable with one byte"),
3132 "LC_CTYPE", "<tab>");
3133 else
3134 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3135
3136 /* No need to search. */
3137 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3138 }
3139
3140 if ((ctype->class_done & BITw (tok_graph)) == 0)
3141 /* "If this keyword [graph] is not specified, characters specified for
3142 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3143 shall belong to this character class." [P1003.2, 2.5.2.1] */
3144 {
3145 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3146 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3147 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3148 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3149 BITw (tok_punct);
3150 size_t cnt;
3151
3152 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3153 if ((ctype->class_collection[cnt] & maskw) != 0)
3154 ctype->class_collection[cnt] |= BITw (tok_graph);
3155
3156 for (cnt = 0; cnt < 256; ++cnt)
3157 if ((ctype->class256_collection[cnt] & mask) != 0)
3158 ctype->class256_collection[cnt] |= BIT (tok_graph);
3159 }
3160
3161 if ((ctype->class_done & BITw (tok_print)) == 0)
3162 /* "If this keyword [print] is not provided, characters specified for
3163 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3164 and the <space> character shall belong to this character class."
3165 [P1003.2, 2.5.2.1] */
3166 {
3167 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3168 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3169 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3170 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3171 BITw (tok_punct);
3172 size_t cnt;
3173 struct charseq *seq;
3174
3175 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3176 if ((ctype->class_collection[cnt] & maskw) != 0)
3177 ctype->class_collection[cnt] |= BITw (tok_print);
3178
3179 for (cnt = 0; cnt < 256; ++cnt)
3180 if ((ctype->class256_collection[cnt] & mask) != 0)
3181 ctype->class256_collection[cnt] |= BIT (tok_print);
3182
3183
3184 seq = charmap_find_value (charmap, "space", 5);
3185 if (seq == NULL)
3186 seq = charmap_find_value (charmap, "SP", 2);
3187 if (seq == NULL)
3188 seq = charmap_find_value (charmap, "U00000020", 9);
3189 if (seq == NULL)
3190 {
3191 if (!be_quiet)
3192 error (0, 0, _("\
3193 %s: character `%s' not defined while needed as default value"),
3194 "LC_CTYPE", "<space>");
3195 }
3196 else if (seq->nbytes != 1)
3197 error (0, 0, _("\
3198 %s: character `%s' in charmap not representable with one byte"),
3199 "LC_CTYPE", "<space>");
3200 else
3201 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3202
3203 /* No need to search. */
3204 ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3205 }
3206
3207 if (ctype->tomap_done[0] == 0)
3208 /* "If this keyword [toupper] is not specified, the lowercase letters
3209 `a' through `z', and their corresponding uppercase letters `A' to
3210 `Z', ..., shall automatically be included, with implementation-
3211 defined character values." [P1003.2, 2.5.2.1] */
3212 {
3213 char tmp[4];
3214 int ch;
3215
3216 strcpy (tmp, "<?>");
3217
3218 for (ch = 'a'; ch <= 'z'; ++ch)
3219 {
3220 struct charseq *seq_from, *seq_to;
3221
3222 tmp[1] = (char) ch;
3223
3224 seq_from = charmap_find_value (charmap, &tmp[1], 1);
3225 if (seq_from == NULL)
3226 {
3227 char buf[10];
3228 sprintf (buf, "U%08X", ch);
3229 seq_from = charmap_find_value (charmap, buf, 9);
3230 }
3231 if (seq_from == NULL)
3232 {
3233 if (!be_quiet)
3234 error (0, 0, _("\
3235 %s: character `%s' not defined while needed as default value"),
3236 "LC_CTYPE", tmp);
3237 }
3238 else if (seq_from->nbytes != 1)
3239 {
3240 if (!be_quiet)
3241 error (0, 0, _("\
3242 %s: character `%s' needed as default value not representable with one byte"),
3243 "LC_CTYPE", tmp);
3244 }
3245 else
3246 {
3247 /* This conversion is implementation defined. */
3248 tmp[1] = (char) (ch + ('A' - 'a'));
3249 seq_to = charmap_find_value (charmap, &tmp[1], 1);
3250 if (seq_to == NULL)
3251 {
3252 char buf[10];
3253 sprintf (buf, "U%08X", ch + ('A' - 'a'));
3254 seq_to = charmap_find_value (charmap, buf, 9);
3255 }
3256 if (seq_to == NULL)
3257 {
3258 if (!be_quiet)
3259 error (0, 0, _("\
3260 %s: character `%s' not defined while needed as default value"),
3261 "LC_CTYPE", tmp);
3262 }
3263 else if (seq_to->nbytes != 1)
3264 {
3265 if (!be_quiet)
3266 error (0, 0, _("\
3267 %s: character `%s' needed as default value not representable with one byte"),
3268 "LC_CTYPE", tmp);
3269 }
3270 else
3271 /* The index [0] is determined by the order of the
3272 `ctype_map_newP' calls in `ctype_startup'. */
3273 ctype->map256_collection[0][seq_from->bytes[0]]
3274 = seq_to->bytes[0];
3275 }
3276
3277 /* No need to search. */
3278 ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3279 }
3280 }
3281
3282 if (ctype->tomap_done[1] == 0)
3283 /* "If this keyword [tolower] is not specified, the mapping shall be
3284 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3285 {
3286 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3287 if (ctype->map_collection[0][cnt] != 0)
3288 ELEM (ctype, map_collection, [1],
3289 ctype->map_collection[0][cnt])
3290 = ctype->charnames[cnt];
3291
3292 for (cnt = 0; cnt < 256; ++cnt)
3293 if (ctype->map256_collection[0][cnt] != 0)
3294 ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3295 }
3296
3297 if (ctype->outdigits_act != 10)
3298 {
3299 if (ctype->outdigits_act != 0)
3300 error (0,0, _("%s: field `%s' does not contain exactly ten entries"),
3301 "LC_CTYPE", "outdigit");
3302
3303 for (cnt = ctype->outdigits_act; cnt < 10; ++cnt)
3304 {
3305 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3306 digits + cnt, 1);
3307
3308 if (ctype->mboutdigits[cnt] == NULL)
3309 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3310 longnames[cnt],
3311 strlen (longnames[cnt]));
3312
3313 if (ctype->mboutdigits[cnt] == NULL)
3314 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3315 uninames[cnt], 9);
3316
3317 if (ctype->mboutdigits[cnt] == NULL)
3318 {
3319 /* Provide a replacement. */
3320 error (0, 0, _("\
3321 no output digits defined and none of the standard names in the charmap"));
3322
3323 ctype->mboutdigits[cnt] = obstack_alloc (&charmap->mem_pool,
3324 sizeof (struct charseq)
3325 + 1);
3326
3327 /* This is better than nothing. */
3328 ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3329 ctype->mboutdigits[cnt]->nbytes = 1;
3330 }
3331
3332 ctype->wcoutdigits[cnt] = L'0' + cnt;
3333 }
3334
3335 ctype->outdigits_act = 10;
3336 }
3337 }
3338
3339
3340 /* Construction of sparse 3-level tables.
3341 See wchar-lookup.h for their structure and the meaning of p and q. */
3342
struct wctype_table
{
  /* Table parameters, chosen by the user of the table before calling
     wctype_table_init: level-3 blocks hold 1 << p words and level-2
     blocks hold 1 << q entries.  */
  unsigned int p;
  unsigned int q;

  /* Working representation, grown on demand by wctype_table_add.
     For every level `alloc' is the capacity and `size' the part in use;
     level 1 counts single entries, levels 2 and 3 count whole blocks.  */
  size_t level1_alloc, level1_size;
  uint32_t *level1;
  size_t level2_alloc, level2_size;
  uint32_t *level2;
  size_t level3_alloc, level3_size;
  uint32_t *level3;

  /* Compressed representation, produced by wctype_table_finalize:
     a buffer of result_size bytes.  */
  size_t result_size;
  char *result;
};
3362
3363 /* Initialize. Assumes t->p and t->q have already been set. */
3364 static inline void
3365 wctype_table_init (struct wctype_table *t)
3366 {
3367 t->level1_alloc = t->level1_size = 0;
3368 t->level2_alloc = t->level2_size = 0;
3369 t->level3_alloc = t->level3_size = 0;
3370 }
3371
3372 /* Retrieve an entry. */
3373 static inline int
3374 wctype_table_get (struct wctype_table *t, uint32_t wc)
3375 {
3376 uint32_t index1 = wc >> (t->q + t->p + 5);
3377 if (index1 < t->level1_size)
3378 {
3379 uint32_t lookup1 = t->level1[index1];
3380 if (lookup1 != ~((uint32_t) 0))
3381 {
3382 uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3383 + (lookup1 << t->q);
3384 uint32_t lookup2 = t->level2[index2];
3385 if (lookup2 != ~((uint32_t) 0))
3386 {
3387 uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3388 + (lookup2 << t->p);
3389 uint32_t lookup3 = t->level3[index3];
3390 uint32_t index4 = wc & 0x1f;
3391
3392 return (lookup3 >> index4) & 1;
3393 }
3394 }
3395 }
3396 return 0;
3397 }
3398
3399 /* Add one entry. */
3400 static void
3401 wctype_table_add (struct wctype_table *t, uint32_t wc)
3402 {
3403 uint32_t index1 = wc >> (t->q + t->p + 5);
3404 uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3405 uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3406 uint32_t index4 = wc & 0x1f;
3407 size_t i, i1, i2;
3408
3409 if (index1 >= t->level1_size)
3410 {
3411 if (index1 >= t->level1_alloc)
3412 {
3413 size_t alloc = 2 * t->level1_alloc;
3414 if (alloc <= index1)
3415 alloc = index1 + 1;
3416 t->level1 = (t->level1_alloc > 0
3417 ? (uint32_t *) xrealloc ((char *) t->level1,
3418 alloc * sizeof (uint32_t))
3419 : (uint32_t *) xmalloc (alloc * sizeof (uint32_t)));
3420 t->level1_alloc = alloc;
3421 }
3422 while (index1 >= t->level1_size)
3423 t->level1[t->level1_size++] = ~((uint32_t) 0);
3424 }
3425
3426 if (t->level1[index1] == ~((uint32_t) 0))
3427 {
3428 if (t->level2_size == t->level2_alloc)
3429 {
3430 size_t alloc = 2 * t->level2_alloc + 1;
3431 t->level2 = (t->level2_alloc > 0
3432 ? (uint32_t *) xrealloc ((char *) t->level2,
3433 (alloc << t->q) * sizeof (uint32_t))
3434 : (uint32_t *) xmalloc ((alloc << t->q) * sizeof (uint32_t)));
3435 t->level2_alloc = alloc;
3436 }
3437 i1 = t->level2_size << t->q;
3438 i2 = (t->level2_size + 1) << t->q;
3439 for (i = i1; i < i2; i++)
3440 t->level2[i] = ~((uint32_t) 0);
3441 t->level1[index1] = t->level2_size++;
3442 }
3443
3444 index2 += t->level1[index1] << t->q;
3445
3446 if (t->level2[index2] == ~((uint32_t) 0))
3447 {
3448 if (t->level3_size == t->level3_alloc)
3449 {
3450 size_t alloc = 2 * t->level3_alloc + 1;
3451 t->level3 = (t->level3_alloc > 0
3452 ? (uint32_t *) xrealloc ((char *) t->level3,
3453 (alloc << t->p) * sizeof (uint32_t))
3454 : (uint32_t *) xmalloc ((alloc << t->p) * sizeof (uint32_t)));
3455 t->level3_alloc = alloc;
3456 }
3457 i1 = t->level3_size << t->p;
3458 i2 = (t->level3_size + 1) << t->p;
3459 for (i = i1; i < i2; i++)
3460 t->level3[i] = 0;
3461 t->level2[index2] = t->level3_size++;
3462 }
3463
3464 index3 += t->level2[index2] << t->p;
3465
3466 t->level3[index3] |= (uint32_t)1 << index4;
3467 }
3468
3469 /* Finalize and shrink. */
3470 static void
3471 wctype_table_finalize (struct wctype_table *t)
3472 {
3473 size_t i, j, k;
3474 uint32_t reorder3[t->level3_size];
3475 uint32_t reorder2[t->level2_size];
3476 uint32_t level1_offset, level2_offset, level3_offset;
3477
3478 /* Uniquify level3 blocks. */
3479 k = 0;
3480 for (j = 0; j < t->level3_size; j++)
3481 {
3482 for (i = 0; i < k; i++)
3483 if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3484 (1 << t->p) * sizeof (uint32_t)) == 0)
3485 break;
3486 /* Relocate block j to block i. */
3487 reorder3[j] = i;
3488 if (i == k)
3489 {
3490 if (i != j)
3491 memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3492 (1 << t->p) * sizeof (uint32_t));
3493 k++;
3494 }
3495 }
3496 t->level3_size = k;
3497
3498 for (i = 0; i < (t->level2_size << t->q); i++)
3499 if (t->level2[i] != ~((uint32_t) 0))
3500 t->level2[i] = reorder3[t->level2[i]];
3501
3502 /* Uniquify level2 blocks. */
3503 k = 0;
3504 for (j = 0; j < t->level2_size; j++)
3505 {
3506 for (i = 0; i < k; i++)
3507 if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3508 (1 << t->q) * sizeof (uint32_t)) == 0)
3509 break;
3510 /* Relocate block j to block i. */
3511 reorder2[j] = i;
3512 if (i == k)
3513 {
3514 if (i != j)
3515 memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3516 (1 << t->q) * sizeof (uint32_t));
3517 k++;
3518 }
3519 }
3520 t->level2_size = k;
3521
3522 for (i = 0; i < t->level1_size; i++)
3523 if (t->level1[i] != ~((uint32_t) 0))
3524 t->level1[i] = reorder2[t->level1[i]];
3525
3526 /* Create and fill the resulting compressed representation. */
3527 t->result_size =
3528 5 * sizeof (uint32_t)
3529 + t->level1_size * sizeof (uint32_t)
3530 + (t->level2_size << t->q) * sizeof (uint32_t)
3531 + (t->level3_size << t->p) * sizeof (uint32_t);
3532 t->result = (char *) xmalloc (t->result_size);
3533
3534 level1_offset =
3535 5 * sizeof (uint32_t);
3536 level2_offset =
3537 5 * sizeof (uint32_t)
3538 + t->level1_size * sizeof (uint32_t);
3539 level3_offset =
3540 5 * sizeof (uint32_t)
3541 + t->level1_size * sizeof (uint32_t)
3542 + (t->level2_size << t->q) * sizeof (uint32_t);
3543
3544 ((uint32_t *) t->result)[0] = t->q + t->p + 5;
3545 ((uint32_t *) t->result)[1] = t->level1_size;
3546 ((uint32_t *) t->result)[2] = t->p + 5;
3547 ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3548 ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3549
3550 for (i = 0; i < t->level1_size; i++)
3551 ((uint32_t *) (t->result + level1_offset))[i] =
3552 (t->level1[i] == ~((uint32_t) 0)
3553 ? 0
3554 : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3555
3556 for (i = 0; i < (t->level2_size << t->q); i++)
3557 ((uint32_t *) (t->result + level2_offset))[i] =
3558 (t->level2[i] == ~((uint32_t) 0)
3559 ? 0
3560 : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3561
3562 for (i = 0; i < (t->level3_size << t->p); i++)
3563 ((uint32_t *) (t->result + level3_offset))[i] = t->level3[i];
3564
3565 if (t->level1_alloc > 0)
3566 free (t->level1);
3567 if (t->level2_alloc > 0)
3568 free (t->level2);
3569 if (t->level3_alloc > 0)
3570 free (t->level3);
3571 }
3572
3573 #define TABLE wcwidth_table
3574 #define ELEMENT uint8_t
3575 #define DEFAULT 0xff
3576 #include "3level.h"
3577
3578 #define TABLE wctrans_table
3579 #define ELEMENT int32_t
3580 #define DEFAULT 0
3581 #define wctrans_table_add wctrans_table_add_internal
3582 #include "3level.h"
3583 #undef wctrans_table_add
3584 /* The wctrans_table must actually store the difference between the
3585 desired result and the argument. */
3586 static inline void
3587 wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
3588 {
3589 wctrans_table_add_internal (t, wc, mapped_wc - wc);
3590 }
3591
3592
3593 static void
3594 allocate_arrays (struct locale_ctype_t *ctype, struct charmap_t *charmap,
3595 struct repertoire_t *repertoire)
3596 {
3597 size_t idx, nr;
3598 const void *key;
3599 size_t len;
3600 void *vdata;
3601 void *curs;
3602
3603 /* You wonder about this amount of memory? This is only because some
3604 users do not manage to address the array with unsigned values or
3605 data types with range >= 256. '\200' would result in the array
3606 index -128. To help these poor people we duplicate the entries for
3607 128 up to 255 below the entry for \0. */
3608 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
3609 ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
3610 ctype->class_b = (uint32_t **)
3611 xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3612 ctype->class_3level = (struct iovec *)
3613 xmalloc (ctype->nr_charclass * sizeof (struct iovec));
3614
3615 /* This is the array accessed using the multibyte string elements. */
3616 for (idx = 0; idx < 256; ++idx)
3617 ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3618
3619 /* Mirror first 127 entries. We must take care that entry -1 is not
3620 mirrored because EOF == -1. */
3621 for (idx = 0; idx < 127; ++idx)
3622 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3623
3624 /* The 32 bit array contains all characters < 0x100. */
3625 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3626 if (ctype->charnames[idx] < 0x100)
3627 ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3628
3629 for (nr = 0; nr < ctype->nr_charclass; nr++)
3630 {
3631 ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3632
3633 for (idx = 0; idx < 256; ++idx)
3634 if (ctype->class256_collection[idx] & _ISbit (nr))
3635 ctype->class_b[nr][idx >> 5] |= (uint32_t)1 << (idx & 0x1f);
3636 }
3637
3638 for (nr = 0; nr < ctype->nr_charclass; nr++)
3639 {
3640 struct wctype_table t;
3641
3642 t.p = 4; /* or: 5 */
3643 t.q = 7; /* or: 6 */
3644 wctype_table_init (&t);
3645
3646 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3647 if (ctype->class_collection[idx] & _ISwbit (nr))
3648 wctype_table_add (&t, ctype->charnames[idx]);
3649
3650 wctype_table_finalize (&t);
3651
3652 if (verbose)
3653 fprintf (stderr, _("%s: table for class \"%s\": %lu bytes\n"),
3654 "LC_CTYPE", ctype->classnames[nr],
3655 (unsigned long int) t.result_size);
3656
3657 ctype->class_3level[nr].iov_base = t.result;
3658 ctype->class_3level[nr].iov_len = t.result_size;
3659 }
3660
3661 /* Room for table of mappings. */
3662 ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3663 ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3664 * sizeof (uint32_t *));
3665 ctype->map_3level = (struct iovec *)
3666 xmalloc (ctype->map_collection_nr * sizeof (struct iovec));
3667
3668 /* Fill in all mappings. */
3669 for (idx = 0; idx < 2; ++idx)
3670 {
3671 unsigned int idx2;
3672
3673 /* Allocate table. */
3674 ctype->map_b[idx] = (uint32_t *)
3675 xmalloc ((256 + 128) * sizeof (uint32_t));
3676
3677 /* Copy values from collection. */
3678 for (idx2 = 0; idx2 < 256; ++idx2)
3679 ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3680
3681 /* Mirror first 127 entries. We must take care not to map entry
3682 -1 because EOF == -1. */
3683 for (idx2 = 0; idx2 < 127; ++idx2)
3684 ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
3685
3686 /* EOF must map to EOF. */
3687 ctype->map_b[idx][127] = EOF;
3688 }
3689
3690 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3691 {
3692 unsigned int idx2;
3693
3694 /* Allocate table. */
3695 ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3696
3697 /* Copy values from collection. Default is identity mapping. */
3698 for (idx2 = 0; idx2 < 256; ++idx2)
3699 ctype->map32_b[idx][idx2] =
3700 (ctype->map_collection[idx][idx2] != 0
3701 ? ctype->map_collection[idx][idx2]
3702 : idx2);
3703 }
3704
3705 for (nr = 0; nr < ctype->map_collection_nr; nr++)
3706 {
3707 struct wctrans_table t;
3708
3709 t.p = 7;
3710 t.q = 9;
3711 wctrans_table_init (&t);
3712
3713 for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
3714 if (ctype->map_collection[nr][idx] != 0)
3715 wctrans_table_add (&t, ctype->charnames[idx],
3716 ctype->map_collection[nr][idx]);
3717
3718 wctrans_table_finalize (&t);
3719
3720 if (verbose)
3721 fprintf (stderr, _("%s: table for map \"%s\": %lu bytes\n"),
3722 "LC_CTYPE", ctype->mapnames[nr],
3723 (unsigned long int) t.result_size);
3724
3725 ctype->map_3level[nr].iov_base = t.result;
3726 ctype->map_3level[nr].iov_len = t.result_size;
3727 }
3728
3729 /* Extra array for class and map names. */
3730 ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3731 * sizeof (uint32_t));
3732 ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3733 * sizeof (uint32_t));
3734
3735 ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3736 ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3737
3738 /* Array for width information. Because the expected width are very
3739 small we use only one single byte. This saves space. */
3740 {
3741 struct wcwidth_table t;
3742
3743 t.p = 7;
3744 t.q = 9;
3745 wcwidth_table_init (&t);
3746
3747 /* First set all the characters of the character set to the default width. */
3748 curs = NULL;
3749 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
3750 {
3751 struct charseq *data = (struct charseq *) vdata;
3752
3753 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3754 data->ucs4 = repertoire_find_value (ctype->repertoire,
3755 data->name, len);
3756
3757 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
3758 wcwidth_table_add (&t, data->ucs4, charmap->width_default);
3759 }
3760
3761 /* Now add the explicitly specified widths. */
3762 if (charmap->width_rules != NULL)
3763 {
3764 size_t cnt;
3765
3766 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
3767 {
3768 unsigned char bytes[charmap->mb_cur_max];
3769 int nbytes = charmap->width_rules[cnt].from->nbytes;
3770
3771 /* We have the range of character for which the width is
3772 specified described using byte sequences of the multibyte
3773 charset. We have to convert this to UCS4 now. And we
3774 cannot simply convert the beginning and the end of the
3775 sequence, we have to iterate over the byte sequence and
3776 convert it for every single character. */
3777 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
3778
3779 while (nbytes < charmap->width_rules[cnt].to->nbytes
3780 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
3781 nbytes) <= 0)
3782 {
3783 /* Find the UCS value for `bytes'. */
3784 int inner;
3785 uint32_t wch;
3786 struct charseq *seq =
3787 charmap_find_symbol (charmap, bytes, nbytes);
3788
3789 if (seq == NULL)
3790 wch = ILLEGAL_CHAR_VALUE;
3791 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
3792 wch = seq->ucs4;
3793 else
3794 wch = repertoire_find_value (ctype->repertoire, seq->name,
3795 strlen (seq->name));
3796
3797 if (wch != ILLEGAL_CHAR_VALUE)
3798 /* Store the value. */
3799 wcwidth_table_add (&t, wch, charmap->width_rules[cnt].width);
3800
3801 /* "Increment" the bytes sequence. */
3802 inner = nbytes - 1;
3803 while (inner >= 0 && bytes[inner] == 0xff)
3804 --inner;
3805
3806 if (inner < 0)
3807 {
3808 /* We have to extend the byte sequence. */
3809 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
3810 break;
3811
3812 bytes[0] = 1;
3813 memset (&bytes[1], 0, nbytes);
3814 ++nbytes;
3815 }
3816 else
3817 {
3818 ++bytes[inner];
3819 while (++inner < nbytes)
3820 bytes[inner] = 0;
3821 }
3822 }
3823 }
3824 }
3825
3826 wcwidth_table_finalize (&t);
3827
3828 if (verbose)
3829 fprintf (stderr, _("%s: table for width: %lu bytes\n"),
3830 "LC_CTYPE", (unsigned long int) t.result_size);
3831
3832 ctype->width.iov_base = t.result;
3833 ctype->width.iov_len = t.result_size;
3834 }
3835
3836 /* Set MB_CUR_MAX. */
3837 ctype->mb_cur_max = charmap->mb_cur_max;
3838
3839 /* Now determine the table for the transliteration information.
3840
3841 XXX It is not yet clear to me whether it is worth implementing a
3842 complicated algorithm which uses a hash table to locate the entries.
3843 For now I'll use a simple array which can be searching using binary
3844 search. */
3845 if (ctype->translit_copy_locale != NULL)
3846 {
3847 /* Fold in the transliteration information from the locale mentioned
3848 in the `include' statement. */
3849 struct locale_ctype_t *here = ctype;
3850
3851 do
3852 {
3853 struct localedef_t *other = find_locale (LC_CTYPE,
3854 here->translit_copy_locale,
3855 repertoire->name, charmap);
3856
3857 if (other == NULL)
3858 {
3859 error (0, 0, _("\
3860 %s: transliteration data from locale `%s' not available"),
3861 "LC_CTYPE", here->translit_copy_locale);
3862 break;
3863 }
3864
3865 here = other->categories[LC_CTYPE].ctype;
3866
3867 /* Enqueue the information if necessary. */
3868 if (here->translit != NULL)
3869 {
3870 struct translit_t *endp = here->translit;
3871 while (endp->next != NULL)
3872 endp = endp->next;
3873
3874 endp->next = ctype->translit;
3875 ctype->translit = here->translit;
3876 }
3877 }
3878 while (here->translit_copy_locale != NULL);
3879 }
3880
3881 if (ctype->translit != NULL)
3882 {
3883 /* First count how many entries we have. This is the upper limit
3884 since some entries from the included files might be overwritten. */
3885 size_t number = 0;
3886 size_t cnt;
3887 struct translit_t *runp = ctype->translit;
3888 struct translit_t **sorted;
3889 size_t from_len, to_len;
3890
3891 while (runp != NULL)
3892 {
3893 ++number;
3894 runp = runp->next;
3895 }
3896
3897 /* Next we allocate an array large enough and fill in the values. */
3898 sorted = (struct translit_t **) alloca (number
3899 * sizeof (struct translit_t **));
3900 runp = ctype->translit;
3901 number = 0;
3902 do
3903 {
3904 /* Search for the place where to insert this string.
3905 XXX Better use a real sorting algorithm later. */
3906 size_t idx = 0;
3907 int replace = 0;
3908
3909 while (idx < number)
3910 {
3911 int res = wcscmp ((const wchar_t *) sorted[idx]->from,
3912 (const wchar_t *) runp->from);
3913 if (res == 0)
3914 {
3915 replace = 1;
3916 break;
3917 }
3918 if (res > 0)
3919 break;
3920 ++idx;
3921 }
3922
3923 if (replace)
3924 sorted[idx] = runp;
3925 else
3926 {
3927 memmove (&sorted[idx + 1], &sorted[idx],
3928 (number - idx) * sizeof (struct translit_t *));
3929 sorted[idx] = runp;
3930 ++number;
3931 }
3932
3933 runp = runp->next;
3934 }
3935 while (runp != NULL);
3936
3937 /* The next step is putting all the possible transliteration
3938 strings in one memory block so that we can write it out.
3939 We need several different blocks:
3940 - index to the from-string array
3941 - from-string array
3942 - index to the to-string array
3943 - to-string array.
3944 */
3945 from_len = to_len = 0;
3946 for (cnt = 0; cnt < number; ++cnt)
3947 {
3948 struct translit_to_t *srunp;
3949 from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3950 srunp = sorted[cnt]->to;
3951 while (srunp != NULL)
3952 {
3953 to_len += wcslen ((const wchar_t *) srunp->str) + 1;
3954 srunp = srunp->next;
3955 }
3956 /* Plus one for the extra NUL character marking the end of
3957 the list for the current entry. */
3958 ++to_len;
3959 }
3960
3961 /* We can allocate the arrays for the results. */
3962 ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
3963 ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
3964 ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
3965 ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
3966
3967 from_len = 0;
3968 to_len = 0;
3969 for (cnt = 0; cnt < number; ++cnt)
3970 {
3971 size_t len;
3972 struct translit_to_t *srunp;
3973
3974 ctype->translit_from_idx[cnt] = from_len;
3975 ctype->translit_to_idx[cnt] = to_len;
3976
3977 len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3978 wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
3979 (const wchar_t *) sorted[cnt]->from, len);
3980 from_len += len;
3981
3982 ctype->translit_to_idx[cnt] = to_len;
3983 srunp = sorted[cnt]->to;
3984 while (srunp != NULL)
3985 {
3986 len = wcslen ((const wchar_t *) srunp->str) + 1;
3987 wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
3988 (const wchar_t *) srunp->str, len);
3989 to_len += len;
3990 srunp = srunp->next;
3991 }
3992 ctype->translit_to_tbl[to_len++] = L'\0';
3993 }
3994
3995 /* Store the information about the length. */
3996 ctype->translit_idx_size = number;
3997 ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
3998 ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
3999 }
4000 else
4001 {
4002 /* Provide some dummy pointers since we have nothing to write out. */
4003 static uint32_t no_str = { 0 };
4004
4005 ctype->translit_from_idx = &no_str;
4006 ctype->translit_from_tbl = &no_str;
4007 ctype->translit_to_tbl = &no_str;
4008 ctype->translit_idx_size = 0;
4009 ctype->translit_from_tbl_size = 0;
4010 ctype->translit_to_tbl_size = 0;
4011 }
4012 }