]> git.ipfire.org Git - thirdparty/glibc.git/blame - locale/programs/ld-ctype.c
Update.
[thirdparty/glibc.git] / locale / programs / ld-ctype.c
CommitLineData
01ff9d0b 1/* Copyright (C) 1995-1999, 2000 Free Software Foundation, Inc.
c84142e8 2 This file is part of the GNU C Library.
4b10dd6c 3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
19bc17a9 4
c84142e8
UD
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
19bc17a9 9
c84142e8
UD
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
19bc17a9 14
c84142e8
UD
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
19bc17a9
RM
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
a68b0d31 24#include <alloca.h>
4b10dd6c 25#include <byteswap.h>
19bc17a9 26#include <endian.h>
4b10dd6c 27#include <errno.h>
19bc17a9 28#include <limits.h>
4b10dd6c
UD
29#include <obstack.h>
30#include <stdlib.h>
19bc17a9 31#include <string.h>
4b10dd6c
UD
32#include <wchar.h>
33#include <wctype.h>
34#include <sys/uio.h>
19bc17a9 35
4b10dd6c 36#include "charmap.h"
19bc17a9
RM
37#include "localeinfo.h"
38#include "langinfo.h"
4b10dd6c 39#include "linereader.h"
19bc17a9 40#include "locfile-token.h"
4b10dd6c
UD
41#include "locfile.h"
42#include "localedef.h"
19bc17a9 43
19bc17a9
RM
44#include <assert.h>
45
46
#ifdef PREDEFINED_CLASSES
/* These are the extra bits not in wctype.h since these are not preallocated
   classes.  The constants are unsigned on purpose: `1 << 31' would shift
   into the sign bit of an `int', which is undefined behavior.  */
# define _ISwspecial1	(1u << 29)
# define _ISwspecial2	(1u << 30)
# define _ISwspecial3	(1u << 31)
#endif
19bc17a9
RM
54
55
/* Bit position of a character class: the offset of its token from
   `tok_upper'.  */
#define BITPOS(cls)	((cls) - tok_upper)

/* Mask for class CLS, built with `_ISbit' and `_ISwbit' respectively.  */
#define BIT(cls)	(_ISbit (BITPOS (cls)))
#define BITw(cls)	(_ISwbit (BITPOS (cls)))

/* Lvalue of the entry for VALUE in CTYPE->COLLECTION, located through
   `find_idx'.  IDX is pasted verbatim after the member names, so it is
   either empty or an array subscript such as `[0]'.  */
#define ELEM(ctype, collection, idx, value)				      \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
	     &ctype->collection##_act idx, value)
64
19bc17a9
RM
65
/* For compatibility with former implementations the number of bits
   available for character classes is restricted to 16 for now.  Once
   that compatibility is no longer needed the number can be raised to
   32.  */
#define char_class_t	uint16_t
#define char_class32_t	uint32_t
4b10dd6c
UD
71
72
/* Types describing a transliteration action.  An action consists of a
   from-string, possibly several characters long, and a set of
   to-strings which may each be several characters long as well.  All
   strings hold 32-bit values since this is what the gconv functions
   use.  */

/* One to-string in the list of replacements.  */
struct translit_to_t
{
  uint32_t *str;

  struct translit_to_t *next;
};
83
/* One transliteration rule: the from-string, the file name and line
   number recorded for it, and the list of to-strings.  */
struct translit_t
{
  uint32_t *from;

  const char *fname;
  size_t lineno;

  struct translit_to_t *to;

  struct translit_t *next;
};
19bc17a9 95
a673fbcb
UD
/* One entry of the `translit_ignore' list: the values FROM, TO and
   STEP plus the file name and line number recorded for the entry.  */
struct translit_ignore_t
{
  uint32_t from;
  uint32_t to;
  uint32_t step;

  const char *fname;
  size_t lineno;

  struct translit_ignore_t *next;
};
107
19bc17a9
RM
108
109/* The real definition of the struct for the LC_CTYPE locale. */
110struct locale_ctype_t
111{
4b10dd6c 112 uint32_t *charnames;
19bc17a9
RM
113 size_t charnames_max;
114 size_t charnames_act;
115
4b10dd6c
UD
116 struct repertoire_t *repertoire;
117
118 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
119#define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
ba1ffaa1 120 size_t nr_charclass;
19bc17a9 121 const char *classnames[MAX_NR_CHARCLASS];
4b10dd6c
UD
122 uint32_t last_class_char;
123 uint32_t class256_collection[256];
124 uint32_t *class_collection;
19bc17a9
RM
125 size_t class_collection_max;
126 size_t class_collection_act;
4b10dd6c 127 uint32_t class_done;
ef446144 128 uint32_t class_offset;
4b10dd6c
UD
129
130 struct charseq **mbdigits;
131 size_t mbdigits_act;
132 size_t mbdigits_max;
133 uint32_t *wcdigits;
134 size_t wcdigits_act;
135 size_t wcdigits_max;
136
137 struct charseq *mboutdigits[10];
138 uint32_t wcoutdigits[10];
139 size_t outdigits_act;
19bc17a9
RM
140
141 /* If the following number ever turns out to be too small simply
142 increase it. But I doubt it will. --drepper@gnu */
143#define MAX_NR_CHARMAP 16
144 const char *mapnames[MAX_NR_CHARMAP];
4b10dd6c
UD
145 uint32_t *map_collection[MAX_NR_CHARMAP];
146 uint32_t map256_collection[2][256];
9a0a462c
UD
147 size_t map_collection_max[MAX_NR_CHARMAP];
148 size_t map_collection_act[MAX_NR_CHARMAP];
19bc17a9
RM
149 size_t map_collection_nr;
150 size_t last_map_idx;
4b10dd6c 151 int tomap_done[MAX_NR_CHARMAP];
ef446144 152 uint32_t map_offset;
4b10dd6c
UD
153
154 /* Transliteration information. */
155 const char *translit_copy_locale;
156 const char *translit_copy_repertoire;
157 struct translit_t *translit;
a673fbcb 158 struct translit_ignore_t *translit_ignore;
a8e4c924 159 uint32_t ntranslit_ignore;
a673fbcb
UD
160
161 uint32_t *default_missing;
162 const char *default_missing_file;
163 size_t default_missing_lineno;
19bc17a9
RM
164
165 /* The arrays for the binary representation. */
4b10dd6c
UD
166 uint32_t plane_size;
167 uint32_t plane_cnt;
19bc17a9
RM
168 char_class_t *ctype_b;
169 char_class32_t *ctype32_b;
4a33c2f5
UD
170 uint32_t *names;
171 uint32_t **map;
49f2be5b 172 uint32_t **map32;
ef446144
UD
173 struct iovec *class_3level;
174 struct iovec *map_3level;
4b10dd6c
UD
175 uint32_t *class_name_ptr;
176 uint32_t *map_name_ptr;
75cd5204 177 unsigned char *width;
ef446144 178 struct iovec width_3level;
4b10dd6c 179 uint32_t mb_cur_max;
6990326c 180 const char *codeset_name;
4a33c2f5
UD
181 uint32_t *translit_from_idx;
182 uint32_t *translit_from_tbl;
183 uint32_t *translit_to_idx;
184 uint32_t *translit_to_tbl;
04fbc779 185 uint32_t translit_idx_size;
4b10dd6c
UD
186 size_t translit_from_tbl_size;
187 size_t translit_to_tbl_size;
188
a673fbcb 189 struct obstack mempool;
19bc17a9
RM
190};
191
192
4b10dd6c
UD
/* Chunk allocation and release functions used by the obstack macros.  */
#define obstack_chunk_alloc	xmalloc
#define obstack_chunk_free	free
195
196
/* Prototypes for local functions.  */
static void ctype_startup (struct linereader *lr,
			   struct localedef_t *locale,
			   struct charmap_t *charmap,
			   int ignore_content);

static void ctype_class_new (struct linereader *lr,
			     struct locale_ctype_t *ctype,
			     const char *name);

static void ctype_map_new (struct linereader *lr,
			   struct locale_ctype_t *ctype,
			   const char *name,
			   struct charmap_t *charmap);

static uint32_t *find_idx (struct locale_ctype_t *ctype,
			   uint32_t **table,
			   size_t *max,
			   size_t *act,
			   unsigned int idx);

static void set_class_defaults (struct locale_ctype_t *ctype,
				struct charmap_t *charmap,
				struct repertoire_t *repertoire);

static void allocate_arrays (struct locale_ctype_t *ctype,
			     struct charmap_t *charmap,
			     struct repertoire_t *repertoire);
19bc17a9
RM
213
214
4b10dd6c
UD
/* Spelled-out names of the ten decimal digits, indexed by digit value.  */
static const char *longnames[] =
{
  "zero",
  "one",
  "two",
  "three",
  "four",
  "five",
  "six",
  "seven",
  "eight",
  "nine"
};

/* `U<8 hex digits>' style names of the digits, indexed by digit value.  */
static const char *uninames[] =
{
  "U00000030",
  "U00000031",
  "U00000032",
  "U00000033",
  "U00000034",
  "U00000035",
  "U00000036",
  "U00000037",
  "U00000038",
  "U00000039"
};

/* The digit characters themselves, indexed by digit value.  */
static const unsigned char digits[] = "0123456789";
226
227
228static void
19bc17a9 229ctype_startup (struct linereader *lr, struct localedef_t *locale,
4b10dd6c 230 struct charmap_t *charmap, int ignore_content)
19bc17a9
RM
231{
232 unsigned int cnt;
233 struct locale_ctype_t *ctype;
234
4b10dd6c 235 if (!ignore_content)
19bc17a9 236 {
4b10dd6c
UD
237 /* Allocate the needed room. */
238 locale->categories[LC_CTYPE].ctype = ctype =
239 (struct locale_ctype_t *) xcalloc (1, sizeof (struct locale_ctype_t));
240
241 /* We have seen no names yet. */
242 ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
243 ctype->charnames =
244 (unsigned int *) xmalloc (ctype->charnames_max
245 * sizeof (unsigned int));
246 for (cnt = 0; cnt < 256; ++cnt)
247 ctype->charnames[cnt] = cnt;
248 ctype->charnames_act = 256;
249
250 /* Fill character class information. */
251 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
252 /* The order of the following instructions determines the bit
253 positions! */
254 ctype_class_new (lr, ctype, "upper");
255 ctype_class_new (lr, ctype, "lower");
256 ctype_class_new (lr, ctype, "alpha");
257 ctype_class_new (lr, ctype, "digit");
258 ctype_class_new (lr, ctype, "xdigit");
259 ctype_class_new (lr, ctype, "space");
260 ctype_class_new (lr, ctype, "print");
261 ctype_class_new (lr, ctype, "graph");
262 ctype_class_new (lr, ctype, "blank");
263 ctype_class_new (lr, ctype, "cntrl");
264 ctype_class_new (lr, ctype, "punct");
265 ctype_class_new (lr, ctype, "alnum");
011ebfab 266#ifdef PREDEFINED_CLASSES
4b10dd6c
UD
267 /* The following are extensions from ISO 14652. */
268 ctype_class_new (lr, ctype, "left_to_right");
269 ctype_class_new (lr, ctype, "right_to_left");
270 ctype_class_new (lr, ctype, "num_terminator");
271 ctype_class_new (lr, ctype, "num_separator");
272 ctype_class_new (lr, ctype, "segment_separator");
273 ctype_class_new (lr, ctype, "block_separator");
274 ctype_class_new (lr, ctype, "direction_control");
275 ctype_class_new (lr, ctype, "sym_swap_layout");
276 ctype_class_new (lr, ctype, "char_shape_selector");
277 ctype_class_new (lr, ctype, "num_shape_selector");
278 ctype_class_new (lr, ctype, "non_spacing");
279 ctype_class_new (lr, ctype, "non_spacing_level3");
280 ctype_class_new (lr, ctype, "normal_connect");
281 ctype_class_new (lr, ctype, "r_connect");
282 ctype_class_new (lr, ctype, "no_connect");
283 ctype_class_new (lr, ctype, "no_connect-space");
284 ctype_class_new (lr, ctype, "vowel_connect");
011ebfab 285#endif
4b10dd6c
UD
286
287 ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
288 ctype->class_collection
289 = (uint32_t *) xcalloc (sizeof (unsigned long int),
290 ctype->class_collection_max);
291 ctype->class_collection_act = 256;
292
293 /* Fill character map information. */
4b10dd6c
UD
294 ctype->last_map_idx = MAX_NR_CHARMAP;
295 ctype_map_new (lr, ctype, "toupper", charmap);
296 ctype_map_new (lr, ctype, "tolower", charmap);
011ebfab 297#ifdef PREDEFINED_CLASSES
4b10dd6c 298 ctype_map_new (lr, ctype, "tosymmetric", charmap);
011ebfab 299#endif
4b10dd6c
UD
300
301 /* Fill first 256 entries in `toXXX' arrays. */
302 for (cnt = 0; cnt < 256; ++cnt)
303 {
304 ctype->map_collection[0][cnt] = cnt;
305 ctype->map_collection[1][cnt] = cnt;
9e2b7438 306#ifdef PREDEFINED_CLASSES
4b10dd6c 307 ctype->map_collection[2][cnt] = cnt;
9e2b7438 308#endif
4b10dd6c
UD
309 ctype->map256_collection[0][cnt] = cnt;
310 ctype->map256_collection[1][cnt] = cnt;
311 }
312
a673fbcb 313 obstack_init (&ctype->mempool);
19bc17a9
RM
314 }
315}
316
317
318void
4b10dd6c 319ctype_finish (struct localedef_t *locale, struct charmap_t *charmap)
19bc17a9
RM
320{
321 /* See POSIX.2, table 2-6 for the meaning of the following table. */
322#define NCLASS 12
323 static const struct
324 {
325 const char *name;
326 const char allow[NCLASS];
327 }
328 valid_table[NCLASS] =
329 {
330 /* The order is important. See token.h for more information.
331 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
332 { "upper", "--MX-XDDXXX-" },
333 { "lower", "--MX-XDDXXX-" },
334 { "alpha", "---X-XDDXXX-" },
335 { "digit", "XXX--XDDXXX-" },
336 { "xdigit", "-----XDDXXX-" },
337 { "space", "XXXXX------X" },
338 { "print", "---------X--" },
339 { "graph", "---------X--" },
340 { "blank", "XXXXXM-----X" },
341 { "cntrl", "XXXXX-XX--XX" },
342 { "punct", "XXXXX-DD-X-X" },
343 { "alnum", "-----XDDXXX-" }
344 };
345 size_t cnt;
346 int cls1, cls2;
4b10dd6c
UD
347 uint32_t space_value;
348 struct charseq *space_seq;
19bc17a9 349 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
4b10dd6c 350 int warned;
0e16ecfa
UD
351 const void *key;
352 size_t len;
353 void *vdata;
354 void *curs;
19bc17a9 355
b9eb05d6
UD
356 /* Now resolve copying and also handle completely missing definitions. */
357 if (ctype == NULL)
358 {
70e51ab9
UD
359 const char *repertoire_name;
360
b9eb05d6
UD
361 /* First see whether we were supposed to copy. If yes, find the
362 actual definition. */
363 if (locale->copy_name[LC_CTYPE] != NULL)
364 {
365 /* Find the copying locale. This has to happen transitively since
366 the locale we are copying from might also copying another one. */
367 struct localedef_t *from = locale;
368
369 do
370 from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
371 from->repertoire_name, charmap);
372 while (from->categories[LC_CTYPE].ctype == NULL
373 && from->copy_name[LC_CTYPE] != NULL);
374
375 ctype = locale->categories[LC_CTYPE].ctype
376 = from->categories[LC_CTYPE].ctype;
377 }
378
379 /* If there is still no definition issue an warning and create an
380 empty one. */
381 if (ctype == NULL)
382 {
f6ada7ad
UD
383 if (! be_quiet)
384 error (0, 0, _("No definition for %s category found"), "LC_CTYPE");
b9eb05d6
UD
385 ctype_startup (NULL, locale, charmap, 0);
386 ctype = locale->categories[LC_CTYPE].ctype;
387 }
70e51ab9
UD
388
389 /* Get the repertoire we have to use. */
390 repertoire_name = locale->repertoire_name ?: repertoire_global;
391 if (repertoire_name != NULL)
392 ctype->repertoire = repertoire_read (repertoire_name);
b9eb05d6
UD
393 }
394
db76d943
UD
395 /* We need the name of the currently used 8-bit character set to
396 make correct conversion between this 8-bit representation and the
397 ISO 10646 character set used internally for wide characters. */
398 ctype->codeset_name = charmap->code_set_name;
399 if (ctype->codeset_name == NULL)
400 {
401 if (! be_quiet)
402 error (0, 0, "no character set name specified in charmap");
403 ctype->codeset_name = "//UNKNOWN//";
404 }
405
19bc17a9 406 /* Set default value for classes not specified. */
4b10dd6c 407 set_class_defaults (ctype, charmap, ctype->repertoire);
19bc17a9
RM
408
409 /* Check according to table. */
42d7c593 410 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
19bc17a9 411 {
4b10dd6c 412 uint32_t tmp = ctype->class_collection[cnt];
19bc17a9 413
4b10dd6c
UD
414 if (tmp != 0)
415 {
416 for (cls1 = 0; cls1 < NCLASS; ++cls1)
417 if ((tmp & _ISwbit (cls1)) != 0)
418 for (cls2 = 0; cls2 < NCLASS; ++cls2)
419 if (valid_table[cls1].allow[cls2] != '-')
19bc17a9 420 {
4b10dd6c
UD
421 int eq = (tmp & _ISwbit (cls2)) != 0;
422 switch (valid_table[cls1].allow[cls2])
19bc17a9 423 {
4b10dd6c
UD
424 case 'M':
425 if (!eq)
426 {
427 uint32_t value = ctype->charnames[cnt];
428
429 if (!be_quiet)
430 error (0, 0, _("\
431character L'\\u%0*x' in class `%s' must be in class `%s'"),
432 value > 0xffff ? 8 : 4, value,
433 valid_table[cls1].name,
434 valid_table[cls2].name);
435 }
436 break;
437
438 case 'X':
439 if (eq)
440 {
441 uint32_t value = ctype->charnames[cnt];
442
443 if (!be_quiet)
444 error (0, 0, _("\
445character L'\\u%0*x' in class `%s' must not be in class `%s'"),
446 value > 0xffff ? 8 : 4, value,
447 valid_table[cls1].name,
448 valid_table[cls2].name);
449 }
450 break;
451
452 case 'D':
453 ctype->class_collection[cnt] |= _ISwbit (cls2);
454 break;
455
456 default:
457 error (5, 0, _("internal error in %s, line %u"),
458 __FUNCTION__, __LINE__);
19bc17a9 459 }
4b10dd6c
UD
460 }
461 }
462 }
463
464 for (cnt = 0; cnt < 256; ++cnt)
465 {
466 uint32_t tmp = ctype->class256_collection[cnt];
19bc17a9 467
4b10dd6c
UD
468 if (tmp != 0)
469 {
470 for (cls1 = 0; cls1 < NCLASS; ++cls1)
471 if ((tmp & _ISbit (cls1)) != 0)
472 for (cls2 = 0; cls2 < NCLASS; ++cls2)
473 if (valid_table[cls1].allow[cls2] != '-')
474 {
475 int eq = (tmp & _ISbit (cls2)) != 0;
476 switch (valid_table[cls1].allow[cls2])
19bc17a9 477 {
4b10dd6c
UD
478 case 'M':
479 if (!eq)
480 {
481 char buf[17];
482
5d431a3e 483 snprintf (buf, sizeof buf, "\\%Zo", cnt);
4b10dd6c
UD
484
485 if (!be_quiet)
486 error (0, 0, _("\
487character '%s' in class `%s' must be in class `%s'"),
488 buf, valid_table[cls1].name,
489 valid_table[cls2].name);
490 }
491 break;
492
493 case 'X':
494 if (eq)
495 {
496 char buf[17];
497
5d431a3e 498 snprintf (buf, sizeof buf, "\\%Zo", cnt);
4b10dd6c
UD
499
500 if (!be_quiet)
501 error (0, 0, _("\
502character '%s' in class `%s' must not be in class `%s'"),
503 buf, valid_table[cls1].name,
504 valid_table[cls2].name);
505 }
506 break;
507
508 case 'D':
509 ctype->class256_collection[cnt] |= _ISbit (cls2);
510 break;
511
512 default:
513 error (5, 0, _("internal error in %s, line %u"),
514 __FUNCTION__, __LINE__);
19bc17a9 515 }
4b10dd6c
UD
516 }
517 }
19bc17a9
RM
518 }
519
520 /* ... and now test <SP> as a special case. */
a0dc5206
UD
521 space_value = 32;
522 if (((cnt = BITPOS (tok_space),
523 (ELEM (ctype, class_collection, , space_value)
524 & BITw (tok_space)) == 0)
525 || (cnt = BITPOS (tok_blank),
526 (ELEM (ctype, class_collection, , space_value)
527 & BITw (tok_blank)) == 0)))
880f421f
UD
528 {
529 if (!be_quiet)
530 error (0, 0, _("<SP> character not in class `%s'"),
531 valid_table[cnt].name);
532 }
c84142e8
UD
533 else if (((cnt = BITPOS (tok_punct),
534 (ELEM (ctype, class_collection, , space_value)
4b10dd6c 535 & BITw (tok_punct)) != 0)
c84142e8
UD
536 || (cnt = BITPOS (tok_graph),
537 (ELEM (ctype, class_collection, , space_value)
4b10dd6c 538 & BITw (tok_graph))
880f421f
UD
539 != 0)))
540 {
541 if (!be_quiet)
542 error (0, 0, _("<SP> character must not be in class `%s'"),
543 valid_table[cnt].name);
544 }
19bc17a9 545 else
4b10dd6c
UD
546 ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
547
548 space_seq = charmap_find_value (charmap, "SP", 2);
ce177a84 549 if (space_seq == NULL)
45c95239
UD
550 space_seq = charmap_find_value (charmap, "space", 5);
551 if (space_seq == NULL)
1b97149d 552 space_seq = charmap_find_value (charmap, "U00000020", 9);
4b10dd6c
UD
553 if (space_seq == NULL || space_seq->nbytes != 1)
554 {
555 if (!be_quiet)
556 error (0, 0, _("character <SP> not defined in character map"));
557 }
558 else if (((cnt = BITPOS (tok_space),
559 (ctype->class256_collection[space_seq->bytes[0]]
560 & BIT (tok_space)) == 0)
561 || (cnt = BITPOS (tok_blank),
562 (ctype->class256_collection[space_seq->bytes[0]]
563 & BIT (tok_blank)) == 0)))
564 {
565 if (!be_quiet)
566 error (0, 0, _("<SP> character not in class `%s'"),
567 valid_table[cnt].name);
568 }
569 else if (((cnt = BITPOS (tok_punct),
570 (ctype->class256_collection[space_seq->bytes[0]]
571 & BIT (tok_punct)) != 0)
572 || (cnt = BITPOS (tok_graph),
573 (ctype->class256_collection[space_seq->bytes[0]]
574 & BIT (tok_graph)) != 0)))
575 {
576 if (!be_quiet)
577 error (0, 0, _("<SP> character must not be in class `%s'"),
578 valid_table[cnt].name);
579 }
580 else
581 ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
75cd5204
RM
582
583 /* Now that the tests are done make sure the name array contains all
584 characters which are handled in the WIDTH section of the
585 character set definition file. */
4b10dd6c
UD
586 if (charmap->width_rules != NULL)
587 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
75cd5204 588 {
827ff758
UD
589 unsigned char bytes[charmap->mb_cur_max];
590 int nbytes = charmap->width_rules[cnt].from->nbytes;
591
592 /* We have the range of character for which the width is
593 specified described using byte sequences of the multibyte
594 charset. We have to convert this to UCS4 now. And we
595 cannot simply convert the beginning and the end of the
596 sequence, we have to iterate over the byte sequence and
597 convert it for every single character. */
598 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
599
600 while (nbytes < charmap->width_rules[cnt].to->nbytes
601 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
602 nbytes) <= 0)
603 {
604 /* Find the UCS value for `bytes'. */
827ff758 605 int inner;
76e680a8
UD
606 uint32_t wch;
607 struct charseq *seq = charmap_find_symbol (charmap, bytes, nbytes);
608
609 if (seq == NULL)
610 wch = ILLEGAL_CHAR_VALUE;
611 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
612 wch = seq->ucs4;
613 else
614 wch = repertoire_find_value (ctype->repertoire, seq->name,
615 strlen (seq->name));
827ff758
UD
616
617 if (wch != ILLEGAL_CHAR_VALUE)
618 /* We are only interested in the side-effects of the
619 `find_idx' call. It will add appropriate entries in
620 the name array if this is necessary. */
621 (void) find_idx (ctype, NULL, NULL, NULL, wch);
622
623 /* "Increment" the bytes sequence. */
624 inner = nbytes - 1;
625 while (inner >= 0 && bytes[inner] == 0xff)
626 --inner;
627
628 if (inner < 0)
629 {
630 /* We have to extend the byte sequence. */
631 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
632 break;
633
634 bytes[0] = 1;
635 memset (&bytes[1], 0, nbytes);
636 ++nbytes;
637 }
638 else
639 {
640 ++bytes[inner];
641 while (++inner < nbytes)
642 bytes[inner] = 0;
643 }
644 }
4b10dd6c
UD
645 }
646
0e16ecfa
UD
647 /* Now set all the other characters of the character set to the
648 default width. */
649 curs = NULL;
650 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
651 {
652 struct charseq *data = (struct charseq *) vdata;
653
654 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
655 data->ucs4 = repertoire_find_value (ctype->repertoire,
656 data->name, len);
657
658 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
659 (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
660 }
661
4b10dd6c
UD
662 /* There must be a multiple of 10 digits. */
663 if (ctype->mbdigits_act % 10 != 0)
664 {
665 assert (ctype->mbdigits_act == ctype->wcdigits_act);
666 ctype->wcdigits_act -= ctype->mbdigits_act % 10;
667 ctype->mbdigits_act -= ctype->mbdigits_act % 10;
668 error (0, 0, _("`digit' category has not entries in groups of ten"));
669 }
670
671 /* Check the input digits. There must be a multiple of ten available.
42d7c593 672 In each group it could be that one or the other character is missing.
4b10dd6c
UD
673 In this case the whole group must be removed. */
674 cnt = 0;
675 while (cnt < ctype->mbdigits_act)
676 {
677 size_t inner;
678 for (inner = 0; inner < 10; ++inner)
679 if (ctype->mbdigits[cnt + inner] == NULL)
680 break;
681
682 if (inner == 10)
683 cnt += 10;
684 else
685 {
686 /* Remove the group. */
687 memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
688 ((ctype->wcdigits_act - cnt - 10)
689 * sizeof (ctype->mbdigits[0])));
690 ctype->mbdigits_act -= 10;
691 }
692 }
693
694 /* If no input digits are given use the default. */
695 if (ctype->mbdigits_act == 0)
696 {
697 if (ctype->mbdigits_max == 0)
698 {
699 ctype->mbdigits = obstack_alloc (&charmap->mem_pool,
700 10 * sizeof (struct charseq *));
701 ctype->mbdigits_max = 10;
702 }
703
704 for (cnt = 0; cnt < 10; ++cnt)
705 {
706 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
707 digits + cnt, 1);
708 if (ctype->mbdigits[cnt] == NULL)
709 {
710 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
711 longnames[cnt],
712 strlen (longnames[cnt]));
713 if (ctype->mbdigits[cnt] == NULL)
714 {
715 /* Hum, this ain't good. */
716 error (0, 0, _("\
717no input digits defined and none of the standard names in the charmap"));
718
719 ctype->mbdigits[cnt] = obstack_alloc (&charmap->mem_pool,
720 sizeof (struct charseq) + 1);
721
722 /* This is better than nothing. */
723 ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
724 ctype->mbdigits[cnt]->nbytes = 1;
725 }
726 }
727 }
728
729 ctype->mbdigits_act = 10;
730 }
731
732 /* Check the wide character input digits. There must be a multiple
42d7c593 733 of ten available. In each group it could be that one or the other
4b10dd6c
UD
734 character is missing. In this case the whole group must be
735 removed. */
736 cnt = 0;
737 while (cnt < ctype->wcdigits_act)
738 {
739 size_t inner;
740 for (inner = 0; inner < 10; ++inner)
741 if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
742 break;
743
744 if (inner == 10)
745 cnt += 10;
746 else
747 {
748 /* Remove the group. */
749 memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
750 ((ctype->wcdigits_act - cnt - 10)
751 * sizeof (ctype->wcdigits[0])));
752 ctype->wcdigits_act -= 10;
753 }
754 }
755
756 /* If no input digits are given use the default. */
757 if (ctype->wcdigits_act == 0)
758 {
759 if (ctype->wcdigits_max == 0)
760 {
761 ctype->wcdigits = obstack_alloc (&charmap->mem_pool,
762 10 * sizeof (uint32_t));
763 ctype->wcdigits_max = 10;
764 }
765
766 for (cnt = 0; cnt < 10; ++cnt)
767 ctype->wcdigits[cnt] = L'0' + cnt;
768
769 ctype->mbdigits_act = 10;
770 }
771
772 /* Check the outdigits. */
773 warned = 0;
774 for (cnt = 0; cnt < 10; ++cnt)
775 if (ctype->mboutdigits[cnt] == NULL)
776 {
777 static struct charseq replace[2];
778
779 if (!warned)
780 {
781 error (0, 0, _("\
782not all characters used in `outdigit' are available in the charmap"));
783 warned = 1;
784 }
785
786 replace[0].nbytes = 1;
787 replace[0].bytes[0] = '?';
788 replace[0].bytes[1] = '\0';
789 ctype->mboutdigits[cnt] = &replace[0];
790 }
791
792 warned = 0;
793 for (cnt = 0; cnt < 10; ++cnt)
794 if (ctype->wcoutdigits[cnt] == 0)
795 {
796 if (!warned)
797 {
798 error (0, 0, _("\
799not all characters used in `outdigit' are available in the repertoire"));
800 warned = 1;
801 }
802
803 ctype->wcoutdigits[cnt] = L'?';
75cd5204 804 }
a8e4c924
UD
805
806 /* Sort the entries in the translit_ignore list. */
807 if (ctype->translit_ignore != NULL)
808 {
809 struct translit_ignore_t *firstp = ctype->translit_ignore;
810 struct translit_ignore_t *runp;
811
812 ctype->ntranslit_ignore = 1;
813
814 for (runp = firstp->next; runp != NULL; runp = runp->next)
815 {
816 struct translit_ignore_t *lastp = NULL;
817 struct translit_ignore_t *cmpp;
818
819 ++ctype->ntranslit_ignore;
820
821 for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
822 if (runp->from < cmpp->from)
823 break;
824
825 runp->next = lastp;
826 if (lastp == NULL)
827 firstp = runp;
828 }
829
830 ctype->translit_ignore = firstp;
831 }
19bc17a9
RM
832}
833
834
835void
4b10dd6c 836ctype_output (struct localedef_t *locale, struct charmap_t *charmap,
75cd5204 837 const char *output_path)
19bc17a9 838{
e43e0dd6 839 static const char nulbytes[4] = { 0, 0, 0, 0 };
19bc17a9
RM
840 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
841 const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
ef446144
UD
842 + (oldstyle_tables
843 ? (ctype->map_collection_nr - 2)
844 : (ctype->nr_charclass + ctype->map_collection_nr)));
75cd5204 845 struct iovec iov[2 + nelems + ctype->nr_charclass
f175216d 846 + ctype->map_collection_nr + 2];
19bc17a9 847 struct locale_file data;
4b10dd6c 848 uint32_t idx[nelems + 1];
1d96d74d 849 uint32_t default_missing_len;
75cd5204 850 size_t elem, cnt, offset, total;
4b10dd6c 851 char *cp;
19bc17a9
RM
852
853 /* Now prepare the output: Find the sizes of the table we can use. */
4b10dd6c 854 allocate_arrays (ctype, charmap, ctype->repertoire);
19bc17a9
RM
855
856 data.magic = LIMAGIC (LC_CTYPE);
857 data.n = nelems;
858 iov[0].iov_base = (void *) &data;
859 iov[0].iov_len = sizeof (data);
860
861 iov[1].iov_base = (void *) idx;
a0edd63e 862 iov[1].iov_len = nelems * sizeof (uint32_t);
19bc17a9
RM
863
864 idx[0] = iov[0].iov_len + iov[1].iov_len;
865 offset = 0;
866
867 for (elem = 0; elem < nelems; ++elem)
868 {
869 if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
870 switch (elem)
871 {
c6df09ad
UD
872#define CTYPE_EMPTY(name) \
873 case name: \
04fbc779 874 iov[2 + elem + offset].iov_base = (void *) ""; \
c6df09ad
UD
875 iov[2 + elem + offset].iov_len = 0; \
876 idx[elem + 1] = idx[elem]; \
877 break
878
879 CTYPE_EMPTY(_NL_CTYPE_GAP1);
880 CTYPE_EMPTY(_NL_CTYPE_GAP2);
881 CTYPE_EMPTY(_NL_CTYPE_GAP3);
882
19bc17a9
RM
883#define CTYPE_DATA(name, base, len) \
884 case _NL_ITEM_INDEX (name): \
ce7a5ef4
RM
885 iov[2 + elem + offset].iov_base = (base); \
886 iov[2 + elem + offset].iov_len = (len); \
1d96d74d 887 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
19bc17a9
RM
888 break
889
890 CTYPE_DATA (_NL_CTYPE_CLASS,
891 ctype->ctype_b,
892 (256 + 128) * sizeof (char_class_t));
893
4a33c2f5
UD
894 CTYPE_DATA (_NL_CTYPE_TOUPPER,
895 ctype->map[0],
f1d8b804 896 (256 + 128) * sizeof (uint32_t));
4a33c2f5
UD
897 CTYPE_DATA (_NL_CTYPE_TOLOWER,
898 ctype->map[1],
f1d8b804 899 (256 + 128) * sizeof (uint32_t));
19bc17a9 900
49f2be5b
UD
901 CTYPE_DATA (_NL_CTYPE_TOUPPER32,
902 ctype->map32[0],
ef446144 903 (oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256)
49f2be5b
UD
904 * sizeof (uint32_t));
905 CTYPE_DATA (_NL_CTYPE_TOLOWER32,
906 ctype->map32[1],
ef446144 907 (oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256)
49f2be5b
UD
908 * sizeof (uint32_t));
909
19bc17a9
RM
910 CTYPE_DATA (_NL_CTYPE_CLASS32,
911 ctype->ctype32_b,
ef446144
UD
912 (oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256)
913 * sizeof (char_class32_t));
19bc17a9 914
4a33c2f5 915 CTYPE_DATA (_NL_CTYPE_NAMES,
ef446144
UD
916 ctype->names,
917 (oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 0)
918 * sizeof (uint32_t));
919
920 CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET,
921 &ctype->class_offset, sizeof (uint32_t));
922
923 CTYPE_DATA (_NL_CTYPE_MAP_OFFSET,
924 &ctype->map_offset, sizeof (uint32_t));
4a33c2f5 925
04fbc779
UD
926 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
927 &ctype->translit_idx_size, sizeof (uint32_t));
4a33c2f5
UD
928
929 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
930 ctype->translit_from_idx,
04fbc779 931 ctype->translit_idx_size * sizeof (uint32_t));
4b10dd6c 932
4a33c2f5
UD
933 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
934 ctype->translit_from_tbl,
4b10dd6c
UD
935 ctype->translit_from_tbl_size);
936
4a33c2f5
UD
937 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
938 ctype->translit_to_idx,
04fbc779 939 ctype->translit_idx_size * sizeof (uint32_t));
4b10dd6c 940
4a33c2f5
UD
941 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
942 ctype->translit_to_tbl, ctype->translit_to_tbl_size);
4b10dd6c 943
4a33c2f5 944 CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
4b10dd6c 945 &ctype->plane_size, sizeof (uint32_t));
4a33c2f5 946 CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
4b10dd6c 947 &ctype->plane_cnt, sizeof (uint32_t));
19bc17a9 948
75cd5204
RM
949 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
950 /* The class name array. */
951 total = 0;
952 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
953 {
954 iov[2 + elem + offset].iov_base
955 = (void *) ctype->classnames[cnt];
956 iov[2 + elem + offset].iov_len
957 = strlen (ctype->classnames[cnt]) + 1;
958 total += iov[2 + elem + offset].iov_len;
959 }
e43e0dd6 960 iov[2 + elem + offset].iov_base = (void *) nulbytes;
ce7a5ef4
RM
961 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
962 total += 1 + (4 - ((total + 1) % 4));
75cd5204 963
4b10dd6c 964 idx[elem + 1] = idx[elem] + total;
75cd5204
RM
965 break;
966
967 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
968 /* The class name array. */
969 total = 0;
970 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
971 {
972 iov[2 + elem + offset].iov_base
973 = (void *) ctype->mapnames[cnt];
974 iov[2 + elem + offset].iov_len
975 = strlen (ctype->mapnames[cnt]) + 1;
976 total += iov[2 + elem + offset].iov_len;
977 }
e43e0dd6 978 iov[2 + elem + offset].iov_base = (void *) nulbytes;
ce7a5ef4
RM
979 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
980 total += 1 + (4 - ((total + 1) % 4));
75cd5204 981
4b10dd6c 982 idx[elem + 1] = idx[elem] + total;
75cd5204 983 break;
19bc17a9
RM
984
985 CTYPE_DATA (_NL_CTYPE_WIDTH,
ef446144
UD
986 (oldstyle_tables
987 ? ctype->width
988 : ctype->width_3level.iov_base),
989 (oldstyle_tables
990 ? (ctype->plane_size * ctype->plane_cnt + 3) & ~3ul
991 : ctype->width_3level.iov_len));
19bc17a9 992
0200214b 993 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
4b10dd6c 994 &ctype->mb_cur_max, sizeof (uint32_t));
0200214b 995
ce7a5ef4
RM
996 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
997 total = strlen (ctype->codeset_name) + 1;
998 if (total % 4 == 0)
999 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
1000 else
1001 {
1002 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
9756dfe1
UD
1003 memset (mempcpy (iov[2 + elem + offset].iov_base,
1004 ctype->codeset_name, total),
1005 '\0', 4 - (total & 3));
ce7a5ef4
RM
1006 total = (total + 3) & ~3;
1007 }
1008 iov[2 + elem + offset].iov_len = total;
4b10dd6c
UD
1009 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1010 break;
1011
4a33c2f5 1012 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
4b10dd6c
UD
1013 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1014 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
4a33c2f5
UD
1015 *(uint32_t *) iov[2 + elem + offset].iov_base =
1016 ctype->mbdigits_act / 10;
a9c27b3e 1017 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
4b10dd6c
UD
1018 break;
1019
4a33c2f5 1020 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
e43e0dd6
UD
1021 /* Align entries. */
1022 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1023 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1024 idx[elem] += iov[2 + elem + offset].iov_len;
1025 ++offset;
1026
4b10dd6c
UD
1027 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1028 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
4a33c2f5
UD
1029 *(uint32_t *) iov[2 + elem + offset].iov_base =
1030 ctype->wcdigits_act / 10;
a9c27b3e 1031 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
4b10dd6c
UD
1032 break;
1033
e43e0dd6 1034 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
4b10dd6c
UD
1035 /* Compute the length of all possible characters. For INDIGITS
1036 there might be more than one. We simply concatenate all of
1037 them with a NUL byte following. The NUL byte wouldn't be
1038 necessary but it makes it easier for the user. */
1039 total = 0;
f175216d 1040
498b733e 1041 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
4b10dd6c
UD
1042 cnt < ctype->mbdigits_act; cnt += 10)
1043 total += ctype->mbdigits[cnt]->nbytes + 1;
1044 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1045 iov[2 + elem + offset].iov_len = total;
1046
1047 cp = iov[2 + elem + offset].iov_base;
498b733e 1048 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
4b10dd6c
UD
1049 cnt < ctype->mbdigits_act; cnt += 10)
1050 {
1051 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
1052 ctype->mbdigits[cnt]->nbytes);
1053 *cp++ = '\0';
1054 }
a9c27b3e 1055 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
4b10dd6c
UD
1056 break;
1057
1058 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1059 /* Compute the length of all possible characters. For INDIGITS
1060 there might be more than one. We simply concatenate all of
1061 them with a NUL byte following. The NUL byte wouldn't be
1062 necessary but it makes it easier for the user. */
498b733e 1063 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
4b10dd6c
UD
1064 total = ctype->mboutdigits[cnt]->nbytes + 1;
1065 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1066 iov[2 + elem + offset].iov_len = total;
1067
1068 *(char *) mempcpy (iov[2 + elem + offset].iov_base,
498b733e
UD
1069 ctype->mboutdigits[cnt]->bytes,
1070 ctype->mboutdigits[cnt]->nbytes) = '\0';
a9c27b3e 1071 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
4b10dd6c
UD
1072 break;
1073
e43e0dd6 1074 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
4b10dd6c
UD
1075 total = ctype->wcdigits_act / 10;
1076
1077 iov[2 + elem + offset].iov_base =
1078 (uint32_t *) alloca (total * sizeof (uint32_t));
1079 iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
1080
498b733e 1081 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
4b10dd6c
UD
1082 cnt < ctype->wcdigits_act; cnt += 10)
1083 ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
4a33c2f5 1084 = ctype->wcdigits[cnt];
a9c27b3e 1085 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
4b10dd6c
UD
1086 break;
1087
e43e0dd6
UD
1088 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC):
1089 /* Align entries. */
1090 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1091 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1092 idx[elem] += iov[2 + elem + offset].iov_len;
1093 ++offset;
1094 /* FALLTHROUGH */
1095
1096 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
498b733e 1097 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
4b10dd6c
UD
1098 iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
1099 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
a9c27b3e 1100 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
4b10dd6c
UD
1101 break;
1102
a8e4c924
UD
1103 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1104 default_missing_len = (ctype->default_missing
1105 ? wcslen ((wchar_t *)ctype->default_missing)
7f455351 1106 : 0);
a8e4c924
UD
1107 iov[2 + elem + offset].iov_base = &default_missing_len;
1108 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1109 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1110 break;
1111
1d96d74d
UD
1112 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1113 iov[2 + elem + offset].iov_base =
1114 ctype->default_missing ?: (uint32_t *) L"";
1115 iov[2 + elem + offset].iov_len =
1116 wcslen (iov[2 + elem + offset].iov_base);
1117 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1118 break;
1119
a8e4c924
UD
1120 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1121 iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore;
1d96d74d 1122 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
a8e4c924
UD
1123 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1124 break;
1125
1126 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1127 {
1128 uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore
1129 * 3 * sizeof (uint32_t));
1130 struct translit_ignore_t *runp;
1131
1132 iov[2 + elem + offset].iov_base = ranges;
1133 iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore
1134 * 3 * sizeof (uint32_t));
1135
1136 for (runp = ctype->translit_ignore; runp != NULL;
1137 runp = runp->next)
1138 {
1139 *ranges++ = runp->from;
1140 *ranges++ = runp->to;
1141 *ranges++ = runp->step;
1142 }
1143 }
1d96d74d
UD
1144 /* Remove the following line in case a new entry is added
1145 after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN. */
1146 if (elem < nelems)
1147 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1148 break;
1149
19bc17a9
RM
1150 default:
1151 assert (! "unknown CTYPE element");
1152 }
1153 else
1154 {
1155 /* Handle extra maps. */
ef446144
UD
1156 if (oldstyle_tables)
1157 {
1158 size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) + 2;
19bc17a9 1159
ef446144
UD
1160 iov[2 + elem + offset].iov_base = ctype->map32[nr];
1161 iov[2 + elem + offset].iov_len = ((ctype->plane_size
1162 * ctype->plane_cnt)
1163 * sizeof (uint32_t));
19bc17a9 1164
ef446144
UD
1165 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1166 }
1167 else
1168 {
1169 size_t nr = elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE);
1170 if (nr < ctype->nr_charclass)
1171 {
1172 iov[2 + elem + offset] = ctype->class_3level[nr];
1173 }
1174 else
1175 {
1176 nr -= ctype->nr_charclass;
1177 assert (nr < ctype->map_collection_nr);
1178 iov[2 + elem + offset] = ctype->map_3level[nr];
1179 }
1180 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1181 }
75cd5204 1182 }
19bc17a9 1183 }
19bc17a9 1184
75cd5204 1185 assert (2 + elem + offset == (nelems + ctype->nr_charclass
f175216d 1186 + ctype->map_collection_nr + 2 + 2));
19bc17a9 1187
83b1b6d8 1188 write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
19bc17a9
RM
1189}
1190
1191
4b10dd6c
UD
1192/* Local functions. */
1193static void
1194ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1195 const char *name)
19bc17a9 1196{
4b10dd6c 1197 size_t cnt;
19bc17a9 1198
4b10dd6c
UD
1199 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1200 if (strcmp (ctype->classnames[cnt], name) == 0)
1201 break;
19bc17a9 1202
4b10dd6c
UD
1203 if (cnt < ctype->nr_charclass)
1204 {
1205 lr_error (lr, _("character class `%s' already defined"), name);
1206 return;
1207 }
19bc17a9 1208
4b10dd6c
UD
1209 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1210 /* Exit code 2 is prescribed in P1003.2b. */
1211 error (2, 0, _("\
5d431a3e 1212implementation limit: no more than %Zd character classes allowed"),
4b10dd6c 1213 MAX_NR_CHARCLASS);
19bc17a9 1214
4b10dd6c 1215 ctype->classnames[ctype->nr_charclass++] = name;
19bc17a9
RM
1216}
1217
1218
4b10dd6c
UD
1219static void
1220ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1221 const char *name, struct charmap_t *charmap)
19bc17a9 1222{
4b10dd6c 1223 size_t max_chars = 0;
ba1ffaa1 1224 size_t cnt;
19bc17a9 1225
4b10dd6c 1226 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
19bc17a9 1227 {
4b10dd6c
UD
1228 if (strcmp (ctype->mapnames[cnt], name) == 0)
1229 break;
1230
1231 if (max_chars < ctype->map_collection_max[cnt])
1232 max_chars = ctype->map_collection_max[cnt];
19bc17a9
RM
1233 }
1234
4b10dd6c
UD
1235 if (cnt < ctype->map_collection_nr)
1236 {
1237 lr_error (lr, _("character map `%s' already defined"), name);
1238 return;
1239 }
19bc17a9 1240
4b10dd6c
UD
1241 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1242 /* Exit code 2 is prescribed in P1003.2b. */
1243 error (2, 0, _("\
1244implementation limit: no more than %d character maps allowed"),
1245 MAX_NR_CHARMAP);
19bc17a9 1246
4b10dd6c
UD
1247 ctype->mapnames[cnt] = name;
1248
1249 if (max_chars == 0)
1250 ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1251 else
1252 ctype->map_collection_max[cnt] = max_chars;
1253
1254 ctype->map_collection[cnt] = (uint32_t *)
5866b131 1255 xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
4b10dd6c 1256 ctype->map_collection_act[cnt] = 256;
19bc17a9 1257
4b10dd6c 1258 ++ctype->map_collection_nr;
19bc17a9
RM
1259}
1260
1261
4b10dd6c 1262/* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
42d7c593 1263 is possible if we only want to extend the name array. */
4b10dd6c
UD
1264static uint32_t *
1265find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1266 size_t *act, uint32_t idx)
19bc17a9 1267{
4b10dd6c 1268 size_t cnt;
19bc17a9 1269
4b10dd6c
UD
1270 if (idx < 256)
1271 return table == NULL ? NULL : &(*table)[idx];
19bc17a9 1272
4b10dd6c
UD
1273 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1274 if (ctype->charnames[cnt] == idx)
1275 break;
19bc17a9 1276
4b10dd6c
UD
1277 /* We have to distinguish two cases: the name is found or not. */
1278 if (cnt == ctype->charnames_act)
1279 {
1280 /* Extend the name array. */
1281 if (ctype->charnames_act == ctype->charnames_max)
1282 {
1283 ctype->charnames_max *= 2;
5866b131 1284 ctype->charnames = (uint32_t *)
4b10dd6c 1285 xrealloc (ctype->charnames,
5866b131 1286 sizeof (uint32_t) * ctype->charnames_max);
4b10dd6c
UD
1287 }
1288 ctype->charnames[ctype->charnames_act++] = idx;
1289 }
19bc17a9 1290
4b10dd6c
UD
1291 if (table == NULL)
1292 /* We have done everything we are asked to do. */
1293 return NULL;
19bc17a9 1294
4b10dd6c
UD
1295 if (cnt >= *act)
1296 {
1297 if (cnt >= *max)
1298 {
1299 size_t old_max = *max;
1300 do
1301 *max *= 2;
1302 while (*max <= cnt);
19bc17a9 1303
4b10dd6c 1304 *table =
5866b131 1305 (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
4b10dd6c
UD
1306 memset (&(*table)[old_max], '\0',
1307 (*max - old_max) * sizeof (uint32_t));
1308 }
19bc17a9 1309
76e680a8 1310 *act = cnt + 1;
4b10dd6c 1311 }
19bc17a9 1312
4b10dd6c 1313 return &(*table)[cnt];
19bc17a9
RM
1314}
1315
1316
4b10dd6c
UD
1317static int
1318get_character (struct token *now, struct charmap_t *charmap,
1319 struct repertoire_t *repertoire,
1320 struct charseq **seqp, uint32_t *wchp)
19bc17a9 1321{
4b10dd6c
UD
1322 if (now->tok == tok_bsymbol)
1323 {
1324 /* This will hopefully be the normal case. */
1325 *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1326 now->val.str.lenmb);
1327 *seqp = charmap_find_value (charmap, now->val.str.startmb,
1328 now->val.str.lenmb);
1329 }
1330 else if (now->tok == tok_ucs4)
1331 {
f0a4b6b1
UD
1332 char utmp[10];
1333
1334 snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1335 *seqp = charmap_find_value (charmap, utmp, 9);
1336
1337 if (*seqp == NULL)
1338 *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
19bc17a9 1339
4b10dd6c
UD
1340 if (*seqp == NULL)
1341 {
1342 /* Compute the value in the charmap from the UCS value. */
1343 const char *symbol = repertoire_find_symbol (repertoire,
1344 now->val.ucs4);
19bc17a9 1345
4b10dd6c
UD
1346 if (symbol == NULL)
1347 *seqp = NULL;
1348 else
1349 *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
19bc17a9 1350
4b10dd6c
UD
1351 if (*seqp == NULL)
1352 {
723faa38
UD
1353 if (repertoire != NULL)
1354 {
1355 /* Insert a negative entry. */
1356 static const struct charseq negative
1357 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1358 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1359 sizeof (uint32_t));
1360 *newp = now->val.ucs4;
1361
1362 insert_entry (&repertoire->seq_table, newp,
1363 sizeof (uint32_t), (void *) &negative);
1364 }
4b10dd6c
UD
1365 }
1366 else
1367 (*seqp)->ucs4 = now->val.ucs4;
1368 }
1369 else if ((*seqp)->ucs4 != now->val.ucs4)
1370 *seqp = NULL;
19bc17a9 1371
4b10dd6c
UD
1372 *wchp = now->val.ucs4;
1373 }
1374 else if (now->tok == tok_charcode)
1375 {
1376 /* We must map from the byte code to UCS4. */
1377 *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1378 now->val.str.lenmb);
19bc17a9 1379
4b10dd6c
UD
1380 if (*seqp == NULL)
1381 *wchp = ILLEGAL_CHAR_VALUE;
1382 else
1383 {
1384 if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1385 (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1386 strlen ((*seqp)->name));
1387 *wchp = (*seqp)->ucs4;
1388 }
1389 }
1390 else
1391 return 1;
19bc17a9
RM
1392
1393 return 0;
1394}
1395
1396
a0dc5206
UD
1397/* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1398 the .(2). counterparts. */
4b10dd6c
UD
1399static void
1400charclass_symbolic_ellipsis (struct linereader *ldfile,
1401 struct locale_ctype_t *ctype,
1402 struct charmap_t *charmap,
1403 struct repertoire_t *repertoire,
1404 struct token *now,
1405 const char *last_str,
1406 unsigned long int class256_bit,
1407 unsigned long int class_bit, int base,
a0dc5206 1408 int ignore_content, int handle_digits, int step)
19bc17a9 1409{
4b10dd6c
UD
1410 const char *nowstr = now->val.str.startmb;
1411 char tmp[now->val.str.lenmb + 1];
1412 const char *cp;
1413 char *endp;
1414 unsigned long int from;
1415 unsigned long int to;
19bc17a9 1416
4b10dd6c
UD
1417 /* We have to compute the ellipsis values using the symbolic names. */
1418 assert (last_str != NULL);
1419
1420 if (strlen (last_str) != now->val.str.lenmb)
19bc17a9 1421 {
4b10dd6c
UD
1422 invalid_range:
1423 lr_error (ldfile,
549b3c3a 1424 _("`%s' and `%.*s' are no valid names for symbolic range"),
f6ada7ad 1425 last_str, (int) now->val.str.lenmb, nowstr);
4b10dd6c 1426 return;
19bc17a9
RM
1427 }
1428
4b10dd6c
UD
1429 if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1430 /* Nothing to do, the names are the same. */
1431 return;
19bc17a9 1432
4b10dd6c
UD
1433 for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1434 ;
19bc17a9 1435
4b10dd6c
UD
1436 errno = 0;
1437 from = strtoul (cp, &endp, base);
1438 if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1439 goto invalid_range;
19bc17a9 1440
4b10dd6c 1441 to = strtoul (nowstr + (cp - last_str), &endp, base);
549b3c3a
UD
1442 if ((to == UINT_MAX && errno == ERANGE)
1443 || (endp - nowstr) != now->val.str.lenmb || from >= to)
4b10dd6c 1444 goto invalid_range;
19bc17a9 1445
4b10dd6c
UD
1446 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1447 if (!ignore_content)
1448 {
1449 now->val.str.startmb = tmp;
a0dc5206 1450 while ((from += step) <= to)
4b10dd6c
UD
1451 {
1452 struct charseq *seq;
1453 uint32_t wch;
19bc17a9 1454
4b10dd6c
UD
1455 sprintf (tmp, (base == 10 ? "%.*s%0*d" : "%.*s%0*X"), cp - last_str,
1456 last_str, now->val.str.lenmb - (cp - last_str), from);
19bc17a9 1457
4b10dd6c
UD
1458 get_character (now, charmap, repertoire, &seq, &wch);
1459
1460 if (seq != NULL && seq->nbytes == 1)
1461 /* Yep, we can store information about this byte sequence. */
1462 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
19bc17a9 1463
4b10dd6c
UD
1464 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1465 /* We have the UCS4 position. */
1466 *find_idx (ctype, &ctype->class_collection,
1467 &ctype->class_collection_max,
1468 &ctype->class_collection_act, wch) |= class_bit;
19bc17a9 1469
4b10dd6c
UD
1470 if (handle_digits == 1)
1471 {
1472 /* We must store the digit values. */
1473 if (ctype->mbdigits_act == ctype->mbdigits_max)
1474 {
1475 ctype->mbdigits_max *= 2;
1476 ctype->mbdigits = xrealloc (ctype->mbdigits,
1477 (ctype->mbdigits_max
1478 * sizeof (char *)));
1479 ctype->wcdigits_max *= 2;
1480 ctype->wcdigits = xrealloc (ctype->wcdigits,
1481 (ctype->wcdigits_max
1482 * sizeof (uint32_t)));
1483 }
1484
1485 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1486 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1487 }
1488 else if (handle_digits == 2)
1489 {
1490 /* We must store the digit values. */
1491 if (ctype->outdigits_act >= 10)
1492 {
1493 lr_error (ldfile, _("\
1494%s: field `%s' does not contain exactly ten entries"),
1495 "LC_CTYPE", "outdigit");
1496 return;
1497 }
1498
1499 ctype->mboutdigits[ctype->outdigits_act] = seq;
1500 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1501 ++ctype->outdigits_act;
1502 }
1503 }
1504 }
19bc17a9
RM
1505}
1506
1507
a0dc5206 1508/* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
4b10dd6c
UD
1509static void
1510charclass_ucs4_ellipsis (struct linereader *ldfile,
1511 struct locale_ctype_t *ctype,
1512 struct charmap_t *charmap,
1513 struct repertoire_t *repertoire,
1514 struct token *now, uint32_t last_wch,
1515 unsigned long int class256_bit,
1516 unsigned long int class_bit, int ignore_content,
a0dc5206 1517 int handle_digits, int step)
19bc17a9 1518{
4b10dd6c 1519 if (last_wch > now->val.ucs4)
19bc17a9 1520 {
4b10dd6c
UD
1521 lr_error (ldfile, _("\
1522to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1523 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1524 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
19bc17a9
RM
1525 return;
1526 }
1527
4b10dd6c 1528 if (!ignore_content)
a0dc5206 1529 while ((last_wch += step) <= now->val.ucs4)
4b10dd6c
UD
1530 {
1531 /* We have to find out whether there is a byte sequence corresponding
1532 to this UCS4 value. */
f0a4b6b1
UD
1533 struct charseq *seq;
1534 char utmp[10];
1535
1536 snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1537 seq = charmap_find_value (charmap, utmp, 9);
a0dc5206
UD
1538 if (seq == NULL)
1539 {
1540 snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1541 seq = charmap_find_value (charmap, utmp, 5);
1542 }
f0a4b6b1
UD
1543
1544 if (seq == NULL)
1545 /* Try looking in the repertoire map. */
1546 seq = repertoire_find_seq (repertoire, last_wch);
19bc17a9 1547
4b10dd6c
UD
1548 /* If this is the first time we look for this sequence create a new
1549 entry. */
1550 if (seq == NULL)
1551 {
f0a4b6b1
UD
1552 static const struct charseq negative
1553 = { .ucs4 = ILLEGAL_CHAR_VALUE };
19bc17a9 1554
f0a4b6b1
UD
1555 /* Find the symbolic name for this UCS4 value. */
1556 if (repertoire != NULL)
4b10dd6c 1557 {
f0a4b6b1
UD
1558 const char *symbol = repertoire_find_symbol (repertoire,
1559 last_wch);
5866b131
UD
1560 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1561 sizeof (uint32_t));
f0a4b6b1
UD
1562 *newp = last_wch;
1563
1564 if (symbol != NULL)
1565 /* We have a name, now search the multibyte value. */
1566 seq = charmap_find_value (charmap, symbol, strlen (symbol));
1567
1568 if (seq == NULL)
1569 /* We have to create a fake entry. */
1570 seq = (struct charseq *) &negative;
1571 else
1572 seq->ucs4 = last_wch;
1573
5866b131
UD
1574 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1575 seq);
4b10dd6c
UD
1576 }
1577 else
f0a4b6b1
UD
1578 /* We have to create a fake entry. */
1579 seq = (struct charseq *) &negative;
4b10dd6c
UD
1580 }
1581
1582 /* We have a name, now search the multibyte value. */
1583 if (seq->ucs4 == last_wch && seq->nbytes == 1)
1584 /* Yep, we can store information about this byte sequence. */
1585 ctype->class256_collection[(size_t) seq->bytes[0]]
1586 |= class256_bit;
1587
1588 /* And of course we have the UCS4 position. */
5866b131 1589 if (class_bit != 0)
4b10dd6c
UD
1590 *find_idx (ctype, &ctype->class_collection,
1591 &ctype->class_collection_max,
1592 &ctype->class_collection_act, last_wch) |= class_bit;
1593
1594 if (handle_digits == 1)
1595 {
1596 /* We must store the digit values. */
1597 if (ctype->mbdigits_act == ctype->mbdigits_max)
1598 {
1599 ctype->mbdigits_max *= 2;
1600 ctype->mbdigits = xrealloc (ctype->mbdigits,
1601 (ctype->mbdigits_max
1602 * sizeof (char *)));
1603 ctype->wcdigits_max *= 2;
1604 ctype->wcdigits = xrealloc (ctype->wcdigits,
1605 (ctype->wcdigits_max
1606 * sizeof (uint32_t)));
1607 }
1608
1609 ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1610 ? seq : NULL);
1611 ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1612 }
1613 else if (handle_digits == 2)
1614 {
1615 /* We must store the digit values. */
1616 if (ctype->outdigits_act >= 10)
1617 {
1618 lr_error (ldfile, _("\
1619%s: field `%s' does not contain exactly ten entries"),
1620 "LC_CTYPE", "outdigit");
1621 return;
1622 }
19bc17a9 1623
4b10dd6c
UD
1624 ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1625 ? seq : NULL);
1626 ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1627 ++ctype->outdigits_act;
1628 }
1629 }
19bc17a9
RM
1630}
1631
1632
4b10dd6c 1633/* Ellipsis as in `/xea/x12.../xea/x34'. */
19bc17a9 1634static void
4b10dd6c
UD
1635charclass_charcode_ellipsis (struct linereader *ldfile,
1636 struct locale_ctype_t *ctype,
1637 struct charmap_t *charmap,
1638 struct repertoire_t *repertoire,
1639 struct token *now, char *last_charcode,
1640 uint32_t last_charcode_len,
1641 unsigned long int class256_bit,
1642 unsigned long int class_bit, int ignore_content,
1643 int handle_digits)
19bc17a9 1644{
4b10dd6c
UD
1645 /* First check whether the to-value is larger. */
1646 if (now->val.charcode.nbytes != last_charcode_len)
1647 {
1648 lr_error (ldfile, _("\
1649start end end character sequence of range must have the same length"));
1650 return;
1651 }
19bc17a9 1652
4b10dd6c 1653 if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
19bc17a9 1654 {
4b10dd6c
UD
1655 lr_error (ldfile, _("\
1656to-value character sequence is smaller than from-value sequence"));
19bc17a9
RM
1657 return;
1658 }
1659
4b10dd6c
UD
1660 if (!ignore_content)
1661 {
1662 do
1663 {
1664 /* Increment the byte sequence value. */
1665 struct charseq *seq;
1666 uint32_t wch;
1667 int i;
1668
1669 for (i = last_charcode_len - 1; i >= 0; --i)
1670 if (++last_charcode[i] != 0)
1671 break;
1672
1673 if (last_charcode_len == 1)
1674 /* Of course we have the charcode value. */
1675 ctype->class256_collection[(size_t) last_charcode[0]]
1676 |= class256_bit;
1677
1678 /* Find the symbolic name. */
1679 seq = charmap_find_symbol (charmap, last_charcode,
1680 last_charcode_len);
1681 if (seq != NULL)
1682 {
1683 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1684 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1685 strlen (seq->name));
f0a4b6b1 1686 wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
4b10dd6c
UD
1687
1688 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1689 *find_idx (ctype, &ctype->class_collection,
1690 &ctype->class_collection_max,
1691 &ctype->class_collection_act, wch) |= class_bit;
1692 }
1693 else
1694 wch = ILLEGAL_CHAR_VALUE;
19bc17a9 1695
4b10dd6c
UD
1696 if (handle_digits == 1)
1697 {
1698 /* We must store the digit values. */
1699 if (ctype->mbdigits_act == ctype->mbdigits_max)
1700 {
1701 ctype->mbdigits_max *= 2;
1702 ctype->mbdigits = xrealloc (ctype->mbdigits,
1703 (ctype->mbdigits_max
1704 * sizeof (char *)));
1705 ctype->wcdigits_max *= 2;
1706 ctype->wcdigits = xrealloc (ctype->wcdigits,
1707 (ctype->wcdigits_max
1708 * sizeof (uint32_t)));
1709 }
1710
1711 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1712 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1713 seq->nbytes = last_charcode_len;
1714
1715 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1716 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1717 }
1718 else if (handle_digits == 2)
1719 {
1720 struct charseq *seq;
1721 /* We must store the digit values. */
1722 if (ctype->outdigits_act >= 10)
1723 {
1724 lr_error (ldfile, _("\
1725%s: field `%s' does not contain exactly ten entries"),
1726 "LC_CTYPE", "outdigit");
1727 return;
1728 }
1729
1730 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1731 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1732 seq->nbytes = last_charcode_len;
1733
1734 ctype->mboutdigits[ctype->outdigits_act] = seq;
1735 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1736 ++ctype->outdigits_act;
1737 }
1738 }
1739 while (memcmp (last_charcode, now->val.charcode.bytes,
1740 last_charcode_len) != 0);
1741 }
19bc17a9
RM
1742}
1743
1744
4b10dd6c
UD
1745/* Read one transliteration entry. */
1746static uint32_t *
1747read_widestring (struct linereader *ldfile, struct token *now,
1748 struct charmap_t *charmap, struct repertoire_t *repertoire)
19bc17a9 1749{
4b10dd6c 1750 uint32_t *wstr;
19bc17a9 1751
4b10dd6c
UD
1752 if (now->tok == tok_default_missing)
1753 /* The special name "" will denote this case. */
5866b131 1754 wstr = ((uint32_t *) { 0 });
4b10dd6c 1755 else if (now->tok == tok_bsymbol)
19bc17a9 1756 {
4b10dd6c 1757 /* Get the value from the repertoire. */
a673fbcb 1758 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
4b10dd6c
UD
1759 wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1760 now->val.str.lenmb);
1761 if (wstr[0] == ILLEGAL_CHAR_VALUE)
f0a4b6b1
UD
1762 {
1763 /* We cannot proceed, we don't know the UCS4 value. */
1764 free (wstr);
1765 return NULL;
1766 }
4b10dd6c
UD
1767
1768 wstr[1] = 0;
19bc17a9 1769 }
4b10dd6c 1770 else if (now->tok == tok_ucs4)
19bc17a9 1771 {
a673fbcb 1772 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
4b10dd6c
UD
1773 wstr[0] = now->val.ucs4;
1774 wstr[1] = 0;
1775 }
1776 else if (now->tok == tok_charcode)
1777 {
1778 /* Argh, we have to convert to the symbol name first and then to the
1779 UCS4 value. */
1780 struct charseq *seq = charmap_find_symbol (charmap,
1781 now->val.str.startmb,
1782 now->val.str.lenmb);
1783 if (seq == NULL)
1784 /* Cannot find the UCS4 value. */
1785 return NULL;
1786
1787 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1788 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1789 strlen (seq->name));
1790 if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1791 /* We cannot proceed, we don't know the UCS4 value. */
1792 return NULL;
1793
a673fbcb 1794 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
4b10dd6c
UD
1795 wstr[0] = seq->ucs4;
1796 wstr[1] = 0;
1797 }
1798 else if (now->tok == tok_string)
1799 {
1800 wstr = now->val.str.startwc;
a673fbcb 1801 if (wstr == NULL || wstr[0] == 0)
4b10dd6c
UD
1802 return NULL;
1803 }
1804 else
1805 {
1806 if (now->tok != tok_eol && now->tok != tok_eof)
1807 lr_ignore_rest (ldfile, 0);
1808 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1809 return (uint32_t *) -1l;
19bc17a9
RM
1810 }
1811
4b10dd6c
UD
1812 return wstr;
1813}
19bc17a9 1814
19bc17a9 1815
4b10dd6c
UD
1816static void
1817read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1818 struct token *now, struct charmap_t *charmap,
1819 struct repertoire_t *repertoire)
1820{
1821 uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1822 struct translit_t *result;
1823 struct translit_to_t **top;
a673fbcb 1824 struct obstack *ob = &ctype->mempool;
4b10dd6c
UD
1825 int first;
1826 int ignore;
1827
1828 if (from_wstr == NULL)
1829 /* There is no valid from string. */
1830 return;
19bc17a9 1831
4b10dd6c
UD
1832 result = (struct translit_t *) obstack_alloc (ob,
1833 sizeof (struct translit_t));
1834 result->from = from_wstr;
a673fbcb
UD
1835 result->fname = ldfile->fname;
1836 result->lineno = ldfile->lineno;
4b10dd6c
UD
1837 result->next = NULL;
1838 result->to = NULL;
1839 top = &result->to;
1840 first = 1;
1841 ignore = 0;
1842
1843 while (1)
1844 {
1845 uint32_t *to_wstr;
1846
1847 /* Next we have one or more transliterations. They are
1848 separated by semicolons. */
1849 now = lr_token (ldfile, charmap, repertoire);
1850
1851 if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1852 {
1853 /* One string read. */
1854 const uint32_t zero = 0;
1855
1856 if (!ignore)
1857 {
1858 obstack_grow (ob, &zero, 4);
1859 to_wstr = obstack_finish (ob);
1860
1861 *top = obstack_alloc (ob, sizeof (struct translit_to_t));
1862 (*top)->str = to_wstr;
1863 (*top)->next = NULL;
1864 }
1865
1866 if (now->tok == tok_eol)
1867 {
1868 result->next = ctype->translit;
1869 ctype->translit = result;
1870 return;
1871 }
1872
1873 if (!ignore)
1874 top = &(*top)->next;
1875 ignore = 0;
1876 }
1877 else
1878 {
1879 to_wstr = read_widestring (ldfile, now, charmap, repertoire);
1880 if (to_wstr == (uint32_t *) -1l)
1881 {
1882 /* An error occurred. */
1883 obstack_free (ob, result);
1884 return;
1885 }
1886
1887 if (to_wstr == NULL)
1888 ignore = 1;
1889 else
1890 /* This value is usable. */
1891 obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
19bc17a9 1892
4b10dd6c
UD
1893 first = 0;
1894 }
1895 }
19bc17a9
RM
1896}
1897
1898
a673fbcb
UD
1899static void
1900read_translit_ignore_entry (struct linereader *ldfile,
1901 struct locale_ctype_t *ctype,
1902 struct charmap_t *charmap,
1903 struct repertoire_t *repertoire)
1904{
1905 /* We expect a semicolon-separated list of characters we ignore. We are
1906 only interested in the wide character definitions. These must be
1907 single characters, possibly defining a range when an ellipsis is used. */
1908 while (1)
1909 {
1910 struct token *now = lr_token (ldfile, charmap, repertoire);
1911 struct translit_ignore_t *newp;
1912 uint32_t from;
1913
1914 if (now->tok == tok_eol || now->tok == tok_eof)
1915 {
1916 lr_error (ldfile,
1917 _("premature end of `translit_ignore' definition"));
1918 return;
1919 }
1920
1921 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1922 {
1923 lr_error (ldfile, _("syntax error"));
1924 lr_ignore_rest (ldfile, 0);
1925 return;
1926 }
1927
1928 if (now->tok == tok_ucs4)
1929 from = now->val.ucs4;
1930 else
f0a4b6b1
UD
1931 /* Try to get the value. */
1932 from = repertoire_find_value (repertoire, now->val.str.startmb,
1933 now->val.str.lenmb);
a673fbcb
UD
1934
1935 if (from == ILLEGAL_CHAR_VALUE)
1936 {
1937 lr_error (ldfile, "invalid character name");
1938 newp = NULL;
1939 }
1940 else
1941 {
1942 newp = (struct translit_ignore_t *)
1943 obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
1944 newp->from = from;
1945 newp->to = from;
a0dc5206 1946 newp->step = 1;
a673fbcb
UD
1947
1948 newp->next = ctype->translit_ignore;
1949 ctype->translit_ignore = newp;
1950 }
1951
1952 /* Now we expect either a semicolon, an ellipsis, or the end of the
1953 line. */
1954 now = lr_token (ldfile, charmap, repertoire);
1955
a0dc5206 1956 if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
a673fbcb
UD
1957 {
1958 /* XXX Should we bother implementing `....'? `...' certainly
1959 will not be implemented. */
1960 uint32_t to;
a0dc5206 1961 int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
a673fbcb
UD
1962
1963 now = lr_token (ldfile, charmap, repertoire);
1964
1965 if (now->tok == tok_eol || now->tok == tok_eof)
1966 {
1967 lr_error (ldfile,
1968 _("premature end of `translit_ignore' definition"));
1969 return;
1970 }
1971
1972 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1973 {
1974 lr_error (ldfile, _("syntax error"));
1975 lr_ignore_rest (ldfile, 0);
1976 return;
1977 }
1978
1979 if (now->tok == tok_ucs4)
1980 to = now->val.ucs4;
1981 else
f0a4b6b1
UD
1982 /* Try to get the value. */
1983 to = repertoire_find_value (repertoire, now->val.str.startmb,
1984 now->val.str.lenmb);
a673fbcb
UD
1985
1986 if (to == ILLEGAL_CHAR_VALUE)
1987 lr_error (ldfile, "invalid character name");
1988 else
1989 {
1990 /* Make sure the `to'-value is larger. */
1991 if (to >= from)
a0dc5206
UD
1992 {
1993 newp->to = to;
1994 newp->step = step;
1995 }
a673fbcb
UD
1996 else
1997 lr_error (ldfile, _("\
1998to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1999 (to | from) < 65536 ? 4 : 8, to,
2000 (to | from) < 65536 ? 4 : 8, from);
2001 }
2002
2003 /* And the next token. */
2004 now = lr_token (ldfile, charmap, repertoire);
2005 }
2006
2007 if (now->tok == tok_eol || now->tok == tok_eof)
2008 /* We are done. */
2009 return;
2010
2011 if (now->tok == tok_semicolon)
2012 /* Next round. */
2013 continue;
2014
2015 /* If we come here something is wrong. */
2016 lr_error (ldfile, _("syntax error"));
2017 lr_ignore_rest (ldfile, 0);
2018 return;
2019 }
2020}
2021
2022
4b10dd6c
UD
2023/* The parser for the LC_CTYPE section of the locale definition. */
2024void
2025ctype_read (struct linereader *ldfile, struct localedef_t *result,
2026 struct charmap_t *charmap, const char *repertoire_name,
2027 int ignore_content)
19bc17a9 2028{
4b10dd6c
UD
2029 struct repertoire_t *repertoire = NULL;
2030 struct locale_ctype_t *ctype;
2031 struct token *now;
2032 enum token_t nowtok;
19bc17a9 2033 size_t cnt;
4b10dd6c
UD
2034 struct charseq *last_seq;
2035 uint32_t last_wch = 0;
2036 enum token_t last_token;
2037 enum token_t ellipsis_token;
a0dc5206 2038 int step;
4b10dd6c
UD
2039 char last_charcode[16];
2040 size_t last_charcode_len = 0;
2041 const char *last_str = NULL;
2042 int mapidx;
19bc17a9 2043
4b10dd6c
UD
2044 /* Get the repertoire we have to use. */
2045 if (repertoire_name != NULL)
2046 repertoire = repertoire_read (repertoire_name);
19bc17a9 2047
4b10dd6c
UD
2048 /* The rest of the line containing `LC_CTYPE' must be free. */
2049 lr_ignore_rest (ldfile, 1);
19bc17a9 2050
4b10dd6c
UD
2051
2052 do
19bc17a9 2053 {
4b10dd6c
UD
2054 now = lr_token (ldfile, charmap, NULL);
2055 nowtok = now->tok;
19bc17a9 2056 }
4b10dd6c 2057 while (nowtok == tok_eol);
19bc17a9 2058
4b10dd6c
UD
2059 /* If we see `copy' now we are almost done. */
2060 if (nowtok == tok_copy)
2061 {
01ff9d0b
UD
2062 handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_ctype,
2063 LC_CTYPE, "LC_CTYPE", ignore_content);
4b10dd6c
UD
2064 return;
2065 }
75cd5204 2066
4b10dd6c
UD
2067 /* Prepare the data structures. */
2068 ctype_startup (ldfile, result, charmap, ignore_content);
2069 ctype = result->categories[LC_CTYPE].ctype;
2070
2071 /* Remember the repertoire we use. */
2072 if (!ignore_content)
2073 ctype->repertoire = repertoire;
2074
2075 while (1)
19bc17a9 2076 {
4b10dd6c
UD
2077 unsigned long int class_bit = 0;
2078 unsigned long int class256_bit = 0;
2079 int handle_digits = 0;
2080
2081 /* Of course we don't proceed beyond the end of file. */
2082 if (nowtok == tok_eof)
2083 break;
2084
2085 /* Ingore empty lines. */
2086 if (nowtok == tok_eol)
19bc17a9 2087 {
4b10dd6c
UD
2088 now = lr_token (ldfile, charmap, NULL);
2089 nowtok = now->tok;
2090 continue;
2091 }
19bc17a9 2092
4b10dd6c
UD
2093 switch (nowtok)
2094 {
5491da0d
UD
2095 case tok_charclass:
2096 now = lr_token (ldfile, charmap, NULL);
2097 while (now->tok == tok_ident || now->tok == tok_string)
2098 {
2099 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2100 now = lr_token (ldfile, charmap, NULL);
2101 if (now->tok != tok_semicolon)
2102 break;
2103 now = lr_token (ldfile, charmap, NULL);
2104 }
2105 if (now->tok != tok_eol)
2106 SYNTAX_ERROR (_("\
2107%s: syntax error in definition of new character class"), "LC_CTYPE");
2108 break;
2109
2110 case tok_charconv:
2111 now = lr_token (ldfile, charmap, NULL);
2112 while (now->tok == tok_ident || now->tok == tok_string)
2113 {
2114 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2115 now = lr_token (ldfile, charmap, NULL);
2116 if (now->tok != tok_semicolon)
2117 break;
2118 now = lr_token (ldfile, charmap, NULL);
2119 }
2120 if (now->tok != tok_eol)
2121 SYNTAX_ERROR (_("\
2122%s: syntax error in definition of new character map"), "LC_CTYPE");
2123 break;
2124
4b10dd6c 2125 case tok_class:
b9eb05d6
UD
2126 /* Ignore the rest of the line if we don't need the input of
2127 this line. */
2128 if (ignore_content)
2129 {
2130 lr_ignore_rest (ldfile, 0);
2131 break;
2132 }
2133
4b10dd6c
UD
2134 /* We simply forget the `class' keyword and use the following
2135 operand to determine the bit. */
2136 now = lr_token (ldfile, charmap, NULL);
2137 if (now->tok == tok_ident || now->tok == tok_string)
2138 {
87372aa9 2139 /* Must can be one of the predefined class names. */
4b10dd6c
UD
2140 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2141 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2142 break;
2143 if (cnt >= ctype->nr_charclass)
2144 {
011ebfab 2145#ifdef PREDEFINED_CLASSES
4b10dd6c
UD
2146 if (now->val.str.lenmb == 8
2147 && memcmp ("special1", now->val.str.startmb, 8) == 0)
2148 class_bit = _ISwspecial1;
2149 else if (now->val.str.lenmb == 8
2150 && memcmp ("special2", now->val.str.startmb, 8) == 0)
2151 class_bit = _ISwspecial2;
2152 else if (now->val.str.lenmb == 8
2153 && memcmp ("special3", now->val.str.startmb, 8) == 0)
2154 class_bit = _ISwspecial3;
2155 else
011ebfab 2156#endif
4b10dd6c 2157 {
87372aa9
UD
2158 /* OK, it's a new class. */
2159 ctype_class_new (ldfile, ctype, now->val.str.startmb);
4b10dd6c 2160
87372aa9 2161 class_bit = _ISwbit (ctype->nr_charclass - 1);
4b10dd6c
UD
2162 }
2163 }
2164 else
7f653277
UD
2165 {
2166 class_bit = _ISwbit (cnt);
4b10dd6c 2167
7f653277
UD
2168 free (now->val.str.startmb);
2169 }
4b10dd6c
UD
2170 }
2171 else if (now->tok == tok_digit)
2172 goto handle_tok_digit;
2173 else if (now->tok < tok_upper || now->tok > tok_blank)
2174 goto err_label;
2175 else
2176 {
2177 class_bit = BITw (now->tok);
2178 class256_bit = BIT (now->tok);
2179 }
2180
2181 /* The next character must be a semicolon. */
2182 now = lr_token (ldfile, charmap, NULL);
2183 if (now->tok != tok_semicolon)
2184 goto err_label;
2185 goto read_charclass;
2186
2187 case tok_upper:
2188 case tok_lower:
2189 case tok_alpha:
2190 case tok_alnum:
2191 case tok_space:
2192 case tok_cntrl:
2193 case tok_punct:
2194 case tok_graph:
2195 case tok_print:
2196 case tok_xdigit:
2197 case tok_blank:
b9eb05d6
UD
2198 /* Ignore the rest of the line if we don't need the input of
2199 this line. */
2200 if (ignore_content)
2201 {
2202 lr_ignore_rest (ldfile, 0);
2203 break;
2204 }
2205
4b10dd6c
UD
2206 class_bit = BITw (now->tok);
2207 class256_bit = BIT (now->tok);
2208 handle_digits = 0;
2209 read_charclass:
2210 ctype->class_done |= class_bit;
2211 last_token = tok_none;
2212 ellipsis_token = tok_none;
a0dc5206 2213 step = 1;
4b10dd6c
UD
2214 now = lr_token (ldfile, charmap, NULL);
2215 while (now->tok != tok_eol && now->tok != tok_eof)
2216 {
2217 uint32_t wch;
2218 struct charseq *seq;
2219
2220 if (ellipsis_token == tok_none)
2221 {
2222 if (get_character (now, charmap, repertoire, &seq, &wch))
2223 goto err_label;
2224
2225 if (!ignore_content && seq != NULL && seq->nbytes == 1)
2226 /* Yep, we can store information about this byte
2227 sequence. */
2228 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2229
2230 if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2231 && class_bit != 0)
2232 /* We have the UCS4 position. */
2233 *find_idx (ctype, &ctype->class_collection,
2234 &ctype->class_collection_max,
2235 &ctype->class_collection_act, wch) |= class_bit;
2236
2237 last_token = now->tok;
549b3c3a 2238 /* Terminate the string. */
9e2b7438
UD
2239 if (last_token == tok_bsymbol)
2240 {
2241 now->val.str.startmb[now->val.str.lenmb] = '\0';
2242 last_str = now->val.str.startmb;
2243 }
2244 else
2245 last_str = NULL;
4b10dd6c
UD
2246 last_seq = seq;
2247 last_wch = wch;
2248 memcpy (last_charcode, now->val.charcode.bytes, 16);
2249 last_charcode_len = now->val.charcode.nbytes;
2250
2251 if (!ignore_content && handle_digits == 1)
2252 {
2253 /* We must store the digit values. */
2254 if (ctype->mbdigits_act == ctype->mbdigits_max)
2255 {
b9eb05d6 2256 ctype->mbdigits_max += 10;
4b10dd6c
UD
2257 ctype->mbdigits = xrealloc (ctype->mbdigits,
2258 (ctype->mbdigits_max
2259 * sizeof (char *)));
b9eb05d6 2260 ctype->wcdigits_max += 10;
4b10dd6c
UD
2261 ctype->wcdigits = xrealloc (ctype->wcdigits,
2262 (ctype->wcdigits_max
2263 * sizeof (uint32_t)));
2264 }
2265
2266 ctype->mbdigits[ctype->mbdigits_act++] = seq;
2267 ctype->wcdigits[ctype->wcdigits_act++] = wch;
2268 }
2269 else if (!ignore_content && handle_digits == 2)
2270 {
2271 /* We must store the digit values. */
2272 if (ctype->outdigits_act >= 10)
2273 {
2274 lr_error (ldfile, _("\
2275%s: field `%s' does not contain exactly ten entries"),
2276 "LC_CTYPE", "outdigit");
2277 goto err_label;
2278 }
2279
2280 ctype->mboutdigits[ctype->outdigits_act] = seq;
2281 ctype->wcoutdigits[ctype->outdigits_act] = wch;
2282 ++ctype->outdigits_act;
2283 }
2284 }
2285 else
2286 {
2287 /* Now it gets complicated. We have to resolve the
2288 ellipsis problem. First we must distinguish between
2289 the different kind of ellipsis and this must match the
2290 tokens we have seen. */
2291 assert (last_token != tok_none);
2292
2293 if (last_token != now->tok)
2294 {
2295 lr_error (ldfile, _("\
2296ellipsis range must be marked by two operands of same type"));
2297 lr_ignore_rest (ldfile, 0);
2298 break;
2299 }
2300
2301 if (last_token == tok_bsymbol)
2302 {
2303 if (ellipsis_token == tok_ellipsis3)
2304 lr_error (ldfile, _("with symbolic name range values \
2305the absolute ellipsis `...' must not be used"));
2306
2307 charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2308 repertoire, now, last_str,
2309 class256_bit, class_bit,
2310 (ellipsis_token
2311 == tok_ellipsis4
2312 ? 10 : 16),
2313 ignore_content,
a0dc5206 2314 handle_digits, step);
4b10dd6c
UD
2315 }
2316 else if (last_token == tok_ucs4)
2317 {
2318 if (ellipsis_token != tok_ellipsis2)
2319 lr_error (ldfile, _("\
2320with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2321
2322 charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2323 repertoire, now, last_wch,
2324 class256_bit, class_bit,
a0dc5206
UD
2325 ignore_content, handle_digits,
2326 step);
4b10dd6c
UD
2327 }
2328 else
2329 {
2330 assert (last_token == tok_charcode);
2331
2332 if (ellipsis_token != tok_ellipsis3)
2333 lr_error (ldfile, _("\
2334with character code range values one must use the absolute ellipsis `...'"));
2335
2336 charclass_charcode_ellipsis (ldfile, ctype, charmap,
2337 repertoire, now,
2338 last_charcode,
2339 last_charcode_len,
2340 class256_bit, class_bit,
2341 ignore_content,
2342 handle_digits);
2343 }
2344
2345 /* Now we have used the last value. */
2346 last_token = tok_none;
2347 }
2348
2349 /* Next we expect a semicolon or the end of the line. */
2350 now = lr_token (ldfile, charmap, NULL);
2351 if (now->tok == tok_eol || now->tok == tok_eof)
2352 break;
2353
2354 if (last_token != tok_none
a0dc5206 2355 && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
4b10dd6c 2356 {
a0dc5206
UD
2357 if (now->tok == tok_ellipsis2_2)
2358 {
2359 now->tok = tok_ellipsis2;
2360 step = 2;
2361 }
2362 else if (now->tok == tok_ellipsis4_2)
2363 {
2364 now->tok = tok_ellipsis4;
2365 step = 2;
2366 }
2367
4b10dd6c 2368 ellipsis_token = now->tok;
a0dc5206 2369
4b10dd6c
UD
2370 now = lr_token (ldfile, charmap, NULL);
2371 continue;
2372 }
2373
2374 if (now->tok != tok_semicolon)
2375 goto err_label;
2376
2377 /* And get the next character. */
2378 now = lr_token (ldfile, charmap, NULL);
2379
2380 ellipsis_token = tok_none;
a0dc5206 2381 step = 1;
4b10dd6c
UD
2382 }
2383 break;
2384
2385 case tok_digit:
b9eb05d6
UD
2386 /* Ignore the rest of the line if we don't need the input of
2387 this line. */
2388 if (ignore_content)
42d7c593
UD
2389 {
2390 lr_ignore_rest (ldfile, 0);
2391 break;
2392 }
b9eb05d6 2393
4b10dd6c
UD
2394 handle_tok_digit:
2395 class_bit = _ISwdigit;
2396 class256_bit = _ISdigit;
2397 handle_digits = 1;
2398 goto read_charclass;
2399
2400 case tok_outdigit:
b9eb05d6
UD
2401 /* Ignore the rest of the line if we don't need the input of
2402 this line. */
2403 if (ignore_content)
2404 {
2405 lr_ignore_rest (ldfile, 0);
2406 break;
2407 }
2408
4b10dd6c
UD
2409 if (ctype->outdigits_act != 0)
2410 lr_error (ldfile, _("\
2411%s: field `%s' declared more than once"),
2412 "LC_CTYPE", "outdigit");
2413 class_bit = 0;
2414 class256_bit = 0;
2415 handle_digits = 2;
2416 goto read_charclass;
2417
2418 case tok_toupper:
b9eb05d6
UD
2419 /* Ignore the rest of the line if we don't need the input of
2420 this line. */
2421 if (ignore_content)
2422 {
2423 lr_ignore_rest (ldfile, 0);
2424 break;
2425 }
2426
4b10dd6c
UD
2427 mapidx = 0;
2428 goto read_mapping;
2429
2430 case tok_tolower:
b9eb05d6
UD
2431 /* Ignore the rest of the line if we don't need the input of
2432 this line. */
2433 if (ignore_content)
2434 {
2435 lr_ignore_rest (ldfile, 0);
2436 break;
2437 }
2438
4b10dd6c
UD
2439 mapidx = 1;
2440 goto read_mapping;
2441
2442 case tok_map:
b9eb05d6
UD
2443 /* Ignore the rest of the line if we don't need the input of
2444 this line. */
2445 if (ignore_content)
2446 {
2447 lr_ignore_rest (ldfile, 0);
2448 break;
2449 }
2450
4b10dd6c
UD
2451 /* We simply forget the `map' keyword and use the following
2452 operand to determine the mapping. */
2453 now = lr_token (ldfile, charmap, NULL);
2454 if (now->tok == tok_ident || now->tok == tok_string)
2455 {
2456 size_t cnt;
2457
2458 for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2459 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2460 break;
2461
7f653277
UD
2462 if (cnt < ctype->map_collection_nr)
2463 free (now->val.str.startmb);
2464 else
87372aa9
UD
2465 /* OK, it's a new map. */
2466 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2467
2468 mapidx = cnt;
4b10dd6c
UD
2469 }
2470 else if (now->tok < tok_toupper || now->tok > tok_tolower)
2471 goto err_label;
2472 else
2473 mapidx = now->tok - tok_toupper;
2474
2475 now = lr_token (ldfile, charmap, NULL);
2476 /* This better should be a semicolon. */
2477 if (now->tok != tok_semicolon)
2478 goto err_label;
2479
2480 read_mapping:
2481 /* Test whether this mapping was already defined. */
2482 if (ctype->tomap_done[mapidx])
2483 {
2484 lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2485 ctype->mapnames[mapidx]);
2486 lr_ignore_rest (ldfile, 0);
2487 break;
2488 }
2489 ctype->tomap_done[mapidx] = 1;
2490
2491 now = lr_token (ldfile, charmap, NULL);
2492 while (now->tok != tok_eol && now->tok != tok_eof)
2493 {
2494 struct charseq *from_seq;
2495 uint32_t from_wch;
2496 struct charseq *to_seq;
2497 uint32_t to_wch;
2498
2499 /* Every pair starts with an opening brace. */
2500 if (now->tok != tok_open_brace)
2501 goto err_label;
2502
2503 /* Next comes the from-value. */
2504 now = lr_token (ldfile, charmap, NULL);
2505 if (get_character (now, charmap, repertoire, &from_seq,
2506 &from_wch) != 0)
2507 goto err_label;
2508
2509 /* The next is a comma. */
2510 now = lr_token (ldfile, charmap, NULL);
2511 if (now->tok != tok_comma)
2512 goto err_label;
2513
2514 /* And the other value. */
2515 now = lr_token (ldfile, charmap, NULL);
2516 if (get_character (now, charmap, repertoire, &to_seq,
2517 &to_wch) != 0)
2518 goto err_label;
2519
2520 /* And the last thing is the closing brace. */
2521 now = lr_token (ldfile, charmap, NULL);
2522 if (now->tok != tok_close_brace)
2523 goto err_label;
2524
2525 if (!ignore_content)
2526 {
2527 if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2528 && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2529 /* We can use this value. */
2530 ctype->map256_collection[mapidx][from_seq->bytes[0]]
2531 = to_seq->bytes[0];
2532
2533 if (from_wch != ILLEGAL_CHAR_VALUE
2534 && to_wch != ILLEGAL_CHAR_VALUE)
2535 /* Both correct values. */
2536 *find_idx (ctype, &ctype->map_collection[mapidx],
2537 &ctype->map_collection_max[mapidx],
2538 &ctype->map_collection_act[mapidx],
2539 from_wch) = to_wch;
2540 }
2541
2542 /* Now comes a semicolon or the end of the line/file. */
2543 now = lr_token (ldfile, charmap, NULL);
2544 if (now->tok == tok_semicolon)
2545 now = lr_token (ldfile, charmap, NULL);
2546 }
2547 break;
2548
2549 case tok_translit_start:
b9eb05d6
UD
2550 /* Ignore the rest of the line if we don't need the input of
2551 this line. */
2552 if (ignore_content)
2553 {
2554 lr_ignore_rest (ldfile, 0);
2555 break;
2556 }
2557
4b10dd6c
UD
2558 /* The rest of the line better should be empty. */
2559 lr_ignore_rest (ldfile, 1);
2560
2561 /* We count here the number of allocated entries in the `translit'
2562 array. */
2563 cnt = 0;
2564
2565 /* We proceed until we see the `translit_end' token. */
2566 while (now = lr_token (ldfile, charmap, repertoire),
2567 now->tok != tok_translit_end && now->tok != tok_eof)
2568 {
2569 if (now->tok == tok_eol)
2570 /* Ignore empty lines. */
2571 continue;
2572
2573 if (now->tok == tok_translit_end)
2574 {
2575 lr_ignore_rest (ldfile, 0);
2576 break;
2577 }
2578
2579 if (now->tok == tok_include)
2580 {
2581 /* We have to include locale. */
2582 const char *locale_name;
2583 const char *repertoire_name;
2584
2585 now = lr_token (ldfile, charmap, NULL);
2586 /* This should be a string or an identifier. In any
2587 case something to name a locale. */
2588 if (now->tok != tok_string && now->tok != tok_ident)
2589 {
2590 translit_syntax:
2591 lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2592 lr_ignore_rest (ldfile, 0);
2593 continue;
2594 }
2595 locale_name = now->val.str.startmb;
2596
2597 /* Next should be a semicolon. */
2598 now = lr_token (ldfile, charmap, NULL);
2599 if (now->tok != tok_semicolon)
2600 goto translit_syntax;
2601
2602 /* Now the repertoire name. */
2603 now = lr_token (ldfile, charmap, NULL);
2604 if ((now->tok != tok_string && now->tok != tok_ident)
2605 || now->val.str.startmb == NULL)
2606 goto translit_syntax;
2607 repertoire_name = now->val.str.startmb;
2608
2609 /* We must not have more than one `include'. */
2610 if (ctype->translit_copy_locale != NULL)
2611 {
2612 lr_error (ldfile, _("\
2613%s: only one `include' instruction allowed"), "LC_CTYPE");
2614 lr_ignore_rest (ldfile, 0);
2615 continue;
2616 }
2617
2618 ctype->translit_copy_locale = locale_name;
2619 ctype->translit_copy_repertoire = repertoire_name;
2620
2621 /* The rest of the line must be empty. */
2622 lr_ignore_rest (ldfile, 1);
a673fbcb
UD
2623
2624 /* Make sure the locale is read. */
2625 add_to_readlist (LC_CTYPE, ctype->translit_copy_locale,
07dab0c3 2626 repertoire_name, 1, NULL);
a673fbcb
UD
2627 continue;
2628 }
2629 else if (now->tok == tok_default_missing)
2630 {
2631 uint32_t *wstr;
2632
2633 /* We expect a single character or string as the
2634 argument. */
2635 now = lr_token (ldfile, charmap, NULL);
2636 wstr = read_widestring (ldfile, now, charmap, repertoire);
2637
2638 if (wstr != NULL)
2639 {
2640 if (ctype->default_missing != NULL)
2641 {
2642 lr_error (ldfile, _("\
2643%s: duplicate `default_missing' definition"), "LC_CTYPE");
2644 error_at_line (0, 0, ctype->default_missing_file,
2645 ctype->default_missing_lineno,
2646 _("previous definition was here"));
2647 }
2648 else
2649 {
2650 ctype->default_missing = wstr;
2651 ctype->default_missing_file = ldfile->fname;
2652 ctype->default_missing_lineno = ldfile->lineno;
2653 }
2654 }
2655 lr_ignore_rest (ldfile, 1);
2656 continue;
2657 }
2658 else if (now->tok == tok_translit_ignore)
2659 {
2660 read_translit_ignore_entry (ldfile, ctype, charmap,
2661 repertoire);
4b10dd6c
UD
2662 continue;
2663 }
2664
2665 read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2666 }
2667 break;
2668
2669 case tok_ident:
b9eb05d6
UD
2670 /* Ignore the rest of the line if we don't need the input of
2671 this line. */
2672 if (ignore_content)
2673 {
2674 lr_ignore_rest (ldfile, 0);
2675 break;
2676 }
2677
4b10dd6c
UD
2678 /* This could mean one of several things. First test whether
2679 it's a character class name. */
2680 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2681 if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2682 break;
2683 if (cnt < ctype->nr_charclass)
2684 {
2685 class_bit = _ISwbit (cnt);
2686 class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2687 free (now->val.str.startmb);
2688 goto read_charclass;
2689 }
5491da0d
UD
2690 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2691 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2692 break;
2693 if (cnt < ctype->map_collection_nr)
2694 {
2695 mapidx = cnt;
2696 free (now->val.str.startmb);
2697 goto read_mapping;
2698 }
011ebfab 2699#ifdef PREDEFINED_CLASSES
4b10dd6c
UD
2700 if (strcmp (now->val.str.startmb, "special1") == 0)
2701 {
2702 class_bit = _ISwspecial1;
2703 free (now->val.str.startmb);
2704 goto read_charclass;
2705 }
2706 if (strcmp (now->val.str.startmb, "special2") == 0)
2707 {
2708 class_bit = _ISwspecial2;
2709 free (now->val.str.startmb);
2710 goto read_charclass;
2711 }
2712 if (strcmp (now->val.str.startmb, "special3") == 0)
2713 {
2714 class_bit = _ISwspecial3;
2715 free (now->val.str.startmb);
2716 goto read_charclass;
2717 }
2718 if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2719 {
2720 mapidx = 2;
2721 goto read_mapping;
2722 }
011ebfab 2723#endif
4b10dd6c
UD
2724 break;
2725
2726 case tok_end:
2727 /* Next we assume `LC_CTYPE'. */
2728 now = lr_token (ldfile, charmap, NULL);
2729 if (now->tok == tok_eof)
2730 break;
2731 if (now->tok == tok_eol)
2732 lr_error (ldfile, _("%s: incomplete `END' line"),
2733 "LC_CTYPE");
2734 else if (now->tok != tok_lc_ctype)
2735 lr_error (ldfile, _("\
2736%1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2737 lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2738 return;
2739
2740 default:
2741 err_label:
2742 if (now->tok != tok_eof)
2743 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
19bc17a9
RM
2744 }
2745
4b10dd6c
UD
2746 /* Prepare for the next round. */
2747 now = lr_token (ldfile, charmap, NULL);
2748 nowtok = now->tok;
19bc17a9
RM
2749 }
2750
4b10dd6c
UD
2751 /* When we come here we reached the end of the file. */
2752 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
19bc17a9
RM
2753}
2754
2755
2756static void
4b10dd6c
UD
2757set_class_defaults (struct locale_ctype_t *ctype, struct charmap_t *charmap,
2758 struct repertoire_t *repertoire)
19bc17a9 2759{
4b10dd6c
UD
2760 size_t cnt;
2761
19bc17a9
RM
2762 /* This function defines the default values for the classes and conversions
2763 according to POSIX.2 2.5.2.1.
2764 It may seem that the order of these if-blocks is arbitrary but it is NOT.
2765 Don't move them unless you know what you do! */
2766
4b10dd6c 2767 void set_default (int bitpos, int from, int to)
19bc17a9
RM
2768 {
2769 char tmp[2];
2770 int ch;
4b10dd6c
UD
2771 int bit = _ISbit (bitpos);
2772 int bitw = _ISwbit (bitpos);
19bc17a9
RM
2773 /* Define string. */
2774 strcpy (tmp, "?");
2775
2776 for (ch = from; ch <= to; ++ch)
2777 {
4b10dd6c 2778 struct charseq *seq;
19bc17a9
RM
2779 tmp[0] = ch;
2780
4b10dd6c
UD
2781 seq = charmap_find_value (charmap, tmp, 1);
2782 if (seq == NULL)
2783 {
2784 if (!be_quiet)
2785 error (0, 0, _("\
2786%s: character `%s' not defined in charmap while needed as default value"),
2787 "LC_CTYPE", tmp);
19bc17a9 2788 }
4b10dd6c
UD
2789 else if (seq->nbytes != 1)
2790 error (0, 0, _("\
2791%s: character `%s' in charmap not representable with one byte"),
2792 "LC_CTYPE", tmp);
19bc17a9 2793 else
4b10dd6c 2794 ctype->class256_collection[seq->bytes[0]] |= bit;
f0a4b6b1
UD
2795
2796 /* No need to search here, the ASCII value is also the Unicode
2797 value. */
2798 ELEM (ctype, class_collection, , ch) |= bitw;
19bc17a9
RM
2799 }
2800 }
2801
2802 /* Set default values if keyword was not present. */
4b10dd6c 2803 if ((ctype->class_done & BITw (tok_upper)) == 0)
19bc17a9
RM
2804 /* "If this keyword [upper] is not specified, the uppercase letters
2805 `A' through `Z', ..., shall automatically belong to this class,
2806 with implementation defined character values." [P1003.2, 2.5.2.1] */
4b10dd6c 2807 set_default (BITPOS (tok_upper), 'A', 'Z');
19bc17a9 2808
4b10dd6c 2809 if ((ctype->class_done & BITw (tok_lower)) == 0)
19bc17a9
RM
2810 /* "If this keyword [lower] is not specified, the lowercase letters
2811 `a' through `z', ..., shall automatically belong to this class,
2812 with implementation defined character values." [P1003.2, 2.5.2.1] */
4b10dd6c 2813 set_default (BITPOS (tok_lower), 'a', 'z');
19bc17a9 2814
4b10dd6c 2815 if ((ctype->class_done & BITw (tok_alpha)) == 0)
19bc17a9
RM
2816 {
2817 /* Table 2-6 in P1003.2 says that characters in class `upper' or
2818 class `lower' *must* be in class `alpha'. */
2819 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
96f0d1f5
UD
2820 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
2821
2822 for (cnt = 0; cnt < 256; ++cnt)
2823 if ((ctype->class256_collection[cnt] & mask) != 0)
2824 ctype->class256_collection[cnt] |= BIT (tok_alpha);
19bc17a9
RM
2825
2826 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
96f0d1f5
UD
2827 if ((ctype->class_collection[cnt] & maskw) != 0)
2828 ctype->class_collection[cnt] |= BITw (tok_alpha);
19bc17a9
RM
2829 }
2830
4b10dd6c 2831 if ((ctype->class_done & BITw (tok_digit)) == 0)
19bc17a9
RM
2832 /* "If this keyword [digit] is not specified, the digits `0' through
2833 `9', ..., shall automatically belong to this class, with
2834 implementation-defined character values." [P1003.2, 2.5.2.1] */
4b10dd6c 2835 set_default (BITPOS (tok_digit), '0', '9');
19bc17a9
RM
2836
2837 /* "Only characters specified for the `alpha' and `digit' keyword
2838 shall be specified. Characters specified for the keyword `alpha'
2839 and `digit' are automatically included in this class. */
2840 {
2841 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
96f0d1f5
UD
2842 unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
2843
2844 for (cnt = 0; cnt < 256; ++cnt)
2845 if ((ctype->class256_collection[cnt] & mask) != 0)
2846 ctype->class256_collection[cnt] |= BIT (tok_alnum);
19bc17a9
RM
2847
2848 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
96f0d1f5
UD
2849 if ((ctype->class_collection[cnt] & maskw) != 0)
2850 ctype->class_collection[cnt] |= BITw (tok_alnum);
19bc17a9
RM
2851 }
2852
4b10dd6c 2853 if ((ctype->class_done & BITw (tok_space)) == 0)
19bc17a9
RM
2854 /* "If this keyword [space] is not specified, the characters <space>,
2855 <form-feed>, <newline>, <carriage-return>, <tab>, and
2856 <vertical-tab>, ..., shall automatically belong to this class,
2857 with implementation-defined character values." [P1003.2, 2.5.2.1] */
2858 {
4b10dd6c 2859 struct charseq *seq;
19bc17a9 2860
4b10dd6c 2861 seq = charmap_find_value (charmap, "space", 5);
45c95239
UD
2862 if (seq == NULL)
2863 seq = charmap_find_value (charmap, "SP", 2);
f0a4b6b1
UD
2864 if (seq == NULL)
2865 seq = charmap_find_value (charmap, "U00000020", 9);
4b10dd6c 2866 if (seq == NULL)
880f421f
UD
2867 {
2868 if (!be_quiet)
2869 error (0, 0, _("\
4b10dd6c
UD
2870%s: character `%s' not defined while needed as default value"),
2871 "LC_CTYPE", "<space>");
2872 }
2873 else if (seq->nbytes != 1)
2874 error (0, 0, _("\
2875%s: character `%s' in charmap not representable with one byte"),
2876 "LC_CTYPE", "<space>");
2877 else
2878 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2879
f0a4b6b1 2880 /* No need to search. */
ce177a84 2881 ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
19bc17a9 2882
4b10dd6c 2883 seq = charmap_find_value (charmap, "form-feed", 9);
f0a4b6b1
UD
2884 if (seq == NULL)
2885 seq = charmap_find_value (charmap, "U0000000C", 9);
4b10dd6c 2886 if (seq == NULL)
880f421f
UD
2887 {
2888 if (!be_quiet)
2889 error (0, 0, _("\
4b10dd6c
UD
2890%s: character `%s' not defined while needed as default value"),
2891 "LC_CTYPE", "<form-feed>");
2892 }
2893 else if (seq->nbytes != 1)
2894 error (0, 0, _("\
2895%s: character `%s' in charmap not representable with one byte"),
2896 "LC_CTYPE", "<form-feed>");
2897 else
2898 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2899
f0a4b6b1 2900 /* No need to search. */
ce177a84 2901 ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
4b10dd6c 2902
19bc17a9 2903
4b10dd6c 2904 seq = charmap_find_value (charmap, "newline", 7);
f0a4b6b1
UD
2905 if (seq == NULL)
2906 seq = charmap_find_value (charmap, "U0000000A", 9);
4b10dd6c 2907 if (seq == NULL)
880f421f
UD
2908 {
2909 if (!be_quiet)
2910 error (0, 0, _("\
19bc17a9 2911character `%s' not defined while needed as default value"),
4b10dd6c
UD
2912 "<newline>");
2913 }
2914 else if (seq->nbytes != 1)
2915 error (0, 0, _("\
2916%s: character `%s' in charmap not representable with one byte"),
2917 "LC_CTYPE", "<newline>");
2918 else
2919 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2920
f0a4b6b1 2921 /* No need to search. */
ce177a84 2922 ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
4b10dd6c 2923
19bc17a9 2924
4b10dd6c 2925 seq = charmap_find_value (charmap, "carriage-return", 15);
f0a4b6b1
UD
2926 if (seq == NULL)
2927 seq = charmap_find_value (charmap, "U0000000D", 9);
4b10dd6c 2928 if (seq == NULL)
880f421f
UD
2929 {
2930 if (!be_quiet)
2931 error (0, 0, _("\
4b10dd6c
UD
2932%s: character `%s' not defined while needed as default value"),
2933 "LC_CTYPE", "<carriage-return>");
2934 }
2935 else if (seq->nbytes != 1)
2936 error (0, 0, _("\
2937%s: character `%s' in charmap not representable with one byte"),
2938 "LC_CTYPE", "<carriage-return>");
2939 else
2940 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2941
f0a4b6b1 2942 /* No need to search. */
ce177a84 2943 ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
4b10dd6c 2944
19bc17a9 2945
4b10dd6c 2946 seq = charmap_find_value (charmap, "tab", 3);
f0a4b6b1
UD
2947 if (seq == NULL)
2948 seq = charmap_find_value (charmap, "U00000009", 9);
4b10dd6c 2949 if (seq == NULL)
880f421f
UD
2950 {
2951 if (!be_quiet)
2952 error (0, 0, _("\
4b10dd6c
UD
2953%s: character `%s' not defined while needed as default value"),
2954 "LC_CTYPE", "<tab>");
2955 }
2956 else if (seq->nbytes != 1)
2957 error (0, 0, _("\
2958%s: character `%s' in charmap not representable with one byte"),
2959 "LC_CTYPE", "<tab>");
2960 else
2961 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2962
f0a4b6b1 2963 /* No need to search. */
ce177a84 2964 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
4b10dd6c 2965
4b10dd6c
UD
2966
2967 seq = charmap_find_value (charmap, "vertical-tab", 12);
f0a4b6b1
UD
2968 if (seq == NULL)
2969 seq = charmap_find_value (charmap, "U0000000B", 9);
4b10dd6c
UD
2970 if (seq == NULL)
2971 {
2972 if (!be_quiet)
2973 error (0, 0, _("\
2974%s: character `%s' not defined while needed as default value"),
2975 "LC_CTYPE", "<vertical-tab>");
2976 }
2977 else if (seq->nbytes != 1)
2978 error (0, 0, _("\
2979%s: character `%s' in charmap not representable with one byte"),
2980 "LC_CTYPE", "<vertical-tab>");
2981 else
2982 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
f0a4b6b1
UD
2983
2984 /* No need to search. */
ce177a84 2985 ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
19bc17a9
RM
2986 }
2987
4b10dd6c 2988 if ((ctype->class_done & BITw (tok_xdigit)) == 0)
19bc17a9
RM
2989 /* "If this keyword is not specified, the digits `0' to `9', the
2990 uppercase letters `A' through `F', and the lowercase letters `a'
2991 through `f', ..., shall automatically belong to this class, with
2992 implementation defined character values." [P1003.2, 2.5.2.1] */
2993 {
4b10dd6c
UD
2994 set_default (BITPOS (tok_xdigit), '0', '9');
2995 set_default (BITPOS (tok_xdigit), 'A', 'F');
2996 set_default (BITPOS (tok_xdigit), 'a', 'f');
19bc17a9
RM
2997 }
2998
4b10dd6c 2999 if ((ctype->class_done & BITw (tok_blank)) == 0)
19bc17a9
RM
3000 /* "If this keyword [blank] is unspecified, the characters <space> and
3001 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3002 {
4b10dd6c 3003 struct charseq *seq;
19bc17a9 3004
4b10dd6c 3005 seq = charmap_find_value (charmap, "space", 5);
45c95239
UD
3006 if (seq == NULL)
3007 seq = charmap_find_value (charmap, "SP", 2);
f0a4b6b1
UD
3008 if (seq == NULL)
3009 seq = charmap_find_value (charmap, "U00000020", 9);
4b10dd6c 3010 if (seq == NULL)
880f421f
UD
3011 {
3012 if (!be_quiet)
3013 error (0, 0, _("\
4b10dd6c
UD
3014%s: character `%s' not defined while needed as default value"),
3015 "LC_CTYPE", "<space>");
3016 }
3017 else if (seq->nbytes != 1)
3018 error (0, 0, _("\
3019%s: character `%s' in charmap not representable with one byte"),
3020 "LC_CTYPE", "<space>");
3021 else
3022 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3023
f0a4b6b1 3024 /* No need to search. */
ce177a84 3025 ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
4b10dd6c 3026
4b10dd6c
UD
3027
3028 seq = charmap_find_value (charmap, "tab", 3);
f0a4b6b1
UD
3029 if (seq == NULL)
3030 seq = charmap_find_value (charmap, "U00000009", 9);
4b10dd6c
UD
3031 if (seq == NULL)
3032 {
3033 if (!be_quiet)
3034 error (0, 0, _("\
3035%s: character `%s' not defined while needed as default value"),
3036 "LC_CTYPE", "<tab>");
3037 }
3038 else if (seq->nbytes != 1)
3039 error (0, 0, _("\
3040%s: character `%s' in charmap not representable with one byte"),
3041 "LC_CTYPE", "<tab>");
3042 else
3043 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
f0a4b6b1
UD
3044
3045 /* No need to search. */
ce177a84 3046 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
19bc17a9
RM
3047 }
3048
4b10dd6c 3049 if ((ctype->class_done & BITw (tok_graph)) == 0)
19bc17a9
RM
3050 /* "If this keyword [graph] is not specified, characters specified for
3051 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3052 shall belong to this character class." [P1003.2, 2.5.2.1] */
3053 {
3054 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3055 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
ce177a84
UD
3056 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3057 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3058 BITw (tok_punct);
19bc17a9
RM
3059 size_t cnt;
3060
3061 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
ce177a84
UD
3062 if ((ctype->class_collection[cnt] & maskw) != 0)
3063 ctype->class_collection[cnt] |= BITw (tok_graph);
4b10dd6c
UD
3064
3065 for (cnt = 0; cnt < 256; ++cnt)
3066 if ((ctype->class256_collection[cnt] & mask) != 0)
3067 ctype->class256_collection[cnt] |= BIT (tok_graph);
19bc17a9
RM
3068 }
3069
4b10dd6c 3070 if ((ctype->class_done & BITw (tok_print)) == 0)
19bc17a9
RM
3071 /* "If this keyword [print] is not provided, characters specified for
3072 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3073 and the <space> character shall belong to this character class."
3074 [P1003.2, 2.5.2.1] */
3075 {
3076 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3077 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
ce177a84
UD
3078 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3079 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3080 BITw (tok_punct);
19bc17a9 3081 size_t cnt;
4b10dd6c 3082 struct charseq *seq;
19bc17a9
RM
3083
3084 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
ce177a84
UD
3085 if ((ctype->class_collection[cnt] & maskw) != 0)
3086 ctype->class_collection[cnt] |= BITw (tok_print);
19bc17a9 3087
4b10dd6c
UD
3088 for (cnt = 0; cnt < 256; ++cnt)
3089 if ((ctype->class256_collection[cnt] & mask) != 0)
3090 ctype->class256_collection[cnt] |= BIT (tok_print);
3091
3092
4b10dd6c 3093 seq = charmap_find_value (charmap, "space", 5);
45c95239
UD
3094 if (seq == NULL)
3095 seq = charmap_find_value (charmap, "SP", 2);
f0a4b6b1
UD
3096 if (seq == NULL)
3097 seq = charmap_find_value (charmap, "U00000020", 9);
4b10dd6c
UD
3098 if (seq == NULL)
3099 {
3100 if (!be_quiet)
3101 error (0, 0, _("\
3102%s: character `%s' not defined while needed as default value"),
3103 "LC_CTYPE", "<space>");
3104 }
3105 else if (seq->nbytes != 1)
3106 error (0, 0, _("\
3107%s: character `%s' in charmap not representable with one byte"),
3108 "LC_CTYPE", "<space>");
3109 else
3110 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
f0a4b6b1
UD
3111
3112 /* No need to search. */
ce177a84 3113 ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
19bc17a9
RM
3114 }
3115
4b10dd6c 3116 if (ctype->tomap_done[0] == 0)
6d52618b 3117 /* "If this keyword [toupper] is not specified, the lowercase letters
19bc17a9
RM
3118 `a' through `z', and their corresponding uppercase letters `A' to
3119 `Z', ..., shall automatically be included, with implementation-
3120 defined character values." [P1003.2, 2.5.2.1] */
3121 {
3122 char tmp[4];
3123 int ch;
3124
3125 strcpy (tmp, "<?>");
3126
3127 for (ch = 'a'; ch <= 'z'; ++ch)
3128 {
4b10dd6c 3129 struct charseq *seq_from, *seq_to;
19bc17a9
RM
3130
3131 tmp[1] = (char) ch;
3132
4b10dd6c
UD
3133 seq_from = charmap_find_value (charmap, &tmp[1], 1);
3134 if (seq_from == NULL)
19bc17a9 3135 {
880f421f
UD
3136 if (!be_quiet)
3137 error (0, 0, _("\
4b10dd6c
UD
3138%s: character `%s' not defined while needed as default value"),
3139 "LC_CTYPE", tmp);
3140 }
3141 else if (seq_from->nbytes != 1)
3142 {
3143 if (!be_quiet)
3144 error (0, 0, _("\
3145%s: character `%s' needed as default value not representable with one byte"),
3146 "LC_CTYPE", tmp);
3147 }
3148 else
3149 {
3150 /* This conversion is implementation defined. */
3151 tmp[1] = (char) (ch + ('A' - 'a'));
3152 seq_to = charmap_find_value (charmap, &tmp[1], 1);
3153 if (seq_to == NULL)
3154 {
3155 if (!be_quiet)
3156 error (0, 0, _("\
3157%s: character `%s' not defined while needed as default value"),
3158 "LC_CTYPE", tmp);
3159 }
3160 else if (seq_to->nbytes != 1)
3161 {
3162 if (!be_quiet)
3163 error (0, 0, _("\
3164%s: character `%s' needed as default value not representable with one byte"),
3165 "LC_CTYPE", tmp);
3166 }
3167 else
3168 /* The index [0] is determined by the order of the
3169 `ctype_map_newP' calls in `ctype_startup'. */
3170 ctype->map256_collection[0][seq_from->bytes[0]]
3171 = seq_to->bytes[0];
19bc17a9 3172 }
f0a4b6b1
UD
3173
3174 /* No need to search. */
3175 ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
19bc17a9
RM
3176 }
3177 }
3178
4b10dd6c 3179 if (ctype->tomap_done[1] == 0)
19bc17a9
RM
3180 /* "If this keyword [tolower] is not specified, the mapping shall be
3181 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3182 {
19bc17a9
RM
3183 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3184 if (ctype->map_collection[0][cnt] != 0)
3185 ELEM (ctype, map_collection, [1],
3186 ctype->map_collection[0][cnt])
3187 = ctype->charnames[cnt];
4b10dd6c
UD
3188
3189 for (cnt = 0; cnt < 256; ++cnt)
3190 if (ctype->map256_collection[0][cnt] != 0)
85cb60ff 3191 ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
4b10dd6c
UD
3192 }
3193
3194 if (ctype->outdigits_act == 0)
3195 {
3196 for (cnt = 0; cnt < 10; ++cnt)
3197 {
3198 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3199 digits + cnt, 1);
3200
3201 if (ctype->mboutdigits[cnt] == NULL)
1b97149d
UD
3202 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3203 longnames[cnt],
3204 strlen (longnames[cnt]));
b9eb05d6 3205
1b97149d
UD
3206 if (ctype->mboutdigits[cnt] == NULL)
3207 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3208 uninames[cnt], 9);
b9eb05d6 3209
1b97149d 3210 if (ctype->mboutdigits[cnt] == NULL)
b9eb05d6 3211 {
1b97149d
UD
3212 /* Provide a replacement. */
3213 error (0, 0, _("\
3214no output digits defined and none of the standard names in the charmap"));
b9eb05d6 3215
1b97149d
UD
3216 ctype->mboutdigits[cnt] = obstack_alloc (&charmap->mem_pool,
3217 sizeof (struct charseq)
3218 + 1);
b9eb05d6 3219
1b97149d
UD
3220 /* This is better than nothing. */
3221 ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3222 ctype->mboutdigits[cnt]->nbytes = 1;
b9eb05d6 3223 }
1b97149d
UD
3224
3225 ctype->wcoutdigits[cnt] = L'0' + cnt;
4b10dd6c
UD
3226 }
3227
3228 ctype->outdigits_act = 10;
19bc17a9
RM
3229 }
3230}
3231
3232
ef446144
UD
3233/* Construction of sparse 3-level tables.
3234 See wchar-lookup.h for their structure and the meaning of p and q. */
3235
/* Sparse three-level bit set indexed by wide character value.  A
   character is split into a level-1 index (the bits above q + p + 5),
   a q-bit level-2 index, a p-bit level-3 index and a 5-bit position
   inside a 32-bit word.  */
struct wctype_table
{
  /* Parameters: widths in bits of the level-3 (p) and level-2 (q)
     indices.  */
  unsigned int p;
  unsigned int q;

  /* Working representation.  For every level: the allocated capacity,
     the amount in use, and the storage itself.  Level 1 is counted in
     entries, level 2 in blocks of 1 << q entries, level 3 in blocks of
     1 << p words.  An entry of ~0 in level 1 or level 2 means that no
     block has been assigned yet.  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;
  size_t level3_alloc;
  size_t level3_size;
  uint32_t *level3;

  /* Compressed representation, produced when the table is finalized.  */
  size_t result_size;
  char *result;
};
3255
3256/* Initialize. Assumes t->p and t->q have already been set. */
3257static inline void
3258wctype_table_init (struct wctype_table *t)
3259{
3260 t->level1_alloc = t->level1_size = 0;
3261 t->level2_alloc = t->level2_size = 0;
3262 t->level3_alloc = t->level3_size = 0;
3263}
3264
ec08818d
UD
3265/* Retrieve an entry. */
3266static inline int
3267wctype_table_get (struct wctype_table *t, uint32_t wc)
3268{
3269 uint32_t index1 = wc >> (t->q + t->p + 5);
3270 if (index1 < t->level1_size)
3271 {
3272 uint32_t lookup1 = t->level1[index1];
3273 if (lookup1 != ~((uint32_t) 0))
3274 {
3275 uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3276 + (lookup1 << t->q);
3277 uint32_t lookup2 = t->level2[index2];
3278 if (lookup2 != ~((uint32_t) 0))
3279 {
3280 uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3281 + (lookup2 << t->p);
3282 uint32_t lookup3 = t->level3[index3];
3283 uint32_t index4 = wc & 0x1f;
3284
3285 return (lookup3 >> index4) & 1;
3286 }
3287 }
3288 }
3289 return 0;
3290}
3291
ef446144
UD
3292/* Add one entry. */
3293static void
3294wctype_table_add (struct wctype_table *t, uint32_t wc)
3295{
3296 uint32_t index1 = wc >> (t->q + t->p + 5);
3297 uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3298 uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3299 uint32_t index4 = wc & 0x1f;
3300 size_t i, i1, i2;
3301
3302 if (index1 >= t->level1_size)
3303 {
3304 if (index1 >= t->level1_alloc)
3305 {
3306 size_t alloc = 2 * t->level1_alloc;
3307 if (alloc <= index1)
3308 alloc = index1 + 1;
3309 t->level1 = (t->level1_alloc > 0
3310 ? (uint32_t *) xrealloc ((char *) t->level1,
3311 alloc * sizeof (uint32_t))
3312 : (uint32_t *) xmalloc (alloc * sizeof (uint32_t)));
3313 t->level1_alloc = alloc;
3314 }
3315 while (index1 >= t->level1_size)
3316 t->level1[t->level1_size++] = ~((uint32_t) 0);
3317 }
3318
3319 if (t->level1[index1] == ~((uint32_t) 0))
3320 {
3321 if (t->level2_size == t->level2_alloc)
3322 {
3323 size_t alloc = 2 * t->level2_alloc + 1;
3324 t->level2 = (t->level2_alloc > 0
3325 ? (uint32_t *) xrealloc ((char *) t->level2,
3326 (alloc << t->q) * sizeof (uint32_t))
3327 : (uint32_t *) xmalloc ((alloc << t->q) * sizeof (uint32_t)));
3328 t->level2_alloc = alloc;
3329 }
3330 i1 = t->level2_size << t->q;
3331 i2 = (t->level2_size + 1) << t->q;
3332 for (i = i1; i < i2; i++)
3333 t->level2[i] = ~((uint32_t) 0);
3334 t->level1[index1] = t->level2_size++;
3335 }
3336
3337 index2 += t->level1[index1] << t->q;
3338
3339 if (t->level2[index2] == ~((uint32_t) 0))
3340 {
3341 if (t->level3_size == t->level3_alloc)
3342 {
3343 size_t alloc = 2 * t->level3_alloc + 1;
3344 t->level3 = (t->level3_alloc > 0
3345 ? (uint32_t *) xrealloc ((char *) t->level3,
3346 (alloc << t->p) * sizeof (uint32_t))
3347 : (uint32_t *) xmalloc ((alloc << t->p) * sizeof (uint32_t)));
3348 t->level3_alloc = alloc;
3349 }
3350 i1 = t->level3_size << t->p;
3351 i2 = (t->level3_size + 1) << t->p;
3352 for (i = i1; i < i2; i++)
3353 t->level3[i] = 0;
3354 t->level2[index2] = t->level3_size++;
3355 }
3356
3357 index3 += t->level2[index2] << t->p;
3358
3359 t->level3[index3] |= (uint32_t)1 << index4;
3360}
3361
3362/* Finalize and shrink. */
3363static void
3364wctype_table_finalize (struct wctype_table *t)
3365{
3366 size_t i, j, k;
3367 uint32_t reorder3[t->level3_size];
3368 uint32_t reorder2[t->level2_size];
3369 uint32_t level1_offset, level2_offset, level3_offset;
3370
3371 /* Uniquify level3 blocks. */
3372 k = 0;
3373 for (j = 0; j < t->level3_size; j++)
3374 {
3375 for (i = 0; i < k; i++)
3376 if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3377 (1 << t->p) * sizeof (uint32_t)) == 0)
3378 break;
3379 /* Relocate block j to block i. */
3380 reorder3[j] = i;
3381 if (i == k)
3382 {
3383 if (i != j)
3384 memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3385 (1 << t->p) * sizeof (uint32_t));
3386 k++;
3387 }
3388 }
3389 t->level3_size = k;
3390
3391 for (i = 0; i < (t->level2_size << t->q); i++)
3392 if (t->level2[i] != ~((uint32_t) 0))
3393 t->level2[i] = reorder3[t->level2[i]];
3394
3395 /* Uniquify level2 blocks. */
3396 k = 0;
3397 for (j = 0; j < t->level2_size; j++)
3398 {
3399 for (i = 0; i < k; i++)
3400 if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3401 (1 << t->q) * sizeof (uint32_t)) == 0)
3402 break;
3403 /* Relocate block j to block i. */
3404 reorder2[j] = i;
3405 if (i == k)
3406 {
3407 if (i != j)
3408 memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3409 (1 << t->q) * sizeof (uint32_t));
3410 k++;
3411 }
3412 }
3413 t->level2_size = k;
3414
3415 for (i = 0; i < t->level1_size; i++)
3416 if (t->level1[i] != ~((uint32_t) 0))
3417 t->level1[i] = reorder2[t->level1[i]];
3418
3419 /* Create and fill the resulting compressed representation. */
3420 t->result_size =
3421 5 * sizeof (uint32_t)
3422 + t->level1_size * sizeof (uint32_t)
3423 + (t->level2_size << t->q) * sizeof (uint32_t)
3424 + (t->level3_size << t->p) * sizeof (uint32_t);
3425 t->result = (char *) xmalloc (t->result_size);
3426
3427 level1_offset =
3428 5 * sizeof (uint32_t);
3429 level2_offset =
3430 5 * sizeof (uint32_t)
3431 + t->level1_size * sizeof (uint32_t);
3432 level3_offset =
3433 5 * sizeof (uint32_t)
3434 + t->level1_size * sizeof (uint32_t)
3435 + (t->level2_size << t->q) * sizeof (uint32_t);
3436
3437 ((uint32_t *) t->result)[0] = t->q + t->p + 5;
3438 ((uint32_t *) t->result)[1] = t->level1_size;
3439 ((uint32_t *) t->result)[2] = t->p + 5;
3440 ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3441 ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3442
3443 for (i = 0; i < t->level1_size; i++)
3444 ((uint32_t *) (t->result + level1_offset))[i] =
3445 (t->level1[i] == ~((uint32_t) 0)
3446 ? 0
3447 : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3448
3449 for (i = 0; i < (t->level2_size << t->q); i++)
3450 ((uint32_t *) (t->result + level2_offset))[i] =
3451 (t->level2[i] == ~((uint32_t) 0)
3452 ? 0
3453 : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3454
3455 for (i = 0; i < (t->level3_size << t->p); i++)
3456 ((uint32_t *) (t->result + level3_offset))[i] = t->level3[i];
3457
3458 if (t->level1_alloc > 0)
3459 free (t->level1);
3460 if (t->level2_alloc > 0)
3461 free (t->level2);
3462 if (t->level3_alloc > 0)
3463 free (t->level3);
3464}
3465
/* Sparse three-level table holding one byte per wide character.  A
   character is split into a level-1 index (the bits above q + p), a
   q-bit level-2 index and a p-bit level-3 index.  */
struct wcwidth_table
{
  /* Parameters: widths in bits of the level-3 (p) and level-2 (q)
     indices.  */
  unsigned int p;
  unsigned int q;

  /* Working representation.  For every level: the allocated capacity,
     the amount in use, and the storage itself.  Level 1 is counted in
     entries, level 2 in blocks of 1 << q entries, level 3 in blocks of
     1 << p bytes.  An entry of ~0 in level 1 or level 2 means that no
     block has been assigned yet.  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;
  size_t level3_alloc;
  size_t level3_size;
  uint8_t *level3;

  /* Compressed representation, produced when the table is finalized.  */
  size_t result_size;
  char *result;
};
3485
3486/* Initialize. Assumes t->p and t->q have already been set. */
3487static inline void
3488wcwidth_table_init (struct wcwidth_table *t)
3489{
3490 t->level1_alloc = t->level1_size = 0;
3491 t->level2_alloc = t->level2_size = 0;
3492 t->level3_alloc = t->level3_size = 0;
3493}
3494
ec08818d
UD
3495/* Retrieve an entry. */
3496static inline uint8_t
3497wcwidth_table_get (struct wcwidth_table *t, uint32_t wc)
3498{
3499 uint32_t index1 = wc >> (t->q + t->p);
3500 if (index1 < t->level1_size)
3501 {
3502 uint32_t lookup1 = t->level1[index1];
3503 if (lookup1 != ~((uint32_t) 0))
3504 {
3505 uint32_t index2 = ((wc >> t->p) & ((1 << t->q) - 1))
3506 + (lookup1 << t->q);
3507 uint32_t lookup2 = t->level2[index2];
3508 if (lookup2 != ~((uint32_t) 0))
3509 {
3510 uint32_t index3 = (wc & ((1 << t->p) - 1))
3511 + (lookup2 << t->p);
3512 uint8_t lookup3 = t->level3[index3];
3513
3514 return lookup3;
3515 }
3516 }
3517 }
3518 return 0xff;
3519}
3520
ef446144
UD
3521/* Add one entry. */
3522static void
3523wcwidth_table_add (struct wcwidth_table *t, uint32_t wc, uint8_t width)
3524{
3525 uint32_t index1 = wc >> (t->q + t->p);
3526 uint32_t index2 = (wc >> t->p) & ((1 << t->q) - 1);
3527 uint32_t index3 = wc & ((1 << t->p) - 1);
3528 size_t i, i1, i2;
3529
ec08818d 3530 if (width == wcwidth_table_get (t, wc))
ef446144
UD
3531 return;
3532
3533 if (index1 >= t->level1_size)
3534 {
3535 if (index1 >= t->level1_alloc)
3536 {
3537 size_t alloc = 2 * t->level1_alloc;
3538 if (alloc <= index1)
3539 alloc = index1 + 1;
3540 t->level1 = (t->level1_alloc > 0
3541 ? (uint32_t *) xrealloc ((char *) t->level1,
3542 alloc * sizeof (uint32_t))
3543 : (uint32_t *) xmalloc (alloc * sizeof (uint32_t)));
3544 t->level1_alloc = alloc;
3545 }
3546 while (index1 >= t->level1_size)
3547 t->level1[t->level1_size++] = ~((uint32_t) 0);
3548 }
3549
3550 if (t->level1[index1] == ~((uint32_t) 0))
3551 {
3552 if (t->level2_size == t->level2_alloc)
3553 {
3554 size_t alloc = 2 * t->level2_alloc + 1;
3555 t->level2 = (t->level2_alloc > 0
3556 ? (uint32_t *) xrealloc ((char *) t->level2,
3557 (alloc << t->q) * sizeof (uint32_t))
3558 : (uint32_t *) xmalloc ((alloc << t->q) * sizeof (uint32_t)));
3559 t->level2_alloc = alloc;
3560 }
3561 i1 = t->level2_size << t->q;
3562 i2 = (t->level2_size + 1) << t->q;
3563 for (i = i1; i < i2; i++)
3564 t->level2[i] = ~((uint32_t) 0);
3565 t->level1[index1] = t->level2_size++;
3566 }
3567
3568 index2 += t->level1[index1] << t->q;
3569
3570 if (t->level2[index2] == ~((uint32_t) 0))
3571 {
3572 if (t->level3_size == t->level3_alloc)
3573 {
3574 size_t alloc = 2 * t->level3_alloc + 1;
3575 t->level3 = (t->level3_alloc > 0
3576 ? (uint8_t *) xrealloc ((char *) t->level3,
3577 (alloc << t->p) * sizeof (uint8_t))
3578 : (uint8_t *) xmalloc ((alloc << t->p) * sizeof (uint8_t)));
3579 t->level3_alloc = alloc;
3580 }
3581 i1 = t->level3_size << t->p;
3582 i2 = (t->level3_size + 1) << t->p;
3583 for (i = i1; i < i2; i++)
3584 t->level3[i] = 0xff;
3585 t->level2[index2] = t->level3_size++;
3586 }
3587
3588 index3 += t->level2[index2] << t->p;
3589
3590 t->level3[index3] = width;
3591}
3592
3593/* Finalize and shrink. */
3594static void
3595wcwidth_table_finalize (struct wcwidth_table *t)
3596{
3597 size_t i, j, k;
3598 uint32_t reorder3[t->level3_size];
3599 uint32_t reorder2[t->level2_size];
3600 uint32_t level1_offset, level2_offset, level3_offset, last_offset;
3601
3602 /* Uniquify level3 blocks. */
3603 k = 0;
3604 for (j = 0; j < t->level3_size; j++)
3605 {
3606 for (i = 0; i < k; i++)
3607 if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3608 (1 << t->p) * sizeof (uint8_t)) == 0)
3609 break;
3610 /* Relocate block j to block i. */
3611 reorder3[j] = i;
3612 if (i == k)
3613 {
3614 if (i != j)
3615 memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3616 (1 << t->p) * sizeof (uint8_t));
3617 k++;
3618 }
3619 }
3620 t->level3_size = k;
3621
3622 for (i = 0; i < (t->level2_size << t->q); i++)
3623 if (t->level2[i] != ~((uint32_t) 0))
3624 t->level2[i] = reorder3[t->level2[i]];
3625
3626 /* Uniquify level2 blocks. */
3627 k = 0;
3628 for (j = 0; j < t->level2_size; j++)
3629 {
3630 for (i = 0; i < k; i++)
3631 if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3632 (1 << t->q) * sizeof (uint32_t)) == 0)
3633 break;
3634 /* Relocate block j to block i. */
3635 reorder2[j] = i;
3636 if (i == k)
3637 {
3638 if (i != j)
3639 memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3640 (1 << t->q) * sizeof (uint32_t));
3641 k++;
3642 }
3643 }
3644 t->level2_size = k;
3645
3646 for (i = 0; i < t->level1_size; i++)
3647 if (t->level1[i] != ~((uint32_t) 0))
3648 t->level1[i] = reorder2[t->level1[i]];
3649
3650 /* Create and fill the resulting compressed representation. */
3651 last_offset =
3652 5 * sizeof (uint32_t)
3653 + t->level1_size * sizeof (uint32_t)
3654 + (t->level2_size << t->q) * sizeof (uint32_t)
3655 + (t->level3_size << t->p) * sizeof (uint8_t);
3656 t->result_size = (last_offset + 3) & ~3ul;
3657 t->result = (char *) xmalloc (t->result_size);
3658
3659 level1_offset =
3660 5 * sizeof (uint32_t);
3661 level2_offset =
3662 5 * sizeof (uint32_t)
3663 + t->level1_size * sizeof (uint32_t);
3664 level3_offset =
3665 5 * sizeof (uint32_t)
3666 + t->level1_size * sizeof (uint32_t)
3667 + (t->level2_size << t->q) * sizeof (uint32_t);
3668
3669 ((uint32_t *) t->result)[0] = t->q + t->p;
3670 ((uint32_t *) t->result)[1] = t->level1_size;
3671 ((uint32_t *) t->result)[2] = t->p;
3672 ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3673 ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3674
3675 for (i = 0; i < t->level1_size; i++)
3676 ((uint32_t *) (t->result + level1_offset))[i] =
3677 (t->level1[i] == ~((uint32_t) 0)
3678 ? 0
3679 : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3680
3681 for (i = 0; i < (t->level2_size << t->q); i++)
3682 ((uint32_t *) (t->result + level2_offset))[i] =
3683 (t->level2[i] == ~((uint32_t) 0)
3684 ? 0
3685 : (t->level2[i] << t->p) * sizeof (uint8_t) + level3_offset);
3686
3687 for (i = 0; i < (t->level3_size << t->p); i++)
3688 ((uint8_t *) (t->result + level3_offset))[i] = t->level3[i];
3689
3690 if (last_offset < t->result_size)
3691 memset (t->result + last_offset, 0, t->result_size - last_offset);
3692
3693 if (t->level1_alloc > 0)
3694 free (t->level1);
3695 if (t->level2_alloc > 0)
3696 free (t->level2);
3697 if (t->level3_alloc > 0)
3698 free (t->level3);
3699}
3700
/* Sparse three-level table holding one signed 32-bit value per wide
   character; the value is the difference between the mapped character
   and the character itself.  A character is split into a level-1 index
   (the bits above q + p), a q-bit level-2 index and a p-bit level-3
   index.  */
struct wctrans_table
{
  /* Parameters: widths in bits of the level-3 (p) and level-2 (q)
     indices.  */
  unsigned int p;
  unsigned int q;

  /* Working representation.  For every level: the allocated capacity,
     the amount in use, and the storage itself.  Level 1 is counted in
     entries, level 2 in blocks of 1 << q entries, level 3 in blocks of
     1 << p values.  An entry of ~0 in level 1 or level 2 means that no
     block has been assigned yet.  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;
  size_t level3_alloc;
  size_t level3_size;
  int32_t *level3;

  /* Compressed representation, produced when the table is finalized.  */
  size_t result_size;
  char *result;
};
3720
3721/* Initialize. Assumes t->p and t->q have already been set. */
3722static inline void
3723wctrans_table_init (struct wctrans_table *t)
3724{
3725 t->level1_alloc = t->level1_size = 0;
3726 t->level2_alloc = t->level2_size = 0;
3727 t->level3_alloc = t->level3_size = 0;
3728}
3729
ec08818d
UD
3730/* Retrieve an entry. */
3731static inline uint32_t
3732wctrans_table_get (struct wctrans_table *t, uint32_t wc)
3733{
3734 uint32_t index1 = wc >> (t->q + t->p);
3735 if (index1 < t->level1_size)
3736 {
3737 uint32_t lookup1 = t->level1[index1];
3738 if (lookup1 != ~((uint32_t) 0))
3739 {
3740 uint32_t index2 = ((wc >> t->p) & ((1 << t->q) - 1))
3741 + (lookup1 << t->q);
3742 uint32_t lookup2 = t->level2[index2];
3743 if (lookup2 != ~((uint32_t) 0))
3744 {
3745 uint32_t index3 = (wc & ((1 << t->p) - 1))
3746 + (lookup2 << t->p);
3747 int32_t lookup3 = t->level3[index3];
3748
3749 return wc + lookup3;
3750 }
3751 }
3752 }
3753 return wc;
3754}
3755
ef446144
UD
3756/* Add one entry. */
3757static void
3758wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
3759{
3760 uint32_t index1 = wc >> (t->q + t->p);
3761 uint32_t index2 = (wc >> t->p) & ((1 << t->q) - 1);
3762 uint32_t index3 = wc & ((1 << t->p) - 1);
ec08818d 3763 int32_t value;
ef446144
UD
3764 size_t i, i1, i2;
3765
ec08818d 3766 if (mapped_wc == wctrans_table_get (t, wc))
ef446144
UD
3767 return;
3768
ec08818d
UD
3769 value = (int32_t) mapped_wc - (int32_t) wc;
3770
ef446144
UD
3771 if (index1 >= t->level1_size)
3772 {
3773 if (index1 >= t->level1_alloc)
3774 {
3775 size_t alloc = 2 * t->level1_alloc;
3776 if (alloc <= index1)
3777 alloc = index1 + 1;
3778 t->level1 = (t->level1_alloc > 0
3779 ? (uint32_t *) xrealloc ((char *) t->level1,
3780 alloc * sizeof (uint32_t))
3781 : (uint32_t *) xmalloc (alloc * sizeof (uint32_t)));
3782 t->level1_alloc = alloc;
3783 }
3784 while (index1 >= t->level1_size)
3785 t->level1[t->level1_size++] = ~((uint32_t) 0);
3786 }
3787
3788 if (t->level1[index1] == ~((uint32_t) 0))
3789 {
3790 if (t->level2_size == t->level2_alloc)
3791 {
3792 size_t alloc = 2 * t->level2_alloc + 1;
3793 t->level2 = (t->level2_alloc > 0
3794 ? (uint32_t *) xrealloc ((char *) t->level2,
3795 (alloc << t->q) * sizeof (uint32_t))
3796 : (uint32_t *) xmalloc ((alloc << t->q) * sizeof (uint32_t)));
3797 t->level2_alloc = alloc;
3798 }
3799 i1 = t->level2_size << t->q;
3800 i2 = (t->level2_size + 1) << t->q;
3801 for (i = i1; i < i2; i++)
3802 t->level2[i] = ~((uint32_t) 0);
3803 t->level1[index1] = t->level2_size++;
3804 }
3805
3806 index2 += t->level1[index1] << t->q;
3807
3808 if (t->level2[index2] == ~((uint32_t) 0))
3809 {
3810 if (t->level3_size == t->level3_alloc)
3811 {
3812 size_t alloc = 2 * t->level3_alloc + 1;
3813 t->level3 = (t->level3_alloc > 0
3814 ? (int32_t *) xrealloc ((char *) t->level3,
3815 (alloc << t->p) * sizeof (int32_t))
3816 : (int32_t *) xmalloc ((alloc << t->p) * sizeof (int32_t)));
3817 t->level3_alloc = alloc;
3818 }
3819 i1 = t->level3_size << t->p;
3820 i2 = (t->level3_size + 1) << t->p;
3821 for (i = i1; i < i2; i++)
3822 t->level3[i] = 0;
3823 t->level2[index2] = t->level3_size++;
3824 }
3825
3826 index3 += t->level2[index2] << t->p;
3827
3828 t->level3[index3] = value;
3829}
3830
3831/* Finalize and shrink. */
3832static void
3833wctrans_table_finalize (struct wctrans_table *t)
3834{
3835 size_t i, j, k;
3836 uint32_t reorder3[t->level3_size];
3837 uint32_t reorder2[t->level2_size];
3838 uint32_t level1_offset, level2_offset, level3_offset;
3839
3840 /* Uniquify level3 blocks. */
3841 k = 0;
3842 for (j = 0; j < t->level3_size; j++)
3843 {
3844 for (i = 0; i < k; i++)
3845 if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3846 (1 << t->p) * sizeof (int32_t)) == 0)
3847 break;
3848 /* Relocate block j to block i. */
3849 reorder3[j] = i;
3850 if (i == k)
3851 {
3852 if (i != j)
3853 memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3854 (1 << t->p) * sizeof (int32_t));
3855 k++;
3856 }
3857 }
3858 t->level3_size = k;
3859
3860 for (i = 0; i < (t->level2_size << t->q); i++)
3861 if (t->level2[i] != ~((uint32_t) 0))
3862 t->level2[i] = reorder3[t->level2[i]];
3863
3864 /* Uniquify level2 blocks. */
3865 k = 0;
3866 for (j = 0; j < t->level2_size; j++)
3867 {
3868 for (i = 0; i < k; i++)
3869 if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3870 (1 << t->q) * sizeof (uint32_t)) == 0)
3871 break;
3872 /* Relocate block j to block i. */
3873 reorder2[j] = i;
3874 if (i == k)
3875 {
3876 if (i != j)
3877 memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3878 (1 << t->q) * sizeof (uint32_t));
3879 k++;
3880 }
3881 }
3882 t->level2_size = k;
3883
3884 for (i = 0; i < t->level1_size; i++)
3885 if (t->level1[i] != ~((uint32_t) 0))
3886 t->level1[i] = reorder2[t->level1[i]];
3887
3888 /* Create and fill the resulting compressed representation. */
3889 t->result_size =
3890 5 * sizeof (uint32_t)
3891 + t->level1_size * sizeof (uint32_t)
3892 + (t->level2_size << t->q) * sizeof (uint32_t)
3893 + (t->level3_size << t->p) * sizeof (int32_t);
3894 t->result = (char *) xmalloc (t->result_size);
3895
3896 level1_offset =
3897 5 * sizeof (uint32_t);
3898 level2_offset =
3899 5 * sizeof (uint32_t)
3900 + t->level1_size * sizeof (uint32_t);
3901 level3_offset =
3902 5 * sizeof (uint32_t)
3903 + t->level1_size * sizeof (uint32_t)
3904 + (t->level2_size << t->q) * sizeof (uint32_t);
3905
3906 ((uint32_t *) t->result)[0] = t->q + t->p;
3907 ((uint32_t *) t->result)[1] = t->level1_size;
3908 ((uint32_t *) t->result)[2] = t->p;
3909 ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3910 ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3911
3912 for (i = 0; i < t->level1_size; i++)
3913 ((uint32_t *) (t->result + level1_offset))[i] =
3914 (t->level1[i] == ~((uint32_t) 0)
3915 ? 0
3916 : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3917
3918 for (i = 0; i < (t->level2_size << t->q); i++)
3919 ((uint32_t *) (t->result + level2_offset))[i] =
3920 (t->level2[i] == ~((uint32_t) 0)
3921 ? 0
3922 : (t->level2[i] << t->p) * sizeof (int32_t) + level3_offset);
3923
3924 for (i = 0; i < (t->level3_size << t->p); i++)
3925 ((int32_t *) (t->result + level3_offset))[i] = t->level3[i];
3926
3927 if (t->level1_alloc > 0)
3928 free (t->level1);
3929 if (t->level2_alloc > 0)
3930 free (t->level2);
3931 if (t->level3_alloc > 0)
3932 free (t->level3);
3933}
3934
3935
19bc17a9 3936static void
4b10dd6c
UD
3937allocate_arrays (struct locale_ctype_t *ctype, struct charmap_t *charmap,
3938 struct repertoire_t *repertoire)
19bc17a9
RM
3939{
3940 size_t idx;
a53e3292 3941 size_t width_table_size;
0e16ecfa
UD
3942 const void *key;
3943 size_t len;
3944 void *vdata;
3945 void *curs;
5d431a3e 3946
6d52618b
UD
3947 /* First we have to decide how we organize the arrays. It is easy
3948 for a one-byte character set. But multi-byte character set
3949 cannot be stored flat because the chars might be sparsely used.
3950 So we determine an optimal hashing function for the used
3951 characters.
3952
3953 We use a very trivial hashing function to store the sparse
3954 table. CH % TABSIZE is used as an index. To solve multiple hits
3955 we have N planes. This guarantees a fixed search time for a
42d7c593 3956 character [N / 2]. In the following code we determine the minimum
66ac0abe
UD
3957 value for TABSIZE * N, where TABSIZE >= 256.
3958
3959 Some people complained that this algorithm takes too long. Well,
3960 go on, improve it. But changing the step size is *not* an
3961 option. Some people changed this to use only sizes of prime
3962 numbers. Think again, do some math. We are looking for the
3963 optimal solution, not something which works in general. Unless
3964 somebody can provide a dynamic programming solution I think this
3965 implementation is as good as it can get. */
19bc17a9
RM
3966 size_t min_total = UINT_MAX;
3967 size_t act_size = 256;
3968
ef446144
UD
3969 if (oldstyle_tables)
3970 {
3971 if (!be_quiet && ctype->charnames_act > 512)
3972 fputs (_("\
19bc17a9 3973Computing table size for character classes might take a while..."),
ef446144 3974 stderr);
19bc17a9 3975
ef446144
UD
3976 /* While we want to have a small total size we are willing to use a
3977 little bit larger table if this reduces the number of layers.
3978 Therefore we add a little penalty to the number of planes.
3979 Maybe this constant has to be adjusted a bit. */
66ac0abe 3980#define PENALTY 128
ef446144
UD
3981 do
3982 {
3983 size_t cnt[act_size];
3984 size_t act_planes = 1;
19bc17a9 3985
ef446144 3986 memset (cnt, '\0', sizeof cnt);
19bc17a9 3987
ef446144
UD
3988 for (idx = 0; idx < 256; ++idx)
3989 cnt[idx] = 1;
19bc17a9 3990
ef446144
UD
3991 for (idx = 0; idx < ctype->charnames_act; ++idx)
3992 if (ctype->charnames[idx] >= 256)
19bc17a9 3993 {
ef446144
UD
3994 size_t nr = ctype->charnames[idx] % act_size;
3995
3996 if (++cnt[nr] > act_planes)
3997 {
3998 act_planes = cnt[nr];
3999 if ((act_size + PENALTY) * act_planes >= min_total)
4000 break;
4001 }
19bc17a9 4002 }
19bc17a9 4003
ef446144
UD
4004 if ((act_size + PENALTY) * act_planes < min_total)
4005 {
4006 min_total = (act_size + PENALTY) * act_planes;
4007 ctype->plane_size = act_size;
4008 ctype->plane_cnt = act_planes;
4009 }
19bc17a9 4010
ef446144
UD
4011 ++act_size;
4012 }
4013 while (act_size < min_total);
19bc17a9 4014
ef446144
UD
4015 if (!be_quiet && ctype->charnames_act > 512)
4016 fputs (_(" done\n"), stderr);
19bc17a9 4017
75cd5204 4018
ef446144
UD
4019 ctype->names = (uint32_t *) xcalloc (ctype->plane_size
4020 * ctype->plane_cnt,
4021 sizeof (uint32_t));
19bc17a9 4022
ef446144
UD
4023 for (idx = 1; idx < 256; ++idx)
4024 ctype->names[idx] = idx;
19bc17a9 4025
ef446144
UD
4026 /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */
4027 ctype->names[0] = 1;
19bc17a9 4028
ef446144
UD
4029 for (idx = 256; idx < ctype->charnames_act; ++idx)
4030 {
4031 size_t nr = (ctype->charnames[idx] % ctype->plane_size);
4032 size_t depth = 0;
19bc17a9 4033
ef446144
UD
4034 while (ctype->names[nr + depth * ctype->plane_size])
4035 ++depth;
4036 assert (depth < ctype->plane_cnt);
19bc17a9 4037
ef446144 4038 ctype->names[nr + depth * ctype->plane_size] = ctype->charnames[idx];
19bc17a9 4039
ef446144
UD
4040 /* Now for faster access remember the index in the NAMES_B array. */
4041 ctype->charnames[idx] = nr + depth * ctype->plane_size;
4042 }
4043 ctype->names[0] = 0;
4044 }
4045 else
4046 {
4047 ctype->plane_size = 0;
4048 ctype->plane_cnt = 0;
4049 ctype->names = NULL;
19bc17a9 4050 }
19bc17a9
RM
4051
4052 /* You wonder about this amount of memory? This is only because some
4053 users do not manage to address the array with unsigned values or
4054 data types with range >= 256. '\200' would result in the array
4055 index -128. To help these poor people we duplicate the entries for
4056 128 up to 255 below the entry for \0. */
4057 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
4058 sizeof (char_class_t));
ef446144
UD
4059 ctype->ctype32_b = (char_class32_t *)
4060 xcalloc ((oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256),
4061 sizeof (char_class32_t));
4062 if (!oldstyle_tables)
4063 ctype->class_3level = (struct iovec *)
4064 xmalloc (ctype->nr_charclass * sizeof (struct iovec));
19bc17a9 4065
4a33c2f5 4066 /* This is the array accessed using the multibyte string elements. */
4b10dd6c 4067 for (idx = 0; idx < 256; ++idx)
4a33c2f5 4068 ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
19bc17a9 4069
75cd5204
RM
4070 /* Mirror first 127 entries. We must take care that entry -1 is not
4071 mirrored because EOF == -1. */
4072 for (idx = 0; idx < 127; ++idx)
19bc17a9
RM
4073 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
4074
ef446144
UD
4075 if (oldstyle_tables)
4076 {
4077 /* The 32 bit array contains all characters. */
4078 for (idx = 0; idx < ctype->class_collection_act; ++idx)
4079 ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
4080 }
4081 else
4082 {
4083 /* The 32 bit array contains all characters < 0x100. */
4084 for (idx = 0; idx < ctype->class_collection_act; ++idx)
4085 if (ctype->charnames[idx] < 0x100)
4086 ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
4087 }
4088
4089 if (!oldstyle_tables)
4090 {
4091 size_t nr;
4092
4093 for (nr = 0; nr < ctype->nr_charclass; nr++)
4094 {
4095 struct wctype_table t;
4096
4097 t.p = 4; /* or: 5 */
4098 t.q = 7; /* or: 6 */
4099 wctype_table_init (&t);
4100
4101 for (idx = 0; idx < ctype->class_collection_act; ++idx)
4102 if (ctype->class_collection[idx] & _ISwbit (nr))
4103 wctype_table_add (&t, ctype->charnames[idx]);
4104
4105 wctype_table_finalize (&t);
4106
4107 if (verbose)
4108 fprintf (stderr, _("%s: table for class \"%s\": %lu bytes\n"),
4109 "LC_CTYPE", ctype->classnames[nr],
4110 (unsigned long int) t.result_size);
4111
4112 ctype->class_3level[nr].iov_base = t.result;
4113 ctype->class_3level[nr].iov_len = t.result_size;
4114 }
4115 }
19bc17a9
RM
4116
4117 /* Room for table of mappings. */
49f2be5b
UD
4118 ctype->map = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
4119 ctype->map32 = (uint32_t **) xmalloc (ctype->map_collection_nr
ef446144
UD
4120 * sizeof (uint32_t *));
4121 if (!oldstyle_tables)
4122 ctype->map_3level = (struct iovec *)
4123 xmalloc (ctype->map_collection_nr * sizeof (struct iovec));
19bc17a9
RM
4124
4125 /* Fill in all mappings. */
49f2be5b 4126 for (idx = 0; idx < 2; ++idx)
19bc17a9
RM
4127 {
4128 unsigned int idx2;
4129
4130 /* Allocate table. */
49f2be5b 4131 ctype->map[idx] = (uint32_t *) xmalloc ((256 + 128) * sizeof (uint32_t));
19bc17a9
RM
4132
4133 /* Copy values from collection. */
4b10dd6c 4134 for (idx2 = 0; idx2 < 256; ++idx2)
4a33c2f5 4135 ctype->map[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
19bc17a9 4136
75cd5204
RM
4137 /* Mirror first 127 entries. We must take care not to map entry
4138 -1 because EOF == -1. */
4139 for (idx2 = 0; idx2 < 127; ++idx2)
4a33c2f5 4140 ctype->map[idx][idx2] = ctype->map[idx][256 + idx2];
19bc17a9 4141
75cd5204 4142 /* EOF must map to EOF. */
4a33c2f5 4143 ctype->map[idx][127] = EOF;
49f2be5b 4144 }
a9c27b3e 4145
49f2be5b
UD
4146 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
4147 {
4148 unsigned int idx2;
4149
4150 /* Allocate table. */
ef446144
UD
4151 ctype->map32[idx] = (uint32_t *)
4152 xmalloc ((oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256)
4153 * sizeof (uint32_t));
49f2be5b
UD
4154
4155 /* Copy default value (identity mapping). */
ef446144
UD
4156 if (oldstyle_tables)
4157 memcpy (ctype->map32[idx], ctype->names,
4158 ctype->plane_size * ctype->plane_cnt * sizeof (uint32_t));
4159 else
4160 for (idx2 = 0; idx2 < 256; ++idx2)
4161 ctype->map32[idx][idx2] = idx2;
49f2be5b
UD
4162
4163 /* Copy values from collection. */
4164 for (idx2 = 0; idx2 < 256; ++idx2)
a9c27b3e 4165 if (ctype->map_collection[idx][idx2] != 0)
f1d8b804
UD
4166 ctype->map32[idx][idx2] = ctype->map_collection[idx][idx2];
4167
ef446144
UD
4168 if (oldstyle_tables)
4169 while (idx2 < ctype->map_collection_act[idx])
4170 {
4171 if (ctype->map_collection[idx][idx2] != 0)
4172 ctype->map32[idx][ctype->charnames[idx2]] =
4173 ctype->map_collection[idx][idx2];
4174 ++idx2;
4175 }
4176 }
4177
4178 if (!oldstyle_tables)
4179 {
4180 size_t nr;
4181
4182 for (nr = 0; nr < ctype->map_collection_nr; nr++)
b06c53e7 4183 {
ef446144
UD
4184 struct wctrans_table t;
4185
4186 t.p = 7;
4187 t.q = 9;
4188 wctrans_table_init (&t);
4189
4190 for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
4191 if (ctype->map_collection[nr][idx] != 0)
4192 wctrans_table_add (&t, ctype->charnames[idx],
4193 ctype->map_collection[nr][idx]);
4194
4195 wctrans_table_finalize (&t);
4196
4197 if (verbose)
4198 fprintf (stderr, _("%s: table for map \"%s\": %lu bytes\n"),
4199 "LC_CTYPE", ctype->mapnames[nr],
4200 (unsigned long int) t.result_size);
4201
4202 ctype->map_3level[nr].iov_base = t.result;
4203 ctype->map_3level[nr].iov_len = t.result_size;
b06c53e7 4204 }
19bc17a9
RM
4205 }
4206
4207 /* Extra array for class and map names. */
4b10dd6c
UD
4208 ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
4209 * sizeof (uint32_t));
4210 ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
4211 * sizeof (uint32_t));
75cd5204 4212
ef446144
UD
4213 if (oldstyle_tables)
4214 {
4215 ctype->class_offset = 0; /* not really used */
4216 ctype->map_offset = 0; /* not really used */
4217 }
4218 else
4219 {
4220 ctype->class_offset = _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE);
4221 ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
4222 }
5866b131 4223
ef446144
UD
4224 /* Array for width information. Because the expected widths are very
4225 small we use only one single byte. This saves space. */
4226 if (oldstyle_tables)
75cd5204 4227 {
ef446144
UD
4228 width_table_size = (ctype->plane_size * ctype->plane_cnt + 3) & ~3ul;
4229 ctype->width = (unsigned char *) xmalloc (width_table_size);
75cd5204 4230
ef446144
UD
4231 /* Initialize with -1. */
4232 memset (ctype->width, '\xff', width_table_size);
4233 if (charmap->width_rules != NULL)
827ff758 4234 {
ef446144 4235 size_t cnt;
827ff758 4236
ef446144 4237 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
75cd5204 4238 {
ef446144
UD
4239 unsigned char bytes[charmap->mb_cur_max];
4240 int nbytes = charmap->width_rules[cnt].from->nbytes;
4241
4242 /* We have the range of characters for which the width is
4243 specified described using byte sequences of the multibyte
4244 charset. We have to convert this to UCS4 now. And we
4245 cannot simply convert the beginning and the end of the
4246 sequence, we have to iterate over the byte sequence and
4247 convert it for every single character. */
4248 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
4249
4250 while (nbytes < charmap->width_rules[cnt].to->nbytes
4251 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
4252 nbytes) <= 0)
827ff758 4253 {
ef446144
UD
4254 /* Find the UCS value for `bytes'. */
4255 int inner;
4256 uint32_t wch;
4257 struct charseq *seq =
4258 charmap_find_symbol (charmap, bytes, nbytes);
4259
4260 if (seq == NULL)
4261 wch = ILLEGAL_CHAR_VALUE;
4262 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
4263 wch = seq->ucs4;
4264 else
4265 wch = repertoire_find_value (ctype->repertoire, seq->name,
4266 strlen (seq->name));
827ff758 4267
ef446144 4268 if (wch != ILLEGAL_CHAR_VALUE)
0e16ecfa 4269 {
ef446144
UD
4270 /* Store the value. */
4271 size_t nr = wch % ctype->plane_size;
4272 size_t depth = 0;
4273
4274 while (ctype->names[nr + depth * ctype->plane_size] != wch)
4275 {
4276 ++depth;
4277 assert (depth < ctype->plane_cnt);
4278 }
4279
4280 ctype->width[nr + depth * ctype->plane_size]
4281 = charmap->width_rules[cnt].width;
0e16ecfa 4282 }
827ff758 4283
ef446144
UD
4284 /* "Increment" the bytes sequence. */
4285 inner = nbytes - 1;
4286 while (inner >= 0 && bytes[inner] == 0xff)
4287 --inner;
4288
4289 if (inner < 0)
4290 {
4291 /* We have to extend the byte sequence. */
4292 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
4293 break;
4294
4295 bytes[0] = 1;
4296 memset (&bytes[1], 0, nbytes);
4297 ++nbytes;
4298 }
4299 else
4300 {
4301 ++bytes[inner];
4302 while (++inner < nbytes)
4303 bytes[inner] = 0;
4304 }
827ff758 4305 }
ef446144
UD
4306 }
4307 }
827ff758 4308
ef446144
UD
4309 /* Now set all the other characters of the character set to the
4310 default width. */
4311 curs = NULL;
4312 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
4313 {
4314 struct charseq *data = (struct charseq *) vdata;
4315 size_t nr;
4316 size_t depth;
75cd5204 4317
ef446144
UD
4318 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
4319 data->ucs4 = repertoire_find_value (ctype->repertoire,
4320 data->name, len);
75cd5204 4321
ef446144
UD
4322 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
4323 {
4324 nr = data->ucs4 % ctype->plane_size;
4325 depth = 0;
4326
4327 while (ctype->names[nr + depth * ctype->plane_size] != data->ucs4)
827ff758 4328 {
ef446144
UD
4329 ++depth;
4330 assert (depth < ctype->plane_cnt);
827ff758 4331 }
ef446144
UD
4332
4333 if (ctype->width[nr + depth * ctype->plane_size]
4334 == (unsigned char) '\xff')
4335 ctype->width[nr + depth * ctype->plane_size] =
4336 charmap->width_default;
75cd5204 4337 }
827ff758 4338 }
75cd5204 4339 }
ef446144 4340 else
0e16ecfa 4341 {
ef446144 4342 struct wcwidth_table t;
0e16ecfa 4343
ef446144
UD
4344 t.p = 7;
4345 t.q = 9;
4346 wcwidth_table_init (&t);
0e16ecfa 4347
ef446144
UD
4348 /* First set all the characters of the character set to the default width. */
4349 curs = NULL;
4350 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
4351 {
4352 struct charseq *data = (struct charseq *) vdata;
4353
4354 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
4355 data->ucs4 = repertoire_find_value (ctype->repertoire,
4356 data->name, len);
4357
4358 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
4359 wcwidth_table_add (&t, data->ucs4, charmap->width_default);
4360 }
4361
4362 /* Now add the explicitly specified widths. */
4363 if (charmap->width_rules != NULL)
0e16ecfa 4364 {
ef446144 4365 size_t cnt;
0e16ecfa 4366
ef446144 4367 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
0e16ecfa 4368 {
ef446144
UD
4369 unsigned char bytes[charmap->mb_cur_max];
4370 int nbytes = charmap->width_rules[cnt].from->nbytes;
4371
4372 /* We have the range of characters for which the width is
4373 specified described using byte sequences of the multibyte
4374 charset. We have to convert this to UCS4 now. And we
4375 cannot simply convert the beginning and the end of the
4376 sequence, we have to iterate over the byte sequence and
4377 convert it for every single character. */
4378 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
4379
4380 while (nbytes < charmap->width_rules[cnt].to->nbytes
4381 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
4382 nbytes) <= 0)
4383 {
4384 /* Find the UCS value for `bytes'. */
4385 int inner;
4386 uint32_t wch;
4387 struct charseq *seq =
4388 charmap_find_symbol (charmap, bytes, nbytes);
4389
4390 if (seq == NULL)
4391 wch = ILLEGAL_CHAR_VALUE;
4392 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
4393 wch = seq->ucs4;
4394 else
4395 wch = repertoire_find_value (ctype->repertoire, seq->name,
4396 strlen (seq->name));
4397
4398 if (wch != ILLEGAL_CHAR_VALUE)
4399 /* Store the value. */
4400 wcwidth_table_add (&t, wch, charmap->width_rules[cnt].width);
4401
4402 /* "Increment" the bytes sequence. */
4403 inner = nbytes - 1;
4404 while (inner >= 0 && bytes[inner] == 0xff)
4405 --inner;
0e16ecfa 4406
ef446144
UD
4407 if (inner < 0)
4408 {
4409 /* We have to extend the byte sequence. */
4410 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
4411 break;
4412
4413 bytes[0] = 1;
4414 memset (&bytes[1], 0, nbytes);
4415 ++nbytes;
4416 }
4417 else
4418 {
4419 ++bytes[inner];
4420 while (++inner < nbytes)
4421 bytes[inner] = 0;
4422 }
4423 }
4424 }
0e16ecfa 4425 }
ef446144
UD
4426
4427 wcwidth_table_finalize (&t);
4428
4429 if (verbose)
4430 fprintf (stderr, _("%s: table for width: %lu bytes\n"),
4431 "LC_CTYPE", (unsigned long int) t.result_size);
4432
4433 ctype->width_3level.iov_base = t.result;
4434 ctype->width_3level.iov_len = t.result_size;
0e16ecfa
UD
4435 }
4436
4b10dd6c
UD
4437 /* Set MB_CUR_MAX. */
4438 ctype->mb_cur_max = charmap->mb_cur_max;
6990326c 4439
4b10dd6c
UD
4440 /* Now determine the table for the transliteration information.
4441
4442 XXX It is not yet clear to me whether it is worth implementing a
4443 complicated algorithm which uses a hash table to locate the entries.
4444 For now I'll use a simple array which can be searched using binary
4445 search. */
4446 if (ctype->translit_copy_locale != NULL)
4447 {
4448 /* Fold in the transliteration information from the locale mentioned
4449 in the `include' statement. */
4450 struct locale_ctype_t *here = ctype;
4451
4452 do
4453 {
4454 struct localedef_t *other = find_locale (LC_CTYPE,
4455 here->translit_copy_locale,
4456 repertoire->name, charmap);
4457
4458 if (other == NULL)
4459 {
4460 error (0, 0, _("\
4461%s: transliteration data from locale `%s' not available"),
4462 "LC_CTYPE", here->translit_copy_locale);
4463 break;
4464 }
4465
4466 here = other->categories[LC_CTYPE].ctype;
4467
4468 /* Enqueue the information if necessary. */
4469 if (here->translit != NULL)
4470 {
4471 struct translit_t *endp = here->translit;
4472 while (endp->next != NULL)
4473 endp = endp->next;
4474
4475 endp->next = ctype->translit;
4476 ctype->translit = here->translit;
4477 }
4478 }
4479 while (here->translit_copy_locale != NULL);
4480 }
4481
4482 if (ctype->translit != NULL)
4483 {
4484 /* First count how many entries we have. This is the upper limit
4485 since some entries from the included files might be overwritten. */
4486 size_t number = 0;
4487 size_t cnt;
4488 struct translit_t *runp = ctype->translit;
4489 struct translit_t **sorted;
4490 size_t from_len, to_len;
4491
4492 while (runp != NULL)
4493 {
4494 ++number;
4495 runp = runp->next;
4496 }
4497
4498 /* Next we allocate an array large enough and fill in the values. */
a9c27b3e
UD
4499 sorted = (struct translit_t **) alloca (number
4500 * sizeof (struct translit_t **));
4b10dd6c
UD
4501 runp = ctype->translit;
4502 number = 0;
4503 do
4504 {
4505 /* Search for the place where to insert this string.
4506 XXX Better use a real sorting algorithm later. */
4507 size_t idx = 0;
4508 int replace = 0;
4509
4510 while (idx < number)
4511 {
4512 int res = wcscmp ((const wchar_t *) sorted[idx]->from,
4513 (const wchar_t *) runp->from);
4514 if (res == 0)
4515 {
4516 replace = 1;
4517 break;
4518 }
4519 if (res > 0)
4520 break;
4521 ++idx;
4522 }
4523
4524 if (replace)
4525 sorted[idx] = runp;
4526 else
4527 {
4528 memmove (&sorted[idx + 1], &sorted[idx],
4529 (number - idx) * sizeof (struct translit_t *));
4530 sorted[idx] = runp;
4531 ++number;
4532 }
4533
4534 runp = runp->next;
4535 }
4536 while (runp != NULL);
4537
4538 /* The next step is putting all the possible transliteration
4539 strings in one memory block so that we can write it out.
4540 We need several different blocks:
9ca23765 4541 - index to the from-string array
4b10dd6c
UD
4542 - from-string array
4543 - index to the to-string array
4544 - to-string array.
4b10dd6c
UD
4545 */
4546 from_len = to_len = 0;
4547 for (cnt = 0; cnt < number; ++cnt)
4548 {
4549 struct translit_to_t *srunp;
4550 from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4551 srunp = sorted[cnt]->to;
4552 while (srunp != NULL)
4553 {
4554 to_len += wcslen ((const wchar_t *) srunp->str) + 1;
4555 srunp = srunp->next;
4556 }
4557 /* Plus one for the extra NUL character marking the end of
4558 the list for the current entry. */
4559 ++to_len;
4560 }
4561
4562 /* We can allocate the arrays for the results. */
4a33c2f5
UD
4563 ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
4564 ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
4565 ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
4566 ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
4b10dd6c
UD
4567
4568 from_len = 0;
4569 to_len = 0;
4570 for (cnt = 0; cnt < number; ++cnt)
4571 {
4572 size_t len;
4573 struct translit_to_t *srunp;
4574
4a33c2f5
UD
4575 ctype->translit_from_idx[cnt] = from_len;
4576 ctype->translit_to_idx[cnt] = to_len;
4b10dd6c
UD
4577
4578 len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4a33c2f5 4579 wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4b10dd6c
UD
4580 (const wchar_t *) sorted[cnt]->from, len);
4581 from_len += len;
4582
4a33c2f5 4583 ctype->translit_to_idx[cnt] = to_len;
4b10dd6c
UD
4584 srunp = sorted[cnt]->to;
4585 while (srunp != NULL)
4586 {
4587 len = wcslen ((const wchar_t *) srunp->str) + 1;
4a33c2f5 4588 wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4b10dd6c
UD
4589 (const wchar_t *) srunp->str, len);
4590 to_len += len;
4591 srunp = srunp->next;
4592 }
4a33c2f5 4593 ctype->translit_to_tbl[to_len++] = L'\0';
4b10dd6c 4594 }
4b10dd6c
UD
4595
4596 /* Store the information about the length. */
04fbc779 4597 ctype->translit_idx_size = number;
4b10dd6c
UD
4598 ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
4599 ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
4600 }
4601 else
4602 {
4603 /* Provide some dummy pointers since we have nothing to write out. */
4604 static uint32_t no_str = { 0 };
4605
4a33c2f5
UD
4606 ctype->translit_from_idx = &no_str;
4607 ctype->translit_from_tbl = &no_str;
4608 ctype->translit_to_tbl = &no_str;
4b10dd6c
UD
4609 ctype->translit_idx_size = 0;
4610 ctype->translit_from_tbl_size = 0;
4611 ctype->translit_to_tbl_size = 0;
4612 }
19bc17a9 4613}