]> git.ipfire.org Git - thirdparty/glibc.git/blame - locale/programs/ld-ctype.c
Update.
[thirdparty/glibc.git] / locale / programs / ld-ctype.c
CommitLineData
01ff9d0b 1/* Copyright (C) 1995-1999, 2000 Free Software Foundation, Inc.
c84142e8 2 This file is part of the GNU C Library.
4b10dd6c 3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
19bc17a9 4
c84142e8
UD
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
19bc17a9 9
c84142e8
UD
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
19bc17a9 14
c84142e8
UD
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
19bc17a9
RM
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
a68b0d31 24#include <alloca.h>
4b10dd6c 25#include <byteswap.h>
19bc17a9 26#include <endian.h>
4b10dd6c 27#include <errno.h>
19bc17a9 28#include <limits.h>
4b10dd6c
UD
29#include <obstack.h>
30#include <stdlib.h>
19bc17a9 31#include <string.h>
4b10dd6c
UD
32#include <wchar.h>
33#include <wctype.h>
34#include <sys/uio.h>
19bc17a9 35
4b10dd6c 36#include "charmap.h"
19bc17a9
RM
37#include "localeinfo.h"
38#include "langinfo.h"
4b10dd6c 39#include "linereader.h"
19bc17a9 40#include "locfile-token.h"
4b10dd6c
UD
41#include "locfile.h"
42#include "localedef.h"
19bc17a9 43
19bc17a9
RM
44#include <assert.h>
45
46
#ifdef PREDEFINED_CLASSES
/* These are the extra bits not in wctype.h since these are not preallocated
   classes.  The constants are unsigned: shifting a signed 1 into bit 31
   is undefined behaviour.  */
# define _ISwspecial1  (1U << 29)
# define _ISwspecial2  (1U << 30)
# define _ISwspecial3  (1U << 31)
#endif


/* The bit used for representing a special class.  BITPOS yields the
   position of the class token relative to `tok_upper'; BIT and BITw turn
   that position into the mask used in the 8-bit and the wide character
   tables respectively.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (_ISbit (BITPOS (class)))
#define BITw(class) (_ISwbit (BITPOS (class)))

/* Lvalue for the entry of table COLLECTION in CTYPE that belongs to the
   wide character VALUE.  IDX is either empty or an array subscript which
   selects one of several parallel tables (and its `_max'/`_act'
   counters).  The lookup itself is done by `find_idx'.  */
#define ELEM(ctype, collection, idx, value)                                   \
  (*find_idx ((ctype), &(ctype)->collection idx,                              \
              &(ctype)->collection##_max idx,                                 \
              &(ctype)->collection##_act idx, (value)))


/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t uint16_t
#define char_class32_t uint32_t
4b10dd6c
UD
71
72
/* Types to describe a transliteration action.  We have a possibly
   multiple character from-string and a set of multiple character
   to-strings.  All are 32bit values since this is what is used in
   the gconv functions.  */

/* One to-string; the alternatives for a from-string are chained
   through NEXT.  */
struct translit_to_t
{
  uint32_t *str;                /* The replacement string.  */

  struct translit_to_t *next;   /* Next alternative or NULL.  */
};
83
/* One transliteration rule: the from-string together with the list of
   its to-strings.  */
struct translit_t
{
  uint32_t *from;               /* The string to be replaced.  */

  /* A file name and a line number recorded with the rule.  */
  const char *fname;
  size_t lineno;

  struct translit_to_t *to;     /* List of replacement strings.  */

  struct translit_t *next;      /* Next rule or NULL.  */
};
19bc17a9 95
a673fbcb
UD
/* One entry of the list of characters which are ignored when
   transliterating.  An entry is described by the three values FROM, TO
   and STEP.  */
struct translit_ignore_t
{
  uint32_t from;
  uint32_t to;
  uint32_t step;

  /* A file name and a line number recorded with the entry.  */
  const char *fname;
  size_t lineno;

  struct translit_ignore_t *next;       /* Next entry or NULL.  */
};
107
19bc17a9
RM
108
109/* The real definition of the struct for the LC_CTYPE locale. */
110struct locale_ctype_t
111{
4b10dd6c 112 uint32_t *charnames;
19bc17a9
RM
113 size_t charnames_max;
114 size_t charnames_act;
115
4b10dd6c
UD
116 struct repertoire_t *repertoire;
117
118 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
119#define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
ba1ffaa1 120 size_t nr_charclass;
19bc17a9 121 const char *classnames[MAX_NR_CHARCLASS];
4b10dd6c
UD
122 uint32_t last_class_char;
123 uint32_t class256_collection[256];
124 uint32_t *class_collection;
19bc17a9
RM
125 size_t class_collection_max;
126 size_t class_collection_act;
4b10dd6c
UD
127 uint32_t class_done;
128
129 struct charseq **mbdigits;
130 size_t mbdigits_act;
131 size_t mbdigits_max;
132 uint32_t *wcdigits;
133 size_t wcdigits_act;
134 size_t wcdigits_max;
135
136 struct charseq *mboutdigits[10];
137 uint32_t wcoutdigits[10];
138 size_t outdigits_act;
19bc17a9
RM
139
140 /* If the following number ever turns out to be too small simply
141 increase it. But I doubt it will. --drepper@gnu */
142#define MAX_NR_CHARMAP 16
143 const char *mapnames[MAX_NR_CHARMAP];
4b10dd6c
UD
144 uint32_t *map_collection[MAX_NR_CHARMAP];
145 uint32_t map256_collection[2][256];
9a0a462c
UD
146 size_t map_collection_max[MAX_NR_CHARMAP];
147 size_t map_collection_act[MAX_NR_CHARMAP];
19bc17a9
RM
148 size_t map_collection_nr;
149 size_t last_map_idx;
4b10dd6c
UD
150 int tomap_done[MAX_NR_CHARMAP];
151
152 /* Transliteration information. */
153 const char *translit_copy_locale;
154 const char *translit_copy_repertoire;
155 struct translit_t *translit;
a673fbcb 156 struct translit_ignore_t *translit_ignore;
a8e4c924 157 uint32_t ntranslit_ignore;
a673fbcb
UD
158
159 uint32_t *default_missing;
160 const char *default_missing_file;
161 size_t default_missing_lineno;
19bc17a9
RM
162
163 /* The arrays for the binary representation. */
4b10dd6c
UD
164 uint32_t plane_size;
165 uint32_t plane_cnt;
19bc17a9
RM
166 char_class_t *ctype_b;
167 char_class32_t *ctype32_b;
4a33c2f5
UD
168 uint32_t *names;
169 uint32_t **map;
49f2be5b 170 uint32_t **map32;
4b10dd6c
UD
171 uint32_t *class_name_ptr;
172 uint32_t *map_name_ptr;
75cd5204 173 unsigned char *width;
4b10dd6c 174 uint32_t mb_cur_max;
6990326c 175 const char *codeset_name;
4a33c2f5
UD
176 uint32_t *translit_from_idx;
177 uint32_t *translit_from_tbl;
178 uint32_t *translit_to_idx;
179 uint32_t *translit_to_tbl;
04fbc779 180 uint32_t translit_idx_size;
4b10dd6c
UD
181 size_t translit_from_tbl_size;
182 size_t translit_to_tbl_size;
183
a673fbcb 184 struct obstack mempool;
19bc17a9
RM
185};
186
187
4b10dd6c
UD
/* Memory management hooks used by the obstack macros.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free


/* Prototypes for local functions.  */
static void ctype_startup (struct linereader *lr,
                           struct localedef_t *locale,
                           struct charmap_t *charmap,
                           int ignore_content);

static void ctype_class_new (struct linereader *lr,
                             struct locale_ctype_t *ctype,
                             const char *name);

static void ctype_map_new (struct linereader *lr,
                           struct locale_ctype_t *ctype,
                           const char *name,
                           struct charmap_t *charmap);

static uint32_t *find_idx (struct locale_ctype_t *ctype,
                           uint32_t **table,
                           size_t *max,
                           size_t *act,
                           unsigned int idx);

static void set_class_defaults (struct locale_ctype_t *ctype,
                                struct charmap_t *charmap,
                                struct repertoire_t *repertoire);

static void allocate_arrays (struct locale_ctype_t *ctype,
                             struct charmap_t *charmap,
                             struct repertoire_t *repertoire);
19bc17a9
RM
208
209
4b10dd6c
UD
/* Symbolic names tried for the ten decimal digits when the plain
   digit characters cannot be found in the charmap.  */
static const char *longnames[] =
{
  "zero",
  "one",
  "two",
  "three",
  "four",
  "five",
  "six",
  "seven",
  "eight",
  "nine"
};

/* Names of the form Uxxxxxxxx for the ten decimal digits.  */
static const char *uninames[] =
{
  "U00000030",
  "U00000031",
  "U00000032",
  "U00000033",
  "U00000034",
  "U00000035",
  "U00000036",
  "U00000037",
  "U00000038",
  "U00000039"
};

/* The ten decimal digits as single bytes.  */
static const unsigned char digits[] = "0123456789";
221
222
223static void
19bc17a9 224ctype_startup (struct linereader *lr, struct localedef_t *locale,
4b10dd6c 225 struct charmap_t *charmap, int ignore_content)
19bc17a9
RM
226{
227 unsigned int cnt;
228 struct locale_ctype_t *ctype;
229
4b10dd6c 230 if (!ignore_content)
19bc17a9 231 {
4b10dd6c
UD
232 /* Allocate the needed room. */
233 locale->categories[LC_CTYPE].ctype = ctype =
234 (struct locale_ctype_t *) xcalloc (1, sizeof (struct locale_ctype_t));
235
236 /* We have seen no names yet. */
237 ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
238 ctype->charnames =
239 (unsigned int *) xmalloc (ctype->charnames_max
240 * sizeof (unsigned int));
241 for (cnt = 0; cnt < 256; ++cnt)
242 ctype->charnames[cnt] = cnt;
243 ctype->charnames_act = 256;
244
245 /* Fill character class information. */
246 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
247 /* The order of the following instructions determines the bit
248 positions! */
249 ctype_class_new (lr, ctype, "upper");
250 ctype_class_new (lr, ctype, "lower");
251 ctype_class_new (lr, ctype, "alpha");
252 ctype_class_new (lr, ctype, "digit");
253 ctype_class_new (lr, ctype, "xdigit");
254 ctype_class_new (lr, ctype, "space");
255 ctype_class_new (lr, ctype, "print");
256 ctype_class_new (lr, ctype, "graph");
257 ctype_class_new (lr, ctype, "blank");
258 ctype_class_new (lr, ctype, "cntrl");
259 ctype_class_new (lr, ctype, "punct");
260 ctype_class_new (lr, ctype, "alnum");
011ebfab 261#ifdef PREDEFINED_CLASSES
4b10dd6c
UD
262 /* The following are extensions from ISO 14652. */
263 ctype_class_new (lr, ctype, "left_to_right");
264 ctype_class_new (lr, ctype, "right_to_left");
265 ctype_class_new (lr, ctype, "num_terminator");
266 ctype_class_new (lr, ctype, "num_separator");
267 ctype_class_new (lr, ctype, "segment_separator");
268 ctype_class_new (lr, ctype, "block_separator");
269 ctype_class_new (lr, ctype, "direction_control");
270 ctype_class_new (lr, ctype, "sym_swap_layout");
271 ctype_class_new (lr, ctype, "char_shape_selector");
272 ctype_class_new (lr, ctype, "num_shape_selector");
273 ctype_class_new (lr, ctype, "non_spacing");
274 ctype_class_new (lr, ctype, "non_spacing_level3");
275 ctype_class_new (lr, ctype, "normal_connect");
276 ctype_class_new (lr, ctype, "r_connect");
277 ctype_class_new (lr, ctype, "no_connect");
278 ctype_class_new (lr, ctype, "no_connect-space");
279 ctype_class_new (lr, ctype, "vowel_connect");
011ebfab 280#endif
4b10dd6c
UD
281
282 ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
283 ctype->class_collection
284 = (uint32_t *) xcalloc (sizeof (unsigned long int),
285 ctype->class_collection_max);
286 ctype->class_collection_act = 256;
287
288 /* Fill character map information. */
4b10dd6c
UD
289 ctype->last_map_idx = MAX_NR_CHARMAP;
290 ctype_map_new (lr, ctype, "toupper", charmap);
291 ctype_map_new (lr, ctype, "tolower", charmap);
011ebfab 292#ifdef PREDEFINED_CLASSES
4b10dd6c 293 ctype_map_new (lr, ctype, "tosymmetric", charmap);
011ebfab 294#endif
4b10dd6c
UD
295
296 /* Fill first 256 entries in `toXXX' arrays. */
297 for (cnt = 0; cnt < 256; ++cnt)
298 {
299 ctype->map_collection[0][cnt] = cnt;
300 ctype->map_collection[1][cnt] = cnt;
9e2b7438 301#ifdef PREDEFINED_CLASSES
4b10dd6c 302 ctype->map_collection[2][cnt] = cnt;
9e2b7438 303#endif
4b10dd6c
UD
304 ctype->map256_collection[0][cnt] = cnt;
305 ctype->map256_collection[1][cnt] = cnt;
306 }
307
a673fbcb 308 obstack_init (&ctype->mempool);
19bc17a9
RM
309 }
310}
311
312
313void
4b10dd6c 314ctype_finish (struct localedef_t *locale, struct charmap_t *charmap)
19bc17a9
RM
315{
316 /* See POSIX.2, table 2-6 for the meaning of the following table. */
317#define NCLASS 12
318 static const struct
319 {
320 const char *name;
321 const char allow[NCLASS];
322 }
323 valid_table[NCLASS] =
324 {
325 /* The order is important. See token.h for more information.
326 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
327 { "upper", "--MX-XDDXXX-" },
328 { "lower", "--MX-XDDXXX-" },
329 { "alpha", "---X-XDDXXX-" },
330 { "digit", "XXX--XDDXXX-" },
331 { "xdigit", "-----XDDXXX-" },
332 { "space", "XXXXX------X" },
333 { "print", "---------X--" },
334 { "graph", "---------X--" },
335 { "blank", "XXXXXM-----X" },
336 { "cntrl", "XXXXX-XX--XX" },
337 { "punct", "XXXXX-DD-X-X" },
338 { "alnum", "-----XDDXXX-" }
339 };
340 size_t cnt;
341 int cls1, cls2;
4b10dd6c
UD
342 uint32_t space_value;
343 struct charseq *space_seq;
19bc17a9 344 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
4b10dd6c 345 int warned;
0e16ecfa
UD
346 const void *key;
347 size_t len;
348 void *vdata;
349 void *curs;
19bc17a9 350
b9eb05d6
UD
351 /* Now resolve copying and also handle completely missing definitions. */
352 if (ctype == NULL)
353 {
70e51ab9
UD
354 const char *repertoire_name;
355
b9eb05d6
UD
356 /* First see whether we were supposed to copy. If yes, find the
357 actual definition. */
358 if (locale->copy_name[LC_CTYPE] != NULL)
359 {
360 /* Find the copying locale. This has to happen transitively since
361 the locale we are copying from might also copying another one. */
362 struct localedef_t *from = locale;
363
364 do
365 from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
366 from->repertoire_name, charmap);
367 while (from->categories[LC_CTYPE].ctype == NULL
368 && from->copy_name[LC_CTYPE] != NULL);
369
370 ctype = locale->categories[LC_CTYPE].ctype
371 = from->categories[LC_CTYPE].ctype;
372 }
373
374 /* If there is still no definition issue an warning and create an
375 empty one. */
376 if (ctype == NULL)
377 {
f6ada7ad
UD
378 if (! be_quiet)
379 error (0, 0, _("No definition for %s category found"), "LC_CTYPE");
b9eb05d6
UD
380 ctype_startup (NULL, locale, charmap, 0);
381 ctype = locale->categories[LC_CTYPE].ctype;
382 }
70e51ab9
UD
383
384 /* Get the repertoire we have to use. */
385 repertoire_name = locale->repertoire_name ?: repertoire_global;
386 if (repertoire_name != NULL)
387 ctype->repertoire = repertoire_read (repertoire_name);
b9eb05d6
UD
388 }
389
db76d943
UD
390 /* We need the name of the currently used 8-bit character set to
391 make correct conversion between this 8-bit representation and the
392 ISO 10646 character set used internally for wide characters. */
393 ctype->codeset_name = charmap->code_set_name;
394 if (ctype->codeset_name == NULL)
395 {
396 if (! be_quiet)
397 error (0, 0, "no character set name specified in charmap");
398 ctype->codeset_name = "//UNKNOWN//";
399 }
400
19bc17a9 401 /* Set default value for classes not specified. */
4b10dd6c 402 set_class_defaults (ctype, charmap, ctype->repertoire);
19bc17a9
RM
403
404 /* Check according to table. */
42d7c593 405 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
19bc17a9 406 {
4b10dd6c 407 uint32_t tmp = ctype->class_collection[cnt];
19bc17a9 408
4b10dd6c
UD
409 if (tmp != 0)
410 {
411 for (cls1 = 0; cls1 < NCLASS; ++cls1)
412 if ((tmp & _ISwbit (cls1)) != 0)
413 for (cls2 = 0; cls2 < NCLASS; ++cls2)
414 if (valid_table[cls1].allow[cls2] != '-')
19bc17a9 415 {
4b10dd6c
UD
416 int eq = (tmp & _ISwbit (cls2)) != 0;
417 switch (valid_table[cls1].allow[cls2])
19bc17a9 418 {
4b10dd6c
UD
419 case 'M':
420 if (!eq)
421 {
422 uint32_t value = ctype->charnames[cnt];
423
424 if (!be_quiet)
425 error (0, 0, _("\
426character L'\\u%0*x' in class `%s' must be in class `%s'"),
427 value > 0xffff ? 8 : 4, value,
428 valid_table[cls1].name,
429 valid_table[cls2].name);
430 }
431 break;
432
433 case 'X':
434 if (eq)
435 {
436 uint32_t value = ctype->charnames[cnt];
437
438 if (!be_quiet)
439 error (0, 0, _("\
440character L'\\u%0*x' in class `%s' must not be in class `%s'"),
441 value > 0xffff ? 8 : 4, value,
442 valid_table[cls1].name,
443 valid_table[cls2].name);
444 }
445 break;
446
447 case 'D':
448 ctype->class_collection[cnt] |= _ISwbit (cls2);
449 break;
450
451 default:
452 error (5, 0, _("internal error in %s, line %u"),
453 __FUNCTION__, __LINE__);
19bc17a9 454 }
4b10dd6c
UD
455 }
456 }
457 }
458
459 for (cnt = 0; cnt < 256; ++cnt)
460 {
461 uint32_t tmp = ctype->class256_collection[cnt];
19bc17a9 462
4b10dd6c
UD
463 if (tmp != 0)
464 {
465 for (cls1 = 0; cls1 < NCLASS; ++cls1)
466 if ((tmp & _ISbit (cls1)) != 0)
467 for (cls2 = 0; cls2 < NCLASS; ++cls2)
468 if (valid_table[cls1].allow[cls2] != '-')
469 {
470 int eq = (tmp & _ISbit (cls2)) != 0;
471 switch (valid_table[cls1].allow[cls2])
19bc17a9 472 {
4b10dd6c
UD
473 case 'M':
474 if (!eq)
475 {
476 char buf[17];
477
5d431a3e 478 snprintf (buf, sizeof buf, "\\%Zo", cnt);
4b10dd6c
UD
479
480 if (!be_quiet)
481 error (0, 0, _("\
482character '%s' in class `%s' must be in class `%s'"),
483 buf, valid_table[cls1].name,
484 valid_table[cls2].name);
485 }
486 break;
487
488 case 'X':
489 if (eq)
490 {
491 char buf[17];
492
5d431a3e 493 snprintf (buf, sizeof buf, "\\%Zo", cnt);
4b10dd6c
UD
494
495 if (!be_quiet)
496 error (0, 0, _("\
497character '%s' in class `%s' must not be in class `%s'"),
498 buf, valid_table[cls1].name,
499 valid_table[cls2].name);
500 }
501 break;
502
503 case 'D':
504 ctype->class256_collection[cnt] |= _ISbit (cls2);
505 break;
506
507 default:
508 error (5, 0, _("internal error in %s, line %u"),
509 __FUNCTION__, __LINE__);
19bc17a9 510 }
4b10dd6c
UD
511 }
512 }
19bc17a9
RM
513 }
514
515 /* ... and now test <SP> as a special case. */
a0dc5206
UD
516 space_value = 32;
517 if (((cnt = BITPOS (tok_space),
518 (ELEM (ctype, class_collection, , space_value)
519 & BITw (tok_space)) == 0)
520 || (cnt = BITPOS (tok_blank),
521 (ELEM (ctype, class_collection, , space_value)
522 & BITw (tok_blank)) == 0)))
880f421f
UD
523 {
524 if (!be_quiet)
525 error (0, 0, _("<SP> character not in class `%s'"),
526 valid_table[cnt].name);
527 }
c84142e8
UD
528 else if (((cnt = BITPOS (tok_punct),
529 (ELEM (ctype, class_collection, , space_value)
4b10dd6c 530 & BITw (tok_punct)) != 0)
c84142e8
UD
531 || (cnt = BITPOS (tok_graph),
532 (ELEM (ctype, class_collection, , space_value)
4b10dd6c 533 & BITw (tok_graph))
880f421f
UD
534 != 0)))
535 {
536 if (!be_quiet)
537 error (0, 0, _("<SP> character must not be in class `%s'"),
538 valid_table[cnt].name);
539 }
19bc17a9 540 else
4b10dd6c
UD
541 ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
542
543 space_seq = charmap_find_value (charmap, "SP", 2);
ce177a84 544 if (space_seq == NULL)
45c95239
UD
545 space_seq = charmap_find_value (charmap, "space", 5);
546 if (space_seq == NULL)
1b97149d 547 space_seq = charmap_find_value (charmap, "U00000020", 9);
4b10dd6c
UD
548 if (space_seq == NULL || space_seq->nbytes != 1)
549 {
550 if (!be_quiet)
551 error (0, 0, _("character <SP> not defined in character map"));
552 }
553 else if (((cnt = BITPOS (tok_space),
554 (ctype->class256_collection[space_seq->bytes[0]]
555 & BIT (tok_space)) == 0)
556 || (cnt = BITPOS (tok_blank),
557 (ctype->class256_collection[space_seq->bytes[0]]
558 & BIT (tok_blank)) == 0)))
559 {
560 if (!be_quiet)
561 error (0, 0, _("<SP> character not in class `%s'"),
562 valid_table[cnt].name);
563 }
564 else if (((cnt = BITPOS (tok_punct),
565 (ctype->class256_collection[space_seq->bytes[0]]
566 & BIT (tok_punct)) != 0)
567 || (cnt = BITPOS (tok_graph),
568 (ctype->class256_collection[space_seq->bytes[0]]
569 & BIT (tok_graph)) != 0)))
570 {
571 if (!be_quiet)
572 error (0, 0, _("<SP> character must not be in class `%s'"),
573 valid_table[cnt].name);
574 }
575 else
576 ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
75cd5204
RM
577
578 /* Now that the tests are done make sure the name array contains all
579 characters which are handled in the WIDTH section of the
580 character set definition file. */
4b10dd6c
UD
581 if (charmap->width_rules != NULL)
582 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
75cd5204 583 {
827ff758
UD
584 unsigned char bytes[charmap->mb_cur_max];
585 int nbytes = charmap->width_rules[cnt].from->nbytes;
586
587 /* We have the range of character for which the width is
588 specified described using byte sequences of the multibyte
589 charset. We have to convert this to UCS4 now. And we
590 cannot simply convert the beginning and the end of the
591 sequence, we have to iterate over the byte sequence and
592 convert it for every single character. */
593 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
594
595 while (nbytes < charmap->width_rules[cnt].to->nbytes
596 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
597 nbytes) <= 0)
598 {
599 /* Find the UCS value for `bytes'. */
827ff758 600 int inner;
76e680a8
UD
601 uint32_t wch;
602 struct charseq *seq = charmap_find_symbol (charmap, bytes, nbytes);
603
604 if (seq == NULL)
605 wch = ILLEGAL_CHAR_VALUE;
606 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
607 wch = seq->ucs4;
608 else
609 wch = repertoire_find_value (ctype->repertoire, seq->name,
610 strlen (seq->name));
827ff758
UD
611
612 if (wch != ILLEGAL_CHAR_VALUE)
613 /* We are only interested in the side-effects of the
614 `find_idx' call. It will add appropriate entries in
615 the name array if this is necessary. */
616 (void) find_idx (ctype, NULL, NULL, NULL, wch);
617
618 /* "Increment" the bytes sequence. */
619 inner = nbytes - 1;
620 while (inner >= 0 && bytes[inner] == 0xff)
621 --inner;
622
623 if (inner < 0)
624 {
625 /* We have to extend the byte sequence. */
626 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
627 break;
628
629 bytes[0] = 1;
630 memset (&bytes[1], 0, nbytes);
631 ++nbytes;
632 }
633 else
634 {
635 ++bytes[inner];
636 while (++inner < nbytes)
637 bytes[inner] = 0;
638 }
639 }
4b10dd6c
UD
640 }
641
0e16ecfa
UD
642 /* Now set all the other characters of the character set to the
643 default width. */
644 curs = NULL;
645 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
646 {
647 struct charseq *data = (struct charseq *) vdata;
648
649 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
650 data->ucs4 = repertoire_find_value (ctype->repertoire,
651 data->name, len);
652
653 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
654 (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
655 }
656
4b10dd6c
UD
657 /* There must be a multiple of 10 digits. */
658 if (ctype->mbdigits_act % 10 != 0)
659 {
660 assert (ctype->mbdigits_act == ctype->wcdigits_act);
661 ctype->wcdigits_act -= ctype->mbdigits_act % 10;
662 ctype->mbdigits_act -= ctype->mbdigits_act % 10;
663 error (0, 0, _("`digit' category has not entries in groups of ten"));
664 }
665
666 /* Check the input digits. There must be a multiple of ten available.
42d7c593 667 In each group it could be that one or the other character is missing.
4b10dd6c
UD
668 In this case the whole group must be removed. */
669 cnt = 0;
670 while (cnt < ctype->mbdigits_act)
671 {
672 size_t inner;
673 for (inner = 0; inner < 10; ++inner)
674 if (ctype->mbdigits[cnt + inner] == NULL)
675 break;
676
677 if (inner == 10)
678 cnt += 10;
679 else
680 {
681 /* Remove the group. */
682 memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
683 ((ctype->wcdigits_act - cnt - 10)
684 * sizeof (ctype->mbdigits[0])));
685 ctype->mbdigits_act -= 10;
686 }
687 }
688
689 /* If no input digits are given use the default. */
690 if (ctype->mbdigits_act == 0)
691 {
692 if (ctype->mbdigits_max == 0)
693 {
694 ctype->mbdigits = obstack_alloc (&charmap->mem_pool,
695 10 * sizeof (struct charseq *));
696 ctype->mbdigits_max = 10;
697 }
698
699 for (cnt = 0; cnt < 10; ++cnt)
700 {
701 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
702 digits + cnt, 1);
703 if (ctype->mbdigits[cnt] == NULL)
704 {
705 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
706 longnames[cnt],
707 strlen (longnames[cnt]));
708 if (ctype->mbdigits[cnt] == NULL)
709 {
710 /* Hum, this ain't good. */
711 error (0, 0, _("\
712no input digits defined and none of the standard names in the charmap"));
713
714 ctype->mbdigits[cnt] = obstack_alloc (&charmap->mem_pool,
715 sizeof (struct charseq) + 1);
716
717 /* This is better than nothing. */
718 ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
719 ctype->mbdigits[cnt]->nbytes = 1;
720 }
721 }
722 }
723
724 ctype->mbdigits_act = 10;
725 }
726
727 /* Check the wide character input digits. There must be a multiple
42d7c593 728 of ten available. In each group it could be that one or the other
4b10dd6c
UD
729 character is missing. In this case the whole group must be
730 removed. */
731 cnt = 0;
732 while (cnt < ctype->wcdigits_act)
733 {
734 size_t inner;
735 for (inner = 0; inner < 10; ++inner)
736 if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
737 break;
738
739 if (inner == 10)
740 cnt += 10;
741 else
742 {
743 /* Remove the group. */
744 memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
745 ((ctype->wcdigits_act - cnt - 10)
746 * sizeof (ctype->wcdigits[0])));
747 ctype->wcdigits_act -= 10;
748 }
749 }
750
751 /* If no input digits are given use the default. */
752 if (ctype->wcdigits_act == 0)
753 {
754 if (ctype->wcdigits_max == 0)
755 {
756 ctype->wcdigits = obstack_alloc (&charmap->mem_pool,
757 10 * sizeof (uint32_t));
758 ctype->wcdigits_max = 10;
759 }
760
761 for (cnt = 0; cnt < 10; ++cnt)
762 ctype->wcdigits[cnt] = L'0' + cnt;
763
764 ctype->mbdigits_act = 10;
765 }
766
767 /* Check the outdigits. */
768 warned = 0;
769 for (cnt = 0; cnt < 10; ++cnt)
770 if (ctype->mboutdigits[cnt] == NULL)
771 {
772 static struct charseq replace[2];
773
774 if (!warned)
775 {
776 error (0, 0, _("\
777not all characters used in `outdigit' are available in the charmap"));
778 warned = 1;
779 }
780
781 replace[0].nbytes = 1;
782 replace[0].bytes[0] = '?';
783 replace[0].bytes[1] = '\0';
784 ctype->mboutdigits[cnt] = &replace[0];
785 }
786
787 warned = 0;
788 for (cnt = 0; cnt < 10; ++cnt)
789 if (ctype->wcoutdigits[cnt] == 0)
790 {
791 if (!warned)
792 {
793 error (0, 0, _("\
794not all characters used in `outdigit' are available in the repertoire"));
795 warned = 1;
796 }
797
798 ctype->wcoutdigits[cnt] = L'?';
75cd5204 799 }
a8e4c924
UD
800
801 /* Sort the entries in the translit_ignore list. */
802 if (ctype->translit_ignore != NULL)
803 {
804 struct translit_ignore_t *firstp = ctype->translit_ignore;
805 struct translit_ignore_t *runp;
806
807 ctype->ntranslit_ignore = 1;
808
809 for (runp = firstp->next; runp != NULL; runp = runp->next)
810 {
811 struct translit_ignore_t *lastp = NULL;
812 struct translit_ignore_t *cmpp;
813
814 ++ctype->ntranslit_ignore;
815
816 for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
817 if (runp->from < cmpp->from)
818 break;
819
820 runp->next = lastp;
821 if (lastp == NULL)
822 firstp = runp;
823 }
824
825 ctype->translit_ignore = firstp;
826 }
19bc17a9
RM
827}
828
829
830void
4b10dd6c 831ctype_output (struct localedef_t *locale, struct charmap_t *charmap,
75cd5204 832 const char *output_path)
19bc17a9 833{
e43e0dd6 834 static const char nulbytes[4] = { 0, 0, 0, 0 };
19bc17a9
RM
835 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
836 const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
5491da0d 837 + (ctype->map_collection_nr - 2));
75cd5204 838 struct iovec iov[2 + nelems + ctype->nr_charclass
f175216d 839 + ctype->map_collection_nr + 2];
19bc17a9 840 struct locale_file data;
4b10dd6c 841 uint32_t idx[nelems + 1];
1d96d74d 842 uint32_t default_missing_len;
75cd5204 843 size_t elem, cnt, offset, total;
4b10dd6c 844 char *cp;
19bc17a9
RM
845
846 /* Now prepare the output: Find the sizes of the table we can use. */
4b10dd6c 847 allocate_arrays (ctype, charmap, ctype->repertoire);
19bc17a9
RM
848
849 data.magic = LIMAGIC (LC_CTYPE);
850 data.n = nelems;
851 iov[0].iov_base = (void *) &data;
852 iov[0].iov_len = sizeof (data);
853
854 iov[1].iov_base = (void *) idx;
a0edd63e 855 iov[1].iov_len = nelems * sizeof (uint32_t);
19bc17a9
RM
856
857 idx[0] = iov[0].iov_len + iov[1].iov_len;
858 offset = 0;
859
860 for (elem = 0; elem < nelems; ++elem)
861 {
862 if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
863 switch (elem)
864 {
c6df09ad
UD
865#define CTYPE_EMPTY(name) \
866 case name: \
04fbc779 867 iov[2 + elem + offset].iov_base = (void *) ""; \
c6df09ad
UD
868 iov[2 + elem + offset].iov_len = 0; \
869 idx[elem + 1] = idx[elem]; \
870 break
871
872 CTYPE_EMPTY(_NL_CTYPE_GAP1);
873 CTYPE_EMPTY(_NL_CTYPE_GAP2);
874 CTYPE_EMPTY(_NL_CTYPE_GAP3);
875
19bc17a9
RM
876#define CTYPE_DATA(name, base, len) \
877 case _NL_ITEM_INDEX (name): \
ce7a5ef4
RM
878 iov[2 + elem + offset].iov_base = (base); \
879 iov[2 + elem + offset].iov_len = (len); \
1d96d74d 880 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
19bc17a9
RM
881 break
882
883 CTYPE_DATA (_NL_CTYPE_CLASS,
884 ctype->ctype_b,
885 (256 + 128) * sizeof (char_class_t));
886
4a33c2f5
UD
887 CTYPE_DATA (_NL_CTYPE_TOUPPER,
888 ctype->map[0],
f1d8b804 889 (256 + 128) * sizeof (uint32_t));
4a33c2f5
UD
890 CTYPE_DATA (_NL_CTYPE_TOLOWER,
891 ctype->map[1],
f1d8b804 892 (256 + 128) * sizeof (uint32_t));
19bc17a9 893
49f2be5b
UD
894 CTYPE_DATA (_NL_CTYPE_TOUPPER32,
895 ctype->map32[0],
f1d8b804 896 (ctype->plane_size * ctype->plane_cnt)
49f2be5b
UD
897 * sizeof (uint32_t));
898 CTYPE_DATA (_NL_CTYPE_TOLOWER32,
899 ctype->map32[1],
f1d8b804 900 (ctype->plane_size * ctype->plane_cnt)
49f2be5b
UD
901 * sizeof (uint32_t));
902
19bc17a9
RM
903 CTYPE_DATA (_NL_CTYPE_CLASS32,
904 ctype->ctype32_b,
905 (ctype->plane_size * ctype->plane_cnt
906 * sizeof (char_class32_t)));
907
4a33c2f5
UD
908 CTYPE_DATA (_NL_CTYPE_NAMES,
909 ctype->names, (ctype->plane_size * ctype->plane_cnt
910 * sizeof (uint32_t)));
911
04fbc779
UD
912 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
913 &ctype->translit_idx_size, sizeof (uint32_t));
4a33c2f5
UD
914
915 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
916 ctype->translit_from_idx,
04fbc779 917 ctype->translit_idx_size * sizeof (uint32_t));
4b10dd6c 918
4a33c2f5
UD
919 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
920 ctype->translit_from_tbl,
4b10dd6c
UD
921 ctype->translit_from_tbl_size);
922
4a33c2f5
UD
923 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
924 ctype->translit_to_idx,
04fbc779 925 ctype->translit_idx_size * sizeof (uint32_t));
4b10dd6c 926
4a33c2f5
UD
927 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
928 ctype->translit_to_tbl, ctype->translit_to_tbl_size);
4b10dd6c 929
4a33c2f5 930 CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
4b10dd6c 931 &ctype->plane_size, sizeof (uint32_t));
4a33c2f5 932 CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
4b10dd6c 933 &ctype->plane_cnt, sizeof (uint32_t));
19bc17a9 934
75cd5204
RM
935 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
936 /* The class name array. */
937 total = 0;
938 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
939 {
940 iov[2 + elem + offset].iov_base
941 = (void *) ctype->classnames[cnt];
942 iov[2 + elem + offset].iov_len
943 = strlen (ctype->classnames[cnt]) + 1;
944 total += iov[2 + elem + offset].iov_len;
945 }
e43e0dd6 946 iov[2 + elem + offset].iov_base = (void *) nulbytes;
ce7a5ef4
RM
947 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
948 total += 1 + (4 - ((total + 1) % 4));
75cd5204 949
4b10dd6c 950 idx[elem + 1] = idx[elem] + total;
75cd5204
RM
951 break;
952
953 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
954 /* The class name array. */
955 total = 0;
956 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
957 {
958 iov[2 + elem + offset].iov_base
959 = (void *) ctype->mapnames[cnt];
960 iov[2 + elem + offset].iov_len
961 = strlen (ctype->mapnames[cnt]) + 1;
962 total += iov[2 + elem + offset].iov_len;
963 }
e43e0dd6 964 iov[2 + elem + offset].iov_base = (void *) nulbytes;
ce7a5ef4
RM
965 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
966 total += 1 + (4 - ((total + 1) % 4));
75cd5204 967
4b10dd6c 968 idx[elem + 1] = idx[elem] + total;
75cd5204 969 break;
19bc17a9
RM
970
971 CTYPE_DATA (_NL_CTYPE_WIDTH,
5866b131
UD
972 ctype->width,
973 (ctype->plane_size * ctype->plane_cnt + 3) & ~3ul);
19bc17a9 974
0200214b 975 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
4b10dd6c 976 &ctype->mb_cur_max, sizeof (uint32_t));
0200214b 977
ce7a5ef4
RM
978 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
979 total = strlen (ctype->codeset_name) + 1;
980 if (total % 4 == 0)
981 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
982 else
983 {
984 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
9756dfe1
UD
985 memset (mempcpy (iov[2 + elem + offset].iov_base,
986 ctype->codeset_name, total),
987 '\0', 4 - (total & 3));
ce7a5ef4
RM
988 total = (total + 3) & ~3;
989 }
990 iov[2 + elem + offset].iov_len = total;
4b10dd6c
UD
991 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
992 break;
993
4a33c2f5 994 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
4b10dd6c
UD
995 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
996 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
4a33c2f5
UD
997 *(uint32_t *) iov[2 + elem + offset].iov_base =
998 ctype->mbdigits_act / 10;
a9c27b3e 999 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
4b10dd6c
UD
1000 break;
1001
4a33c2f5 1002 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
e43e0dd6
UD
1003 /* Align entries. */
1004 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1005 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1006 idx[elem] += iov[2 + elem + offset].iov_len;
1007 ++offset;
1008
4b10dd6c
UD
1009 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1010 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
4a33c2f5
UD
1011 *(uint32_t *) iov[2 + elem + offset].iov_base =
1012 ctype->wcdigits_act / 10;
a9c27b3e 1013 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
4b10dd6c
UD
1014 break;
1015
e43e0dd6 1016 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
4b10dd6c
UD
1017 /* Compute the length of all possible characters. For INDIGITS
1018 there might be more than one. We simply concatenate all of
1019 them with a NUL byte following. The NUL byte wouldn't be
1020 necessary but it makes it easier for the user. */
1021 total = 0;
f175216d 1022
498b733e 1023 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
4b10dd6c
UD
1024 cnt < ctype->mbdigits_act; cnt += 10)
1025 total += ctype->mbdigits[cnt]->nbytes + 1;
1026 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1027 iov[2 + elem + offset].iov_len = total;
1028
1029 cp = iov[2 + elem + offset].iov_base;
498b733e 1030 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
4b10dd6c
UD
1031 cnt < ctype->mbdigits_act; cnt += 10)
1032 {
1033 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
1034 ctype->mbdigits[cnt]->nbytes);
1035 *cp++ = '\0';
1036 }
a9c27b3e 1037 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
4b10dd6c
UD
1038 break;
1039
1040 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1041 /* Compute the length of all possible characters. For INDIGITS
1042 there might be more than one. We simply concatenate all of
1043 them with a NUL byte following. The NUL byte wouldn't be
1044 necessary but it makes it easier for the user. */
498b733e 1045 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
4b10dd6c
UD
1046 total = ctype->mboutdigits[cnt]->nbytes + 1;
1047 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1048 iov[2 + elem + offset].iov_len = total;
1049
1050 *(char *) mempcpy (iov[2 + elem + offset].iov_base,
498b733e
UD
1051 ctype->mboutdigits[cnt]->bytes,
1052 ctype->mboutdigits[cnt]->nbytes) = '\0';
a9c27b3e 1053 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
4b10dd6c
UD
1054 break;
1055
e43e0dd6 1056 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
4b10dd6c
UD
1057 total = ctype->wcdigits_act / 10;
1058
1059 iov[2 + elem + offset].iov_base =
1060 (uint32_t *) alloca (total * sizeof (uint32_t));
1061 iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
1062
498b733e 1063 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
4b10dd6c
UD
1064 cnt < ctype->wcdigits_act; cnt += 10)
1065 ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
4a33c2f5 1066 = ctype->wcdigits[cnt];
a9c27b3e 1067 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
4b10dd6c
UD
1068 break;
1069
e43e0dd6
UD
1070 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC):
1071 /* Align entries. */
1072 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1073 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1074 idx[elem] += iov[2 + elem + offset].iov_len;
1075 ++offset;
1076 /* FALLTHROUGH */
1077
1078 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
498b733e 1079 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
4b10dd6c
UD
1080 iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
1081 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
a9c27b3e 1082 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
4b10dd6c
UD
1083 break;
1084
a8e4c924
UD
1085 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1086 default_missing_len = (ctype->default_missing
1087 ? wcslen ((wchar_t *)ctype->default_missing)
7f455351 1088 : 0);
a8e4c924
UD
1089 iov[2 + elem + offset].iov_base = &default_missing_len;
1090 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1091 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1092 break;
1093
1d96d74d
UD
1094 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1095 iov[2 + elem + offset].iov_base =
1096 ctype->default_missing ?: (uint32_t *) L"";
1097 iov[2 + elem + offset].iov_len =
1098 wcslen (iov[2 + elem + offset].iov_base);
1099 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1100 break;
1101
a8e4c924
UD
1102 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1103 iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore;
1d96d74d 1104 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
a8e4c924
UD
1105 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1106 break;
1107
1108 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1109 {
1110 uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore
1111 * 3 * sizeof (uint32_t));
1112 struct translit_ignore_t *runp;
1113
1114 iov[2 + elem + offset].iov_base = ranges;
1115 iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore
1116 * 3 * sizeof (uint32_t));
1117
1118 for (runp = ctype->translit_ignore; runp != NULL;
1119 runp = runp->next)
1120 {
1121 *ranges++ = runp->from;
1122 *ranges++ = runp->to;
1123 *ranges++ = runp->step;
1124 }
1125 }
1d96d74d
UD
1126 /* Remove the following line in case a new entry is added
1127 after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN. */
1128 if (elem < nelems)
1129 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1130 break;
1131
19bc17a9
RM
1132 default:
1133 assert (! "unknown CTYPE element");
1134 }
1135 else
1136 {
1137 /* Handle extra maps. */
5491da0d 1138 size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) + 2;
19bc17a9 1139
49f2be5b 1140 iov[2 + elem + offset].iov_base = ctype->map32[nr];
75cd5204 1141 iov[2 + elem + offset].iov_len = ((ctype->plane_size
f1d8b804 1142 * ctype->plane_cnt)
4b10dd6c 1143 * sizeof (uint32_t));
19bc17a9 1144
4b10dd6c 1145 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
75cd5204 1146 }
19bc17a9 1147 }
19bc17a9 1148
75cd5204 1149 assert (2 + elem + offset == (nelems + ctype->nr_charclass
f175216d 1150 + ctype->map_collection_nr + 2 + 2));
19bc17a9 1151
83b1b6d8 1152 write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
19bc17a9
RM
1153}
1154
1155
4b10dd6c
UD
1156/* Local functions. */
1157static void
1158ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1159 const char *name)
19bc17a9 1160{
4b10dd6c 1161 size_t cnt;
19bc17a9 1162
4b10dd6c
UD
1163 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1164 if (strcmp (ctype->classnames[cnt], name) == 0)
1165 break;
19bc17a9 1166
4b10dd6c
UD
1167 if (cnt < ctype->nr_charclass)
1168 {
1169 lr_error (lr, _("character class `%s' already defined"), name);
1170 return;
1171 }
19bc17a9 1172
4b10dd6c
UD
1173 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1174 /* Exit code 2 is prescribed in P1003.2b. */
1175 error (2, 0, _("\
5d431a3e 1176implementation limit: no more than %Zd character classes allowed"),
4b10dd6c 1177 MAX_NR_CHARCLASS);
19bc17a9 1178
4b10dd6c 1179 ctype->classnames[ctype->nr_charclass++] = name;
19bc17a9
RM
1180}
1181
1182
4b10dd6c
UD
1183static void
1184ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1185 const char *name, struct charmap_t *charmap)
19bc17a9 1186{
4b10dd6c 1187 size_t max_chars = 0;
ba1ffaa1 1188 size_t cnt;
19bc17a9 1189
4b10dd6c 1190 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
19bc17a9 1191 {
4b10dd6c
UD
1192 if (strcmp (ctype->mapnames[cnt], name) == 0)
1193 break;
1194
1195 if (max_chars < ctype->map_collection_max[cnt])
1196 max_chars = ctype->map_collection_max[cnt];
19bc17a9
RM
1197 }
1198
4b10dd6c
UD
1199 if (cnt < ctype->map_collection_nr)
1200 {
1201 lr_error (lr, _("character map `%s' already defined"), name);
1202 return;
1203 }
19bc17a9 1204
4b10dd6c
UD
1205 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1206 /* Exit code 2 is prescribed in P1003.2b. */
1207 error (2, 0, _("\
1208implementation limit: no more than %d character maps allowed"),
1209 MAX_NR_CHARMAP);
19bc17a9 1210
4b10dd6c
UD
1211 ctype->mapnames[cnt] = name;
1212
1213 if (max_chars == 0)
1214 ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1215 else
1216 ctype->map_collection_max[cnt] = max_chars;
1217
1218 ctype->map_collection[cnt] = (uint32_t *)
5866b131 1219 xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
4b10dd6c 1220 ctype->map_collection_act[cnt] = 256;
19bc17a9 1221
4b10dd6c 1222 ++ctype->map_collection_nr;
19bc17a9
RM
1223}
1224
1225
4b10dd6c 1226/* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
42d7c593 1227 is possible if we only want to extend the name array. */
4b10dd6c
UD
1228static uint32_t *
1229find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1230 size_t *act, uint32_t idx)
19bc17a9 1231{
4b10dd6c 1232 size_t cnt;
19bc17a9 1233
4b10dd6c
UD
1234 if (idx < 256)
1235 return table == NULL ? NULL : &(*table)[idx];
19bc17a9 1236
4b10dd6c
UD
1237 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1238 if (ctype->charnames[cnt] == idx)
1239 break;
19bc17a9 1240
4b10dd6c
UD
1241 /* We have to distinguish two cases: the name is found or not. */
1242 if (cnt == ctype->charnames_act)
1243 {
1244 /* Extend the name array. */
1245 if (ctype->charnames_act == ctype->charnames_max)
1246 {
1247 ctype->charnames_max *= 2;
5866b131 1248 ctype->charnames = (uint32_t *)
4b10dd6c 1249 xrealloc (ctype->charnames,
5866b131 1250 sizeof (uint32_t) * ctype->charnames_max);
4b10dd6c
UD
1251 }
1252 ctype->charnames[ctype->charnames_act++] = idx;
1253 }
19bc17a9 1254
4b10dd6c
UD
1255 if (table == NULL)
1256 /* We have done everything we are asked to do. */
1257 return NULL;
19bc17a9 1258
4b10dd6c
UD
1259 if (cnt >= *act)
1260 {
1261 if (cnt >= *max)
1262 {
1263 size_t old_max = *max;
1264 do
1265 *max *= 2;
1266 while (*max <= cnt);
19bc17a9 1267
4b10dd6c 1268 *table =
5866b131 1269 (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
4b10dd6c
UD
1270 memset (&(*table)[old_max], '\0',
1271 (*max - old_max) * sizeof (uint32_t));
1272 }
19bc17a9 1273
76e680a8 1274 *act = cnt + 1;
4b10dd6c 1275 }
19bc17a9 1276
4b10dd6c 1277 return &(*table)[cnt];
19bc17a9
RM
1278}
1279
1280
4b10dd6c
UD
1281static int
1282get_character (struct token *now, struct charmap_t *charmap,
1283 struct repertoire_t *repertoire,
1284 struct charseq **seqp, uint32_t *wchp)
19bc17a9 1285{
4b10dd6c
UD
1286 if (now->tok == tok_bsymbol)
1287 {
1288 /* This will hopefully be the normal case. */
1289 *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1290 now->val.str.lenmb);
1291 *seqp = charmap_find_value (charmap, now->val.str.startmb,
1292 now->val.str.lenmb);
1293 }
1294 else if (now->tok == tok_ucs4)
1295 {
f0a4b6b1
UD
1296 char utmp[10];
1297
1298 snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1299 *seqp = charmap_find_value (charmap, utmp, 9);
1300
1301 if (*seqp == NULL)
1302 *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
19bc17a9 1303
4b10dd6c
UD
1304 if (*seqp == NULL)
1305 {
1306 /* Compute the value in the charmap from the UCS value. */
1307 const char *symbol = repertoire_find_symbol (repertoire,
1308 now->val.ucs4);
19bc17a9 1309
4b10dd6c
UD
1310 if (symbol == NULL)
1311 *seqp = NULL;
1312 else
1313 *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
19bc17a9 1314
4b10dd6c
UD
1315 if (*seqp == NULL)
1316 {
723faa38
UD
1317 if (repertoire != NULL)
1318 {
1319 /* Insert a negative entry. */
1320 static const struct charseq negative
1321 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1322 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1323 sizeof (uint32_t));
1324 *newp = now->val.ucs4;
1325
1326 insert_entry (&repertoire->seq_table, newp,
1327 sizeof (uint32_t), (void *) &negative);
1328 }
4b10dd6c
UD
1329 }
1330 else
1331 (*seqp)->ucs4 = now->val.ucs4;
1332 }
1333 else if ((*seqp)->ucs4 != now->val.ucs4)
1334 *seqp = NULL;
19bc17a9 1335
4b10dd6c
UD
1336 *wchp = now->val.ucs4;
1337 }
1338 else if (now->tok == tok_charcode)
1339 {
1340 /* We must map from the byte code to UCS4. */
1341 *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1342 now->val.str.lenmb);
19bc17a9 1343
4b10dd6c
UD
1344 if (*seqp == NULL)
1345 *wchp = ILLEGAL_CHAR_VALUE;
1346 else
1347 {
1348 if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1349 (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1350 strlen ((*seqp)->name));
1351 *wchp = (*seqp)->ucs4;
1352 }
1353 }
1354 else
1355 return 1;
19bc17a9
RM
1356
1357 return 0;
1358}
1359
1360
a0dc5206
UD
1361/* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1362 the .(2). counterparts. */
4b10dd6c
UD
1363static void
1364charclass_symbolic_ellipsis (struct linereader *ldfile,
1365 struct locale_ctype_t *ctype,
1366 struct charmap_t *charmap,
1367 struct repertoire_t *repertoire,
1368 struct token *now,
1369 const char *last_str,
1370 unsigned long int class256_bit,
1371 unsigned long int class_bit, int base,
a0dc5206 1372 int ignore_content, int handle_digits, int step)
19bc17a9 1373{
4b10dd6c
UD
1374 const char *nowstr = now->val.str.startmb;
1375 char tmp[now->val.str.lenmb + 1];
1376 const char *cp;
1377 char *endp;
1378 unsigned long int from;
1379 unsigned long int to;
19bc17a9 1380
4b10dd6c
UD
1381 /* We have to compute the ellipsis values using the symbolic names. */
1382 assert (last_str != NULL);
1383
1384 if (strlen (last_str) != now->val.str.lenmb)
19bc17a9 1385 {
4b10dd6c
UD
1386 invalid_range:
1387 lr_error (ldfile,
549b3c3a 1388 _("`%s' and `%.*s' are no valid names for symbolic range"),
f6ada7ad 1389 last_str, (int) now->val.str.lenmb, nowstr);
4b10dd6c 1390 return;
19bc17a9
RM
1391 }
1392
4b10dd6c
UD
1393 if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1394 /* Nothing to do, the names are the same. */
1395 return;
19bc17a9 1396
4b10dd6c
UD
1397 for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1398 ;
19bc17a9 1399
4b10dd6c
UD
1400 errno = 0;
1401 from = strtoul (cp, &endp, base);
1402 if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1403 goto invalid_range;
19bc17a9 1404
4b10dd6c 1405 to = strtoul (nowstr + (cp - last_str), &endp, base);
549b3c3a
UD
1406 if ((to == UINT_MAX && errno == ERANGE)
1407 || (endp - nowstr) != now->val.str.lenmb || from >= to)
4b10dd6c 1408 goto invalid_range;
19bc17a9 1409
4b10dd6c
UD
1410 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1411 if (!ignore_content)
1412 {
1413 now->val.str.startmb = tmp;
a0dc5206 1414 while ((from += step) <= to)
4b10dd6c
UD
1415 {
1416 struct charseq *seq;
1417 uint32_t wch;
19bc17a9 1418
4b10dd6c
UD
1419 sprintf (tmp, (base == 10 ? "%.*s%0*d" : "%.*s%0*X"), cp - last_str,
1420 last_str, now->val.str.lenmb - (cp - last_str), from);
19bc17a9 1421
4b10dd6c
UD
1422 get_character (now, charmap, repertoire, &seq, &wch);
1423
1424 if (seq != NULL && seq->nbytes == 1)
1425 /* Yep, we can store information about this byte sequence. */
1426 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
19bc17a9 1427
4b10dd6c
UD
1428 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1429 /* We have the UCS4 position. */
1430 *find_idx (ctype, &ctype->class_collection,
1431 &ctype->class_collection_max,
1432 &ctype->class_collection_act, wch) |= class_bit;
19bc17a9 1433
4b10dd6c
UD
1434 if (handle_digits == 1)
1435 {
1436 /* We must store the digit values. */
1437 if (ctype->mbdigits_act == ctype->mbdigits_max)
1438 {
1439 ctype->mbdigits_max *= 2;
1440 ctype->mbdigits = xrealloc (ctype->mbdigits,
1441 (ctype->mbdigits_max
1442 * sizeof (char *)));
1443 ctype->wcdigits_max *= 2;
1444 ctype->wcdigits = xrealloc (ctype->wcdigits,
1445 (ctype->wcdigits_max
1446 * sizeof (uint32_t)));
1447 }
1448
1449 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1450 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1451 }
1452 else if (handle_digits == 2)
1453 {
1454 /* We must store the digit values. */
1455 if (ctype->outdigits_act >= 10)
1456 {
1457 lr_error (ldfile, _("\
1458%s: field `%s' does not contain exactly ten entries"),
1459 "LC_CTYPE", "outdigit");
1460 return;
1461 }
1462
1463 ctype->mboutdigits[ctype->outdigits_act] = seq;
1464 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1465 ++ctype->outdigits_act;
1466 }
1467 }
1468 }
19bc17a9
RM
1469}
1470
1471
a0dc5206 1472/* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
4b10dd6c
UD
1473static void
1474charclass_ucs4_ellipsis (struct linereader *ldfile,
1475 struct locale_ctype_t *ctype,
1476 struct charmap_t *charmap,
1477 struct repertoire_t *repertoire,
1478 struct token *now, uint32_t last_wch,
1479 unsigned long int class256_bit,
1480 unsigned long int class_bit, int ignore_content,
a0dc5206 1481 int handle_digits, int step)
19bc17a9 1482{
4b10dd6c 1483 if (last_wch > now->val.ucs4)
19bc17a9 1484 {
4b10dd6c
UD
1485 lr_error (ldfile, _("\
1486to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1487 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1488 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
19bc17a9
RM
1489 return;
1490 }
1491
4b10dd6c 1492 if (!ignore_content)
a0dc5206 1493 while ((last_wch += step) <= now->val.ucs4)
4b10dd6c
UD
1494 {
1495 /* We have to find out whether there is a byte sequence corresponding
1496 to this UCS4 value. */
f0a4b6b1
UD
1497 struct charseq *seq;
1498 char utmp[10];
1499
1500 snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1501 seq = charmap_find_value (charmap, utmp, 9);
a0dc5206
UD
1502 if (seq == NULL)
1503 {
1504 snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1505 seq = charmap_find_value (charmap, utmp, 5);
1506 }
f0a4b6b1
UD
1507
1508 if (seq == NULL)
1509 /* Try looking in the repertoire map. */
1510 seq = repertoire_find_seq (repertoire, last_wch);
19bc17a9 1511
4b10dd6c
UD
1512 /* If this is the first time we look for this sequence create a new
1513 entry. */
1514 if (seq == NULL)
1515 {
f0a4b6b1
UD
1516 static const struct charseq negative
1517 = { .ucs4 = ILLEGAL_CHAR_VALUE };
19bc17a9 1518
f0a4b6b1
UD
1519 /* Find the symbolic name for this UCS4 value. */
1520 if (repertoire != NULL)
4b10dd6c 1521 {
f0a4b6b1
UD
1522 const char *symbol = repertoire_find_symbol (repertoire,
1523 last_wch);
5866b131
UD
1524 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1525 sizeof (uint32_t));
f0a4b6b1
UD
1526 *newp = last_wch;
1527
1528 if (symbol != NULL)
1529 /* We have a name, now search the multibyte value. */
1530 seq = charmap_find_value (charmap, symbol, strlen (symbol));
1531
1532 if (seq == NULL)
1533 /* We have to create a fake entry. */
1534 seq = (struct charseq *) &negative;
1535 else
1536 seq->ucs4 = last_wch;
1537
5866b131
UD
1538 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1539 seq);
4b10dd6c
UD
1540 }
1541 else
f0a4b6b1
UD
1542 /* We have to create a fake entry. */
1543 seq = (struct charseq *) &negative;
4b10dd6c
UD
1544 }
1545
1546 /* We have a name, now search the multibyte value. */
1547 if (seq->ucs4 == last_wch && seq->nbytes == 1)
1548 /* Yep, we can store information about this byte sequence. */
1549 ctype->class256_collection[(size_t) seq->bytes[0]]
1550 |= class256_bit;
1551
1552 /* And of course we have the UCS4 position. */
5866b131 1553 if (class_bit != 0)
4b10dd6c
UD
1554 *find_idx (ctype, &ctype->class_collection,
1555 &ctype->class_collection_max,
1556 &ctype->class_collection_act, last_wch) |= class_bit;
1557
1558 if (handle_digits == 1)
1559 {
1560 /* We must store the digit values. */
1561 if (ctype->mbdigits_act == ctype->mbdigits_max)
1562 {
1563 ctype->mbdigits_max *= 2;
1564 ctype->mbdigits = xrealloc (ctype->mbdigits,
1565 (ctype->mbdigits_max
1566 * sizeof (char *)));
1567 ctype->wcdigits_max *= 2;
1568 ctype->wcdigits = xrealloc (ctype->wcdigits,
1569 (ctype->wcdigits_max
1570 * sizeof (uint32_t)));
1571 }
1572
1573 ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1574 ? seq : NULL);
1575 ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1576 }
1577 else if (handle_digits == 2)
1578 {
1579 /* We must store the digit values. */
1580 if (ctype->outdigits_act >= 10)
1581 {
1582 lr_error (ldfile, _("\
1583%s: field `%s' does not contain exactly ten entries"),
1584 "LC_CTYPE", "outdigit");
1585 return;
1586 }
19bc17a9 1587
4b10dd6c
UD
1588 ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1589 ? seq : NULL);
1590 ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1591 ++ctype->outdigits_act;
1592 }
1593 }
19bc17a9
RM
1594}
1595
1596
4b10dd6c 1597/* Ellipsis as in `/xea/x12.../xea/x34'. */
19bc17a9 1598static void
4b10dd6c
UD
1599charclass_charcode_ellipsis (struct linereader *ldfile,
1600 struct locale_ctype_t *ctype,
1601 struct charmap_t *charmap,
1602 struct repertoire_t *repertoire,
1603 struct token *now, char *last_charcode,
1604 uint32_t last_charcode_len,
1605 unsigned long int class256_bit,
1606 unsigned long int class_bit, int ignore_content,
1607 int handle_digits)
19bc17a9 1608{
4b10dd6c
UD
1609 /* First check whether the to-value is larger. */
1610 if (now->val.charcode.nbytes != last_charcode_len)
1611 {
1612 lr_error (ldfile, _("\
1613start end end character sequence of range must have the same length"));
1614 return;
1615 }
19bc17a9 1616
4b10dd6c 1617 if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
19bc17a9 1618 {
4b10dd6c
UD
1619 lr_error (ldfile, _("\
1620to-value character sequence is smaller than from-value sequence"));
19bc17a9
RM
1621 return;
1622 }
1623
4b10dd6c
UD
1624 if (!ignore_content)
1625 {
1626 do
1627 {
1628 /* Increment the byte sequence value. */
1629 struct charseq *seq;
1630 uint32_t wch;
1631 int i;
1632
1633 for (i = last_charcode_len - 1; i >= 0; --i)
1634 if (++last_charcode[i] != 0)
1635 break;
1636
1637 if (last_charcode_len == 1)
1638 /* Of course we have the charcode value. */
1639 ctype->class256_collection[(size_t) last_charcode[0]]
1640 |= class256_bit;
1641
1642 /* Find the symbolic name. */
1643 seq = charmap_find_symbol (charmap, last_charcode,
1644 last_charcode_len);
1645 if (seq != NULL)
1646 {
1647 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1648 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1649 strlen (seq->name));
f0a4b6b1 1650 wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
4b10dd6c
UD
1651
1652 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1653 *find_idx (ctype, &ctype->class_collection,
1654 &ctype->class_collection_max,
1655 &ctype->class_collection_act, wch) |= class_bit;
1656 }
1657 else
1658 wch = ILLEGAL_CHAR_VALUE;
19bc17a9 1659
4b10dd6c
UD
1660 if (handle_digits == 1)
1661 {
1662 /* We must store the digit values. */
1663 if (ctype->mbdigits_act == ctype->mbdigits_max)
1664 {
1665 ctype->mbdigits_max *= 2;
1666 ctype->mbdigits = xrealloc (ctype->mbdigits,
1667 (ctype->mbdigits_max
1668 * sizeof (char *)));
1669 ctype->wcdigits_max *= 2;
1670 ctype->wcdigits = xrealloc (ctype->wcdigits,
1671 (ctype->wcdigits_max
1672 * sizeof (uint32_t)));
1673 }
1674
1675 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1676 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1677 seq->nbytes = last_charcode_len;
1678
1679 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1680 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1681 }
1682 else if (handle_digits == 2)
1683 {
1684 struct charseq *seq;
1685 /* We must store the digit values. */
1686 if (ctype->outdigits_act >= 10)
1687 {
1688 lr_error (ldfile, _("\
1689%s: field `%s' does not contain exactly ten entries"),
1690 "LC_CTYPE", "outdigit");
1691 return;
1692 }
1693
1694 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1695 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1696 seq->nbytes = last_charcode_len;
1697
1698 ctype->mboutdigits[ctype->outdigits_act] = seq;
1699 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1700 ++ctype->outdigits_act;
1701 }
1702 }
1703 while (memcmp (last_charcode, now->val.charcode.bytes,
1704 last_charcode_len) != 0);
1705 }
19bc17a9
RM
1706}
1707
1708
4b10dd6c
UD
1709/* Read one transliteration entry. */
1710static uint32_t *
1711read_widestring (struct linereader *ldfile, struct token *now,
1712 struct charmap_t *charmap, struct repertoire_t *repertoire)
19bc17a9 1713{
4b10dd6c 1714 uint32_t *wstr;
19bc17a9 1715
4b10dd6c
UD
1716 if (now->tok == tok_default_missing)
1717 /* The special name "" will denote this case. */
5866b131 1718 wstr = ((uint32_t *) { 0 });
4b10dd6c 1719 else if (now->tok == tok_bsymbol)
19bc17a9 1720 {
4b10dd6c 1721 /* Get the value from the repertoire. */
a673fbcb 1722 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
4b10dd6c
UD
1723 wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1724 now->val.str.lenmb);
1725 if (wstr[0] == ILLEGAL_CHAR_VALUE)
f0a4b6b1
UD
1726 {
1727 /* We cannot proceed, we don't know the UCS4 value. */
1728 free (wstr);
1729 return NULL;
1730 }
4b10dd6c
UD
1731
1732 wstr[1] = 0;
19bc17a9 1733 }
4b10dd6c 1734 else if (now->tok == tok_ucs4)
19bc17a9 1735 {
a673fbcb 1736 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
4b10dd6c
UD
1737 wstr[0] = now->val.ucs4;
1738 wstr[1] = 0;
1739 }
1740 else if (now->tok == tok_charcode)
1741 {
1742 /* Argh, we have to convert to the symbol name first and then to the
1743 UCS4 value. */
1744 struct charseq *seq = charmap_find_symbol (charmap,
1745 now->val.str.startmb,
1746 now->val.str.lenmb);
1747 if (seq == NULL)
1748 /* Cannot find the UCS4 value. */
1749 return NULL;
1750
1751 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1752 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1753 strlen (seq->name));
1754 if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1755 /* We cannot proceed, we don't know the UCS4 value. */
1756 return NULL;
1757
a673fbcb 1758 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
4b10dd6c
UD
1759 wstr[0] = seq->ucs4;
1760 wstr[1] = 0;
1761 }
1762 else if (now->tok == tok_string)
1763 {
1764 wstr = now->val.str.startwc;
a673fbcb 1765 if (wstr == NULL || wstr[0] == 0)
4b10dd6c
UD
1766 return NULL;
1767 }
1768 else
1769 {
1770 if (now->tok != tok_eol && now->tok != tok_eof)
1771 lr_ignore_rest (ldfile, 0);
1772 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1773 return (uint32_t *) -1l;
19bc17a9
RM
1774 }
1775
4b10dd6c
UD
1776 return wstr;
1777}
19bc17a9 1778
19bc17a9 1779
4b10dd6c
UD
1780static void
1781read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1782 struct token *now, struct charmap_t *charmap,
1783 struct repertoire_t *repertoire)
1784{
1785 uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1786 struct translit_t *result;
1787 struct translit_to_t **top;
a673fbcb 1788 struct obstack *ob = &ctype->mempool;
4b10dd6c
UD
1789 int first;
1790 int ignore;
1791
1792 if (from_wstr == NULL)
1793 /* There is no valid from string. */
1794 return;
19bc17a9 1795
4b10dd6c
UD
1796 result = (struct translit_t *) obstack_alloc (ob,
1797 sizeof (struct translit_t));
1798 result->from = from_wstr;
a673fbcb
UD
1799 result->fname = ldfile->fname;
1800 result->lineno = ldfile->lineno;
4b10dd6c
UD
1801 result->next = NULL;
1802 result->to = NULL;
1803 top = &result->to;
1804 first = 1;
1805 ignore = 0;
1806
1807 while (1)
1808 {
1809 uint32_t *to_wstr;
1810
1811 /* Next we have one or more transliterations. They are
1812 separated by semicolons. */
1813 now = lr_token (ldfile, charmap, repertoire);
1814
1815 if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1816 {
1817 /* One string read. */
1818 const uint32_t zero = 0;
1819
1820 if (!ignore)
1821 {
1822 obstack_grow (ob, &zero, 4);
1823 to_wstr = obstack_finish (ob);
1824
1825 *top = obstack_alloc (ob, sizeof (struct translit_to_t));
1826 (*top)->str = to_wstr;
1827 (*top)->next = NULL;
1828 }
1829
1830 if (now->tok == tok_eol)
1831 {
1832 result->next = ctype->translit;
1833 ctype->translit = result;
1834 return;
1835 }
1836
1837 if (!ignore)
1838 top = &(*top)->next;
1839 ignore = 0;
1840 }
1841 else
1842 {
1843 to_wstr = read_widestring (ldfile, now, charmap, repertoire);
1844 if (to_wstr == (uint32_t *) -1l)
1845 {
1846 /* An error occurred. */
1847 obstack_free (ob, result);
1848 return;
1849 }
1850
1851 if (to_wstr == NULL)
1852 ignore = 1;
1853 else
1854 /* This value is usable. */
1855 obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
19bc17a9 1856
4b10dd6c
UD
1857 first = 0;
1858 }
1859 }
19bc17a9
RM
1860}
1861
1862
a673fbcb
UD
1863static void
1864read_translit_ignore_entry (struct linereader *ldfile,
1865 struct locale_ctype_t *ctype,
1866 struct charmap_t *charmap,
1867 struct repertoire_t *repertoire)
1868{
1869 /* We expect a semicolon-separated list of characters we ignore. We are
1870 only interested in the wide character definitions. These must be
1871 single characters, possibly defining a range when an ellipsis is used. */
1872 while (1)
1873 {
1874 struct token *now = lr_token (ldfile, charmap, repertoire);
1875 struct translit_ignore_t *newp;
1876 uint32_t from;
1877
1878 if (now->tok == tok_eol || now->tok == tok_eof)
1879 {
1880 lr_error (ldfile,
1881 _("premature end of `translit_ignore' definition"));
1882 return;
1883 }
1884
1885 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1886 {
1887 lr_error (ldfile, _("syntax error"));
1888 lr_ignore_rest (ldfile, 0);
1889 return;
1890 }
1891
1892 if (now->tok == tok_ucs4)
1893 from = now->val.ucs4;
1894 else
f0a4b6b1
UD
1895 /* Try to get the value. */
1896 from = repertoire_find_value (repertoire, now->val.str.startmb,
1897 now->val.str.lenmb);
a673fbcb
UD
1898
1899 if (from == ILLEGAL_CHAR_VALUE)
1900 {
1901 lr_error (ldfile, "invalid character name");
1902 newp = NULL;
1903 }
1904 else
1905 {
1906 newp = (struct translit_ignore_t *)
1907 obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
1908 newp->from = from;
1909 newp->to = from;
a0dc5206 1910 newp->step = 1;
a673fbcb
UD
1911
1912 newp->next = ctype->translit_ignore;
1913 ctype->translit_ignore = newp;
1914 }
1915
1916 /* Now we expect either a semicolon, an ellipsis, or the end of the
1917 line. */
1918 now = lr_token (ldfile, charmap, repertoire);
1919
a0dc5206 1920 if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
a673fbcb
UD
1921 {
1922 /* XXX Should we bother implementing `....'? `...' certainly
1923 will not be implemented. */
1924 uint32_t to;
a0dc5206 1925 int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
a673fbcb
UD
1926
1927 now = lr_token (ldfile, charmap, repertoire);
1928
1929 if (now->tok == tok_eol || now->tok == tok_eof)
1930 {
1931 lr_error (ldfile,
1932 _("premature end of `translit_ignore' definition"));
1933 return;
1934 }
1935
1936 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1937 {
1938 lr_error (ldfile, _("syntax error"));
1939 lr_ignore_rest (ldfile, 0);
1940 return;
1941 }
1942
1943 if (now->tok == tok_ucs4)
1944 to = now->val.ucs4;
1945 else
f0a4b6b1
UD
1946 /* Try to get the value. */
1947 to = repertoire_find_value (repertoire, now->val.str.startmb,
1948 now->val.str.lenmb);
a673fbcb
UD
1949
1950 if (to == ILLEGAL_CHAR_VALUE)
1951 lr_error (ldfile, "invalid character name");
1952 else
1953 {
1954 /* Make sure the `to'-value is larger. */
1955 if (to >= from)
a0dc5206
UD
1956 {
1957 newp->to = to;
1958 newp->step = step;
1959 }
a673fbcb
UD
1960 else
1961 lr_error (ldfile, _("\
1962to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1963 (to | from) < 65536 ? 4 : 8, to,
1964 (to | from) < 65536 ? 4 : 8, from);
1965 }
1966
1967 /* And the next token. */
1968 now = lr_token (ldfile, charmap, repertoire);
1969 }
1970
1971 if (now->tok == tok_eol || now->tok == tok_eof)
1972 /* We are done. */
1973 return;
1974
1975 if (now->tok == tok_semicolon)
1976 /* Next round. */
1977 continue;
1978
1979 /* If we come here something is wrong. */
1980 lr_error (ldfile, _("syntax error"));
1981 lr_ignore_rest (ldfile, 0);
1982 return;
1983 }
1984}
1985
1986
4b10dd6c
UD
1987/* The parser for the LC_CTYPE section of the locale definition.

   Tokens are read from LDFILE with lr_token.  If REPERTOIRE_NAME is not
   NULL the repertoire is loaded with repertoire_read and passed on to
   the character lookup helpers.  If the first keyword is `copy' the work
   is handed to handle_copy and nothing else is parsed here.  Otherwise
   ctype_startup prepares RESULT->categories[LC_CTYPE].ctype and the
   section is processed keyword by keyword.

   When IGNORE_CONTENT is nonzero the class, digit, outdigit, mapping,
   translit and identifier lines are skipped with lr_ignore_rest; the
   tok_charclass and tok_charconv lines are still processed.

   The function returns once an `END' line has been read (an error is
   reported if it is not followed by LC_CTYPE).  Running into the end of
   the file is reported as "premature end of file".  */
1988void
1989ctype_read (struct linereader *ldfile, struct localedef_t *result,
1990 struct charmap_t *charmap, const char *repertoire_name,
1991 int ignore_content)
19bc17a9 1992{
4b10dd6c
UD
1993 struct repertoire_t *repertoire = NULL;
1994 struct locale_ctype_t *ctype;
1995 struct token *now;
1996 enum token_t nowtok;
19bc17a9 1997 size_t cnt;
4b10dd6c
UD
1998 struct charseq *last_seq;
1999 uint32_t last_wch = 0;
2000 enum token_t last_token;
2001 enum token_t ellipsis_token;
 /* Step width handed to the symbolic and UCS4 ellipsis handlers; it is
    1 unless one of the `_2' ellipsis tokens was seen.  */
a0dc5206 2002 int step;
4b10dd6c
UD
2003 char last_charcode[16];
2004 size_t last_charcode_len = 0;
2005 const char *last_str = NULL;
2006 int mapidx;
19bc17a9 2007
4b10dd6c
UD
2008 /* Get the repertoire we have to use. */
2009 if (repertoire_name != NULL)
2010 repertoire = repertoire_read (repertoire_name);
19bc17a9 2011
4b10dd6c
UD
2012 /* The rest of the line containing `LC_CTYPE' must be free. */
2013 lr_ignore_rest (ldfile, 1);
19bc17a9 2014
4b10dd6c
UD
2015
2016 do
19bc17a9 2017 {
4b10dd6c
UD
2018 now = lr_token (ldfile, charmap, NULL);
2019 nowtok = now->tok;
19bc17a9 2020 }
4b10dd6c 2021 while (nowtok == tok_eol);
19bc17a9 2022
4b10dd6c
UD
2023 /* If we see `copy' now we are almost done. */
2024 if (nowtok == tok_copy)
2025 {
01ff9d0b
UD
2026 handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_ctype,
2027 LC_CTYPE, "LC_CTYPE", ignore_content);
4b10dd6c
UD
2028 return;
2029 }
75cd5204 2030
4b10dd6c
UD
2031 /* Prepare the data structures. */
2032 ctype_startup (ldfile, result, charmap, ignore_content);
2033 ctype = result->categories[LC_CTYPE].ctype;
2034
2035 /* Remember the repertoire we use. */
2036 if (!ignore_content)
2037 ctype->repertoire = repertoire;
2038
2039 while (1)
19bc17a9 2040 {
4b10dd6c
UD
2041 unsigned long int class_bit = 0;
2042 unsigned long int class256_bit = 0;
 /* 0: ordinary class line; 1: `digit' line, the values are also
    appended to mbdigits/wcdigits; 2: `outdigit' line, the values are
    stored in mboutdigits/wcoutdigits (at most ten).  */
2043 int handle_digits = 0;
2044
2045 /* Of course we don't proceed beyond the end of file. */
2046 if (nowtok == tok_eof)
2047 break;
2048
2049 /* Ignore empty lines. */
2050 if (nowtok == tok_eol)
19bc17a9 2051 {
4b10dd6c
UD
2052 now = lr_token (ldfile, charmap, NULL);
2053 nowtok = now->tok;
2054 continue;
2055 }
19bc17a9 2056
4b10dd6c
UD
2057 switch (nowtok)
2058 {
5491da0d
UD
2059 case tok_charclass:
2060 now = lr_token (ldfile, charmap, NULL);
2061 while (now->tok == tok_ident || now->tok == tok_string)
2062 {
2063 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2064 now = lr_token (ldfile, charmap, NULL);
2065 if (now->tok != tok_semicolon)
2066 break;
2067 now = lr_token (ldfile, charmap, NULL);
2068 }
2069 if (now->tok != tok_eol)
2070 SYNTAX_ERROR (_("\
2071%s: syntax error in definition of new character class"), "LC_CTYPE");
2072 break;
2073
2074 case tok_charconv:
2075 now = lr_token (ldfile, charmap, NULL);
2076 while (now->tok == tok_ident || now->tok == tok_string)
2077 {
2078 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2079 now = lr_token (ldfile, charmap, NULL);
2080 if (now->tok != tok_semicolon)
2081 break;
2082 now = lr_token (ldfile, charmap, NULL);
2083 }
2084 if (now->tok != tok_eol)
2085 SYNTAX_ERROR (_("\
2086%s: syntax error in definition of new character map"), "LC_CTYPE");
2087 break;
2088
4b10dd6c 2089 case tok_class:
b9eb05d6
UD
2090 /* Ignore the rest of the line if we don't need the input of
2091 this line. */
2092 if (ignore_content)
2093 {
2094 lr_ignore_rest (ldfile, 0);
2095 break;
2096 }
2097
4b10dd6c
UD
2098 /* We simply forget the `class' keyword and use the following
2099 operand to determine the bit. */
2100 now = lr_token (ldfile, charmap, NULL);
2101 if (now->tok == tok_ident || now->tok == tok_string)
2102 {
87372aa9 2103 /* It can be one of the class names known so far. */
4b10dd6c
UD
2104 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2105 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2106 break;
2107 if (cnt >= ctype->nr_charclass)
2108 {
011ebfab 2109#ifdef PREDEFINED_CLASSES
4b10dd6c
UD
2110 if (now->val.str.lenmb == 8
2111 && memcmp ("special1", now->val.str.startmb, 8) == 0)
2112 class_bit = _ISwspecial1;
2113 else if (now->val.str.lenmb == 8
2114 && memcmp ("special2", now->val.str.startmb, 8) == 0)
2115 class_bit = _ISwspecial2;
2116 else if (now->val.str.lenmb == 8
2117 && memcmp ("special3", now->val.str.startmb, 8) == 0)
2118 class_bit = _ISwspecial3;
2119 else
011ebfab 2120#endif
4b10dd6c 2121 {
87372aa9
UD
2122 /* OK, it's a new class. */
2123 ctype_class_new (ldfile, ctype, now->val.str.startmb);
4b10dd6c 2124
87372aa9 2125 class_bit = _ISwbit (ctype->nr_charclass - 1);
4b10dd6c
UD
2126 }
2127 }
2128 else
7f653277
UD
2129 {
2130 class_bit = _ISwbit (cnt);
4b10dd6c 2131
7f653277
UD
2132 free (now->val.str.startmb);
2133 }
4b10dd6c
UD
2134 }
2135 else if (now->tok == tok_digit)
2136 goto handle_tok_digit;
2137 else if (now->tok < tok_upper || now->tok > tok_blank)
2138 goto err_label;
2139 else
2140 {
2141 class_bit = BITw (now->tok);
2142 class256_bit = BIT (now->tok);
2143 }
2144
2145 /* The next character must be a semicolon. */
2146 now = lr_token (ldfile, charmap, NULL);
2147 if (now->tok != tok_semicolon)
2148 goto err_label;
2149 goto read_charclass;
2150
2151 case tok_upper:
2152 case tok_lower:
2153 case tok_alpha:
2154 case tok_alnum:
2155 case tok_space:
2156 case tok_cntrl:
2157 case tok_punct:
2158 case tok_graph:
2159 case tok_print:
2160 case tok_xdigit:
2161 case tok_blank:
b9eb05d6
UD
2162 /* Ignore the rest of the line if we don't need the input of
2163 this line. */
2164 if (ignore_content)
2165 {
2166 lr_ignore_rest (ldfile, 0);
2167 break;
2168 }
2169
4b10dd6c
UD
2170 class_bit = BITw (now->tok);
2171 class256_bit = BIT (now->tok);
2172 handle_digits = 0;
 /* Common code for reading the character list of a class.  It is
    also entered by `goto' from the tok_class, tok_digit, tok_outdigit
    and tok_ident cases, which set CLASS_BIT, CLASS256_BIT and
    HANDLE_DIGITS before jumping here.  */
2173 read_charclass:
2174 ctype->class_done |= class_bit;
2175 last_token = tok_none;
2176 ellipsis_token = tok_none;
a0dc5206 2177 step = 1;
4b10dd6c
UD
2178 now = lr_token (ldfile, charmap, NULL);
2179 while (now->tok != tok_eol && now->tok != tok_eof)
2180 {
2181 uint32_t wch;
2182 struct charseq *seq;
2183
2184 if (ellipsis_token == tok_none)
2185 {
2186 if (get_character (now, charmap, repertoire, &seq, &wch))
2187 goto err_label;
2188
2189 if (!ignore_content && seq != NULL && seq->nbytes == 1)
2190 /* Yep, we can store information about this byte
2191 sequence. */
2192 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2193
2194 if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2195 && class_bit != 0)
2196 /* We have the UCS4 position. */
2197 *find_idx (ctype, &ctype->class_collection,
2198 &ctype->class_collection_max,
2199 &ctype->class_collection_act, wch) |= class_bit;
2200
2201 last_token = now->tok;
549b3c3a 2202 /* Terminate the string. */
9e2b7438
UD
2203 if (last_token == tok_bsymbol)
2204 {
2205 now->val.str.startmb[now->val.str.lenmb] = '\0';
2206 last_str = now->val.str.startmb;
2207 }
2208 else
2209 last_str = NULL;
 /* NOTE(review): last_seq is assigned here but is not read anywhere
    else in this function.  */
4b10dd6c
UD
2210 last_seq = seq;
2211 last_wch = wch;
 /* NOTE(review): the charcode member is copied regardless of the
    token type; the copy is only consumed below when last_token is
    tok_charcode.  */
2212 memcpy (last_charcode, now->val.charcode.bytes, 16);
2213 last_charcode_len = now->val.charcode.nbytes;
2214
2215 if (!ignore_content && handle_digits == 1)
2216 {
2217 /* We must store the digit values. */
2218 if (ctype->mbdigits_act == ctype->mbdigits_max)
2219 {
b9eb05d6 2220 ctype->mbdigits_max += 10;
4b10dd6c
UD
2221 ctype->mbdigits = xrealloc (ctype->mbdigits,
2222 (ctype->mbdigits_max
2223 * sizeof (char *)));
b9eb05d6 2224 ctype->wcdigits_max += 10;
4b10dd6c
UD
2225 ctype->wcdigits = xrealloc (ctype->wcdigits,
2226 (ctype->wcdigits_max
2227 * sizeof (uint32_t)));
2228 }
2229
2230 ctype->mbdigits[ctype->mbdigits_act++] = seq;
2231 ctype->wcdigits[ctype->wcdigits_act++] = wch;
2232 }
2233 else if (!ignore_content && handle_digits == 2)
2234 {
2235 /* We must store the digit values. */
2236 if (ctype->outdigits_act >= 10)
2237 {
2238 lr_error (ldfile, _("\
2239%s: field `%s' does not contain exactly ten entries"),
2240 "LC_CTYPE", "outdigit");
2241 goto err_label;
2242 }
2243
2244 ctype->mboutdigits[ctype->outdigits_act] = seq;
2245 ctype->wcoutdigits[ctype->outdigits_act] = wch;
2246 ++ctype->outdigits_act;
2247 }
2248 }
2249 else
2250 {
2251 /* Now it gets complicated. We have to resolve the
2252 ellipsis problem. First we must distinguish between
2253 the different kind of ellipsis and this must match the
2254 tokens we have seen. */
2255 assert (last_token != tok_none);
2256
2257 if (last_token != now->tok)
2258 {
2259 lr_error (ldfile, _("\
2260ellipsis range must be marked by two operands of same type"));
2261 lr_ignore_rest (ldfile, 0);
2262 break;
2263 }
2264
2265 if (last_token == tok_bsymbol)
2266 {
2267 if (ellipsis_token == tok_ellipsis3)
2268 lr_error (ldfile, _("with symbolic name range values \
2269the absolute ellipsis `...' must not be used"));
2270
2271 charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2272 repertoire, now, last_str,
2273 class256_bit, class_bit,
2274 (ellipsis_token
2275 == tok_ellipsis4
2276 ? 10 : 16),
2277 ignore_content,
a0dc5206 2278 handle_digits, step);
4b10dd6c
UD
2279 }
2280 else if (last_token == tok_ucs4)
2281 {
2282 if (ellipsis_token != tok_ellipsis2)
2283 lr_error (ldfile, _("\
2284with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2285
2286 charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2287 repertoire, now, last_wch,
2288 class256_bit, class_bit,
a0dc5206
UD
2289 ignore_content, handle_digits,
2290 step);
4b10dd6c
UD
2291 }
2292 else
2293 {
2294 assert (last_token == tok_charcode);
2295
2296 if (ellipsis_token != tok_ellipsis3)
2297 lr_error (ldfile, _("\
2298with character code range values one must use the absolute ellipsis `...'"));
2299
2300 charclass_charcode_ellipsis (ldfile, ctype, charmap,
2301 repertoire, now,
2302 last_charcode,
2303 last_charcode_len,
2304 class256_bit, class_bit,
2305 ignore_content,
2306 handle_digits);
2307 }
2308
2309 /* Now we have used the last value. */
2310 last_token = tok_none;
2311 }
2312
2313 /* Next we expect a semicolon or the end of the line. */
2314 now = lr_token (ldfile, charmap, NULL);
2315 if (now->tok == tok_eol || now->tok == tok_eof)
2316 break;
2317
2318 if (last_token != tok_none
a0dc5206 2319 && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
4b10dd6c 2320 {
a0dc5206
UD
 /* The `_2' variants are folded into the corresponding plain
    ellipsis token; the difference is remembered in STEP.  */
2321 if (now->tok == tok_ellipsis2_2)
2322 {
2323 now->tok = tok_ellipsis2;
2324 step = 2;
2325 }
2326 else if (now->tok == tok_ellipsis4_2)
2327 {
2328 now->tok = tok_ellipsis4;
2329 step = 2;
2330 }
2331
4b10dd6c 2332 ellipsis_token = now->tok;
a0dc5206 2333
4b10dd6c
UD
2334 now = lr_token (ldfile, charmap, NULL);
2335 continue;
2336 }
2337
2338 if (now->tok != tok_semicolon)
2339 goto err_label;
2340
2341 /* And get the next character. */
2342 now = lr_token (ldfile, charmap, NULL);
2343
2344 ellipsis_token = tok_none;
a0dc5206 2345 step = 1;
4b10dd6c
UD
2346 }
2347 break;
2348
2349 case tok_digit:
b9eb05d6
UD
2350 /* Ignore the rest of the line if we don't need the input of
2351 this line. */
2352 if (ignore_content)
42d7c593
UD
2353 {
2354 lr_ignore_rest (ldfile, 0);
2355 break;
2356 }
b9eb05d6 2357
4b10dd6c
UD
2358 handle_tok_digit:
2359 class_bit = _ISwdigit;
2360 class256_bit = _ISdigit;
2361 handle_digits = 1;
2362 goto read_charclass;
2363
2364 case tok_outdigit:
b9eb05d6
UD
2365 /* Ignore the rest of the line if we don't need the input of
2366 this line. */
2367 if (ignore_content)
2368 {
2369 lr_ignore_rest (ldfile, 0);
2370 break;
2371 }
2372
4b10dd6c
UD
2373 if (ctype->outdigits_act != 0)
2374 lr_error (ldfile, _("\
2375%s: field `%s' declared more than once"),
2376 "LC_CTYPE", "outdigit");
2377 class_bit = 0;
2378 class256_bit = 0;
2379 handle_digits = 2;
2380 goto read_charclass;
2381
2382 case tok_toupper:
b9eb05d6
UD
2383 /* Ignore the rest of the line if we don't need the input of
2384 this line. */
2385 if (ignore_content)
2386 {
2387 lr_ignore_rest (ldfile, 0);
2388 break;
2389 }
2390
4b10dd6c
UD
2391 mapidx = 0;
2392 goto read_mapping;
2393
2394 case tok_tolower:
b9eb05d6
UD
2395 /* Ignore the rest of the line if we don't need the input of
2396 this line. */
2397 if (ignore_content)
2398 {
2399 lr_ignore_rest (ldfile, 0);
2400 break;
2401 }
2402
4b10dd6c
UD
2403 mapidx = 1;
2404 goto read_mapping;
2405
2406 case tok_map:
b9eb05d6
UD
2407 /* Ignore the rest of the line if we don't need the input of
2408 this line. */
2409 if (ignore_content)
2410 {
2411 lr_ignore_rest (ldfile, 0);
2412 break;
2413 }
2414
4b10dd6c
UD
2415 /* We simply forget the `map' keyword and use the following
2416 operand to determine the mapping. */
2417 now = lr_token (ldfile, charmap, NULL);
2418 if (now->tok == tok_ident || now->tok == tok_string)
2419 {
 /* Note: this shadows the function-level CNT.  */
2420 size_t cnt;
2421
2422 for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2423 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2424 break;
2425
7f653277
UD
2426 if (cnt < ctype->map_collection_nr)
2427 free (now->val.str.startmb);
2428 else
87372aa9
UD
2429 /* OK, it's a new map. */
2430 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2431
2432 mapidx = cnt;
4b10dd6c
UD
2433 }
2434 else if (now->tok < tok_toupper || now->tok > tok_tolower)
2435 goto err_label;
2436 else
2437 mapidx = now->tok - tok_toupper;
2438
2439 now = lr_token (ldfile, charmap, NULL);
2440 /* This better should be a semicolon. */
2441 if (now->tok != tok_semicolon)
2442 goto err_label;
2443
 /* Common code for reading the pairs of a mapping.  MAPIDX was set
    by the case that falls through or jumps here (tok_toupper,
    tok_tolower, tok_map, tok_ident).  */
2444 read_mapping:
2445 /* Test whether this mapping was already defined. */
2446 if (ctype->tomap_done[mapidx])
2447 {
2448 lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2449 ctype->mapnames[mapidx]);
2450 lr_ignore_rest (ldfile, 0);
2451 break;
2452 }
2453 ctype->tomap_done[mapidx] = 1;
2454
2455 now = lr_token (ldfile, charmap, NULL);
2456 while (now->tok != tok_eol && now->tok != tok_eof)
2457 {
2458 struct charseq *from_seq;
2459 uint32_t from_wch;
2460 struct charseq *to_seq;
2461 uint32_t to_wch;
2462
2463 /* Every pair starts with an opening brace. */
2464 if (now->tok != tok_open_brace)
2465 goto err_label;
2466
2467 /* Next comes the from-value. */
2468 now = lr_token (ldfile, charmap, NULL);
2469 if (get_character (now, charmap, repertoire, &from_seq,
2470 &from_wch) != 0)
2471 goto err_label;
2472
2473 /* The next is a comma. */
2474 now = lr_token (ldfile, charmap, NULL);
2475 if (now->tok != tok_comma)
2476 goto err_label;
2477
2478 /* And the other value. */
2479 now = lr_token (ldfile, charmap, NULL);
2480 if (get_character (now, charmap, repertoire, &to_seq,
2481 &to_wch) != 0)
2482 goto err_label;
2483
2484 /* And the last thing is the closing brace. */
2485 now = lr_token (ldfile, charmap, NULL);
2486 if (now->tok != tok_close_brace)
2487 goto err_label;
2488
2489 if (!ignore_content)
2490 {
2491 if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2492 && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2493 /* We can use this value. */
2494 ctype->map256_collection[mapidx][from_seq->bytes[0]]
2495 = to_seq->bytes[0];
2496
2497 if (from_wch != ILLEGAL_CHAR_VALUE
2498 && to_wch != ILLEGAL_CHAR_VALUE)
2499 /* Both correct values. */
2500 *find_idx (ctype, &ctype->map_collection[mapidx],
2501 &ctype->map_collection_max[mapidx],
2502 &ctype->map_collection_act[mapidx],
2503 from_wch) = to_wch;
2504 }
2505
2506 /* Now comes a semicolon or the end of the line/file. */
2507 now = lr_token (ldfile, charmap, NULL);
2508 if (now->tok == tok_semicolon)
2509 now = lr_token (ldfile, charmap, NULL);
2510 }
2511 break;
2512
2513 case tok_translit_start:
b9eb05d6
UD
2514 /* Ignore the rest of the line if we don't need the input of
2515 this line. */
2516 if (ignore_content)
2517 {
2518 lr_ignore_rest (ldfile, 0);
2519 break;
2520 }
2521
4b10dd6c
UD
2522 /* The rest of the line better should be empty. */
2523 lr_ignore_rest (ldfile, 1);
2524
2525 /* We count here the number of allocated entries in the `translit'
2526 array. */
2527 cnt = 0;
2528
2529 /* We proceed until we see the `translit_end' token. */
2530 while (now = lr_token (ldfile, charmap, repertoire),
2531 now->tok != tok_translit_end && now->tok != tok_eof)
2532 {
2533 if (now->tok == tok_eol)
2534 /* Ignore empty lines. */
2535 continue;
2536
 /* NOTE(review): the loop condition above already excludes
    tok_translit_end, so this branch looks unreachable.  */
2537 if (now->tok == tok_translit_end)
2538 {
2539 lr_ignore_rest (ldfile, 0);
2540 break;
2541 }
2542
2543 if (now->tok == tok_include)
2544 {
2545 /* We have to include locale. */
2546 const char *locale_name;
2547 const char *repertoire_name;
2548
2549 now = lr_token (ldfile, charmap, NULL);
2550 /* This should be a string or an identifier. In any
2551 case something to name a locale. */
2552 if (now->tok != tok_string && now->tok != tok_ident)
2553 {
2554 translit_syntax:
2555 lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2556 lr_ignore_rest (ldfile, 0);
2557 continue;
2558 }
2559 locale_name = now->val.str.startmb;
2560
2561 /* Next should be a semicolon. */
2562 now = lr_token (ldfile, charmap, NULL);
2563 if (now->tok != tok_semicolon)
2564 goto translit_syntax;
2565
2566 /* Now the repertoire name. */
2567 now = lr_token (ldfile, charmap, NULL);
2568 if ((now->tok != tok_string && now->tok != tok_ident)
2569 || now->val.str.startmb == NULL)
2570 goto translit_syntax;
2571 repertoire_name = now->val.str.startmb;
2572
2573 /* We must not have more than one `include'. */
2574 if (ctype->translit_copy_locale != NULL)
2575 {
2576 lr_error (ldfile, _("\
2577%s: only one `include' instruction allowed"), "LC_CTYPE");
2578 lr_ignore_rest (ldfile, 0);
2579 continue;
2580 }
2581
2582 ctype->translit_copy_locale = locale_name;
2583 ctype->translit_copy_repertoire = repertoire_name;
2584
2585 /* The rest of the line must be empty. */
2586 lr_ignore_rest (ldfile, 1);
a673fbcb
UD
2587
2588 /* Make sure the locale is read. */
2589 add_to_readlist (LC_CTYPE, ctype->translit_copy_locale,
07dab0c3 2590 repertoire_name, 1, NULL);
a673fbcb
UD
2591 continue;
2592 }
2593 else if (now->tok == tok_default_missing)
2594 {
2595 uint32_t *wstr;
2596
2597 /* We expect a single character or string as the
2598 argument. */
2599 now = lr_token (ldfile, charmap, NULL);
2600 wstr = read_widestring (ldfile, now, charmap, repertoire);
2601
2602 if (wstr != NULL)
2603 {
2604 if (ctype->default_missing != NULL)
2605 {
2606 lr_error (ldfile, _("\
2607%s: duplicate `default_missing' definition"), "LC_CTYPE");
2608 error_at_line (0, 0, ctype->default_missing_file,
2609 ctype->default_missing_lineno,
2610 _("previous definition was here"));
2611 }
2612 else
2613 {
2614 ctype->default_missing = wstr;
2615 ctype->default_missing_file = ldfile->fname;
2616 ctype->default_missing_lineno = ldfile->lineno;
2617 }
2618 }
2619 lr_ignore_rest (ldfile, 1);
2620 continue;
2621 }
2622 else if (now->tok == tok_translit_ignore)
2623 {
2624 read_translit_ignore_entry (ldfile, ctype, charmap,
2625 repertoire);
4b10dd6c
UD
2626 continue;
2627 }
2628
2629 read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2630 }
2631 break;
2632
2633 case tok_ident:
b9eb05d6
UD
2634 /* Ignore the rest of the line if we don't need the input of
2635 this line. */
2636 if (ignore_content)
2637 {
2638 lr_ignore_rest (ldfile, 0);
2639 break;
2640 }
2641
4b10dd6c
UD
2642 /* This could mean one of several things. First test whether
2643 it's a character class name. */
2644 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2645 if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2646 break;
2647 if (cnt < ctype->nr_charclass)
2648 {
2649 class_bit = _ISwbit (cnt);
2650 class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2651 free (now->val.str.startmb);
2652 goto read_charclass;
2653 }
5491da0d
UD
 /* Not a class name; next test whether it names a mapping.  */
2654 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2655 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2656 break;
2657 if (cnt < ctype->map_collection_nr)
2658 {
2659 mapidx = cnt;
2660 free (now->val.str.startmb);
2661 goto read_mapping;
2662 }
011ebfab 2663#ifdef PREDEFINED_CLASSES
4b10dd6c
UD
2664 if (strcmp (now->val.str.startmb, "special1") == 0)
2665 {
2666 class_bit = _ISwspecial1;
2667 free (now->val.str.startmb);
2668 goto read_charclass;
2669 }
2670 if (strcmp (now->val.str.startmb, "special2") == 0)
2671 {
2672 class_bit = _ISwspecial2;
2673 free (now->val.str.startmb);
2674 goto read_charclass;
2675 }
2676 if (strcmp (now->val.str.startmb, "special3") == 0)
2677 {
2678 class_bit = _ISwspecial3;
2679 free (now->val.str.startmb);
2680 goto read_charclass;
2681 }
2682 if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2683 {
2684 mapidx = 2;
2685 goto read_mapping;
2686 }
011ebfab 2687#endif
4b10dd6c
UD
2688 break;
2689
2690 case tok_end:
2691 /* Next we assume `LC_CTYPE'. */
2692 now = lr_token (ldfile, charmap, NULL);
2693 if (now->tok == tok_eof)
2694 break;
2695 if (now->tok == tok_eol)
2696 lr_error (ldfile, _("%s: incomplete `END' line"),
2697 "LC_CTYPE");
2698 else if (now->tok != tok_lc_ctype)
2699 lr_error (ldfile, _("\
2700%1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2701 lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2702 return;
2703
2704 default:
 /* Also the target of the `goto err_label' statements in the cases
    above, including those inside the nested loops.  */
2705 err_label:
2706 if (now->tok != tok_eof)
2707 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
19bc17a9
RM
2708 }
2709
4b10dd6c
UD
2710 /* Prepare for the next round. */
2711 now = lr_token (ldfile, charmap, NULL);
2712 nowtok = now->tok;
19bc17a9
RM
2713 }
2714
4b10dd6c
UD
2715 /* When we come here we reached the end of the file. */
2716 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
19bc17a9
RM
2717}
2718
2719
2720static void
4b10dd6c
UD
2721set_class_defaults (struct locale_ctype_t *ctype, struct charmap_t *charmap,
2722 struct repertoire_t *repertoire)
19bc17a9 2723{
4b10dd6c
UD
2724 size_t cnt;
2725
19bc17a9
RM
2726 /* These function defines the default values for the classes and conversions
2727 according to POSIX.2 2.5.2.1.
2728 It may seem that the order of these if-blocks is arbitrary but it is NOT.
2729 Don't move them unless you know what you do! */
2730
4b10dd6c 2731 void set_default (int bitpos, int from, int to)
19bc17a9
RM
2732 {
2733 char tmp[2];
2734 int ch;
4b10dd6c
UD
2735 int bit = _ISbit (bitpos);
2736 int bitw = _ISwbit (bitpos);
19bc17a9
RM
2737 /* Define string. */
2738 strcpy (tmp, "?");
2739
2740 for (ch = from; ch <= to; ++ch)
2741 {
4b10dd6c 2742 struct charseq *seq;
19bc17a9
RM
2743 tmp[0] = ch;
2744
4b10dd6c
UD
2745 seq = charmap_find_value (charmap, tmp, 1);
2746 if (seq == NULL)
2747 {
2748 if (!be_quiet)
2749 error (0, 0, _("\
2750%s: character `%s' not defined in charmap while needed as default value"),
2751 "LC_CTYPE", tmp);
19bc17a9 2752 }
4b10dd6c
UD
2753 else if (seq->nbytes != 1)
2754 error (0, 0, _("\
2755%s: character `%s' in charmap not representable with one byte"),
2756 "LC_CTYPE", tmp);
19bc17a9 2757 else
4b10dd6c 2758 ctype->class256_collection[seq->bytes[0]] |= bit;
f0a4b6b1
UD
2759
2760 /* No need to search here, the ASCII value is also the Unicode
2761 value. */
2762 ELEM (ctype, class_collection, , ch) |= bitw;
19bc17a9
RM
2763 }
2764 }
2765
2766 /* Set default values if keyword was not present. */
4b10dd6c 2767 if ((ctype->class_done & BITw (tok_upper)) == 0)
19bc17a9
RM
2768 /* "If this keyword [lower] is not specified, the lowercase letters
2769 `A' through `Z', ..., shall automatically belong to this class,
2770 with implementation defined character values." [P1003.2, 2.5.2.1] */
4b10dd6c 2771 set_default (BITPOS (tok_upper), 'A', 'Z');
19bc17a9 2772
4b10dd6c 2773 if ((ctype->class_done & BITw (tok_lower)) == 0)
19bc17a9
RM
2774 /* "If this keyword [lower] is not specified, the lowercase letters
2775 `a' through `z', ..., shall automatically belong to this class,
2776 with implementation defined character values." [P1003.2, 2.5.2.1] */
4b10dd6c 2777 set_default (BITPOS (tok_lower), 'a', 'z');
19bc17a9 2778
4b10dd6c 2779 if ((ctype->class_done & BITw (tok_alpha)) == 0)
19bc17a9
RM
2780 {
2781 /* Table 2-6 in P1003.2 says that characters in class `upper' or
2782 class `lower' *must* be in class `alpha'. */
2783 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
96f0d1f5
UD
2784 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
2785
2786 for (cnt = 0; cnt < 256; ++cnt)
2787 if ((ctype->class256_collection[cnt] & mask) != 0)
2788 ctype->class256_collection[cnt] |= BIT (tok_alpha);
19bc17a9
RM
2789
2790 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
96f0d1f5
UD
2791 if ((ctype->class_collection[cnt] & maskw) != 0)
2792 ctype->class_collection[cnt] |= BITw (tok_alpha);
19bc17a9
RM
2793 }
2794
4b10dd6c 2795 if ((ctype->class_done & BITw (tok_digit)) == 0)
19bc17a9
RM
2796 /* "If this keyword [digit] is not specified, the digits `0' through
2797 `9', ..., shall automatically belong to this class, with
2798 implementation-defined character values." [P1003.2, 2.5.2.1] */
4b10dd6c 2799 set_default (BITPOS (tok_digit), '0', '9');
19bc17a9
RM
2800
2801 /* "Only characters specified for the `alpha' and `digit' keyword
2802 shall be specified. Characters specified for the keyword `alpha'
2803 and `digit' are automatically included in this class. */
2804 {
2805 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
96f0d1f5
UD
2806 unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
2807
2808 for (cnt = 0; cnt < 256; ++cnt)
2809 if ((ctype->class256_collection[cnt] & mask) != 0)
2810 ctype->class256_collection[cnt] |= BIT (tok_alnum);
19bc17a9
RM
2811
2812 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
96f0d1f5
UD
2813 if ((ctype->class_collection[cnt] & maskw) != 0)
2814 ctype->class_collection[cnt] |= BITw (tok_alnum);
19bc17a9
RM
2815 }
2816
4b10dd6c 2817 if ((ctype->class_done & BITw (tok_space)) == 0)
19bc17a9
RM
2818 /* "If this keyword [space] is not specified, the characters <space>,
2819 <form-feed>, <newline>, <carriage-return>, <tab>, and
2820 <vertical-tab>, ..., shall automatically belong to this class,
2821 with implementation-defined character values." [P1003.2, 2.5.2.1] */
2822 {
4b10dd6c 2823 struct charseq *seq;
19bc17a9 2824
4b10dd6c 2825 seq = charmap_find_value (charmap, "space", 5);
45c95239
UD
2826 if (seq == NULL)
2827 seq = charmap_find_value (charmap, "SP", 2);
f0a4b6b1
UD
2828 if (seq == NULL)
2829 seq = charmap_find_value (charmap, "U00000020", 9);
4b10dd6c 2830 if (seq == NULL)
880f421f
UD
2831 {
2832 if (!be_quiet)
2833 error (0, 0, _("\
4b10dd6c
UD
2834%s: character `%s' not defined while needed as default value"),
2835 "LC_CTYPE", "<space>");
2836 }
2837 else if (seq->nbytes != 1)
2838 error (0, 0, _("\
2839%s: character `%s' in charmap not representable with one byte"),
2840 "LC_CTYPE", "<space>");
2841 else
2842 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2843
f0a4b6b1 2844 /* No need to search. */
ce177a84 2845 ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
19bc17a9 2846
4b10dd6c 2847 seq = charmap_find_value (charmap, "form-feed", 9);
f0a4b6b1
UD
2848 if (seq == NULL)
2849 seq = charmap_find_value (charmap, "U0000000C", 9);
4b10dd6c 2850 if (seq == NULL)
880f421f
UD
2851 {
2852 if (!be_quiet)
2853 error (0, 0, _("\
4b10dd6c
UD
2854%s: character `%s' not defined while needed as default value"),
2855 "LC_CTYPE", "<form-feed>");
2856 }
2857 else if (seq->nbytes != 1)
2858 error (0, 0, _("\
2859%s: character `%s' in charmap not representable with one byte"),
2860 "LC_CTYPE", "<form-feed>");
2861 else
2862 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2863
f0a4b6b1 2864 /* No need to search. */
ce177a84 2865 ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
4b10dd6c 2866
19bc17a9 2867
4b10dd6c 2868 seq = charmap_find_value (charmap, "newline", 7);
f0a4b6b1
UD
2869 if (seq == NULL)
2870 seq = charmap_find_value (charmap, "U0000000A", 9);
4b10dd6c 2871 if (seq == NULL)
880f421f
UD
2872 {
2873 if (!be_quiet)
2874 error (0, 0, _("\
19bc17a9 2875character `%s' not defined while needed as default value"),
4b10dd6c
UD
2876 "<newline>");
2877 }
2878 else if (seq->nbytes != 1)
2879 error (0, 0, _("\
2880%s: character `%s' in charmap not representable with one byte"),
2881 "LC_CTYPE", "<newline>");
2882 else
2883 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2884
f0a4b6b1 2885 /* No need to search. */
ce177a84 2886 ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
4b10dd6c 2887
19bc17a9 2888
4b10dd6c 2889 seq = charmap_find_value (charmap, "carriage-return", 15);
f0a4b6b1
UD
2890 if (seq == NULL)
2891 seq = charmap_find_value (charmap, "U0000000D", 9);
4b10dd6c 2892 if (seq == NULL)
880f421f
UD
2893 {
2894 if (!be_quiet)
2895 error (0, 0, _("\
4b10dd6c
UD
2896%s: character `%s' not defined while needed as default value"),
2897 "LC_CTYPE", "<carriage-return>");
2898 }
2899 else if (seq->nbytes != 1)
2900 error (0, 0, _("\
2901%s: character `%s' in charmap not representable with one byte"),
2902 "LC_CTYPE", "<carriage-return>");
2903 else
2904 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2905
f0a4b6b1 2906 /* No need to search. */
ce177a84 2907 ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
4b10dd6c 2908
19bc17a9 2909
4b10dd6c 2910 seq = charmap_find_value (charmap, "tab", 3);
f0a4b6b1
UD
2911 if (seq == NULL)
2912 seq = charmap_find_value (charmap, "U00000009", 9);
4b10dd6c 2913 if (seq == NULL)
880f421f
UD
2914 {
2915 if (!be_quiet)
2916 error (0, 0, _("\
4b10dd6c
UD
2917%s: character `%s' not defined while needed as default value"),
2918 "LC_CTYPE", "<tab>");
2919 }
2920 else if (seq->nbytes != 1)
2921 error (0, 0, _("\
2922%s: character `%s' in charmap not representable with one byte"),
2923 "LC_CTYPE", "<tab>");
2924 else
2925 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2926
f0a4b6b1 2927 /* No need to search. */
ce177a84 2928 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
4b10dd6c 2929
4b10dd6c
UD
2930
2931 seq = charmap_find_value (charmap, "vertical-tab", 12);
f0a4b6b1
UD
2932 if (seq == NULL)
2933 seq = charmap_find_value (charmap, "U0000000B", 9);
4b10dd6c
UD
2934 if (seq == NULL)
2935 {
2936 if (!be_quiet)
2937 error (0, 0, _("\
2938%s: character `%s' not defined while needed as default value"),
2939 "LC_CTYPE", "<vertical-tab>");
2940 }
2941 else if (seq->nbytes != 1)
2942 error (0, 0, _("\
2943%s: character `%s' in charmap not representable with one byte"),
2944 "LC_CTYPE", "<vertical-tab>");
2945 else
2946 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
f0a4b6b1
UD
2947
2948 /* No need to search. */
ce177a84 2949 ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
19bc17a9
RM
2950 }
2951
4b10dd6c 2952 if ((ctype->class_done & BITw (tok_xdigit)) == 0)
19bc17a9
RM
2953 /* "If this keyword is not specified, the digits `0' to `9', the
2954 uppercase letters `A' through `F', and the lowercase letters `a'
2955 through `f', ..., shell automatically belong to this class, with
2956 implementation defined character values." [P1003.2, 2.5.2.1] */
2957 {
4b10dd6c
UD
2958 set_default (BITPOS (tok_xdigit), '0', '9');
2959 set_default (BITPOS (tok_xdigit), 'A', 'F');
2960 set_default (BITPOS (tok_xdigit), 'a', 'f');
19bc17a9
RM
2961 }
2962
4b10dd6c 2963 if ((ctype->class_done & BITw (tok_blank)) == 0)
19bc17a9
RM
2964 /* "If this keyword [blank] is unspecified, the characters <space> and
2965 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
2966 {
4b10dd6c 2967 struct charseq *seq;
19bc17a9 2968
4b10dd6c 2969 seq = charmap_find_value (charmap, "space", 5);
45c95239
UD
2970 if (seq == NULL)
2971 seq = charmap_find_value (charmap, "SP", 2);
f0a4b6b1
UD
2972 if (seq == NULL)
2973 seq = charmap_find_value (charmap, "U00000020", 9);
4b10dd6c 2974 if (seq == NULL)
880f421f
UD
2975 {
2976 if (!be_quiet)
2977 error (0, 0, _("\
4b10dd6c
UD
2978%s: character `%s' not defined while needed as default value"),
2979 "LC_CTYPE", "<space>");
2980 }
2981 else if (seq->nbytes != 1)
2982 error (0, 0, _("\
2983%s: character `%s' in charmap not representable with one byte"),
2984 "LC_CTYPE", "<space>");
2985 else
2986 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
2987
f0a4b6b1 2988 /* No need to search. */
ce177a84 2989 ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
4b10dd6c 2990
4b10dd6c
UD
2991
2992 seq = charmap_find_value (charmap, "tab", 3);
f0a4b6b1
UD
2993 if (seq == NULL)
2994 seq = charmap_find_value (charmap, "U00000009", 9);
4b10dd6c
UD
2995 if (seq == NULL)
2996 {
2997 if (!be_quiet)
2998 error (0, 0, _("\
2999%s: character `%s' not defined while needed as default value"),
3000 "LC_CTYPE", "<tab>");
3001 }
3002 else if (seq->nbytes != 1)
3003 error (0, 0, _("\
3004%s: character `%s' in charmap not representable with one byte"),
3005 "LC_CTYPE", "<tab>");
3006 else
3007 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
f0a4b6b1
UD
3008
3009 /* No need to search. */
ce177a84 3010 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
19bc17a9
RM
3011 }
3012
4b10dd6c 3013 if ((ctype->class_done & BITw (tok_graph)) == 0)
19bc17a9
RM
3014 /* "If this keyword [graph] is not specified, characters specified for
3015 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3016 shall belong to this character class." [P1003.2, 2.5.2.1] */
3017 {
3018 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3019 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
ce177a84
UD
3020 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3021 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3022 BITw (tok_punct);
19bc17a9
RM
3023 size_t cnt;
3024
3025 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
ce177a84
UD
3026 if ((ctype->class_collection[cnt] & maskw) != 0)
3027 ctype->class_collection[cnt] |= BITw (tok_graph);
4b10dd6c
UD
3028
3029 for (cnt = 0; cnt < 256; ++cnt)
3030 if ((ctype->class256_collection[cnt] & mask) != 0)
3031 ctype->class256_collection[cnt] |= BIT (tok_graph);
19bc17a9
RM
3032 }
3033
4b10dd6c 3034 if ((ctype->class_done & BITw (tok_print)) == 0)
19bc17a9
RM
3035 /* "If this keyword [print] is not provided, characters specified for
3036 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3037 and the <space> character shall belong to this character class."
3038 [P1003.2, 2.5.2.1] */
3039 {
3040 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3041 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
ce177a84
UD
3042 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3043 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3044 BITw (tok_punct);
19bc17a9 3045 size_t cnt;
4b10dd6c 3046 struct charseq *seq;
19bc17a9
RM
3047
3048 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
ce177a84
UD
3049 if ((ctype->class_collection[cnt] & maskw) != 0)
3050 ctype->class_collection[cnt] |= BITw (tok_print);
19bc17a9 3051
4b10dd6c
UD
3052 for (cnt = 0; cnt < 256; ++cnt)
3053 if ((ctype->class256_collection[cnt] & mask) != 0)
3054 ctype->class256_collection[cnt] |= BIT (tok_print);
3055
3056
4b10dd6c 3057 seq = charmap_find_value (charmap, "space", 5);
45c95239
UD
3058 if (seq == NULL)
3059 seq = charmap_find_value (charmap, "SP", 2);
f0a4b6b1
UD
3060 if (seq == NULL)
3061 seq = charmap_find_value (charmap, "U00000020", 9);
4b10dd6c
UD
3062 if (seq == NULL)
3063 {
3064 if (!be_quiet)
3065 error (0, 0, _("\
3066%s: character `%s' not defined while needed as default value"),
3067 "LC_CTYPE", "<space>");
3068 }
3069 else if (seq->nbytes != 1)
3070 error (0, 0, _("\
3071%s: character `%s' in charmap not representable with one byte"),
3072 "LC_CTYPE", "<space>");
3073 else
3074 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
f0a4b6b1
UD
3075
3076 /* No need to search. */
ce177a84 3077 ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
19bc17a9
RM
3078 }
3079
4b10dd6c 3080 if (ctype->tomap_done[0] == 0)
6d52618b 3081 /* "If this keyword [toupper] is not specified, the lowercase letters
19bc17a9
RM
3082 `a' through `z', and their corresponding uppercase letters `A' to
3083 `Z', ..., shall automatically be included, with implementation-
3084 defined character values." [P1003.2, 2.5.2.1] */
3085 {
3086 char tmp[4];
3087 int ch;
3088
3089 strcpy (tmp, "<?>");
3090
3091 for (ch = 'a'; ch <= 'z'; ++ch)
3092 {
4b10dd6c 3093 struct charseq *seq_from, *seq_to;
19bc17a9
RM
3094
3095 tmp[1] = (char) ch;
3096
4b10dd6c
UD
3097 seq_from = charmap_find_value (charmap, &tmp[1], 1);
3098 if (seq_from == NULL)
19bc17a9 3099 {
880f421f
UD
3100 if (!be_quiet)
3101 error (0, 0, _("\
4b10dd6c
UD
3102%s: character `%s' not defined while needed as default value"),
3103 "LC_CTYPE", tmp);
3104 }
3105 else if (seq_from->nbytes != 1)
3106 {
3107 if (!be_quiet)
3108 error (0, 0, _("\
3109%s: character `%s' needed as default value not representable with one byte"),
3110 "LC_CTYPE", tmp);
3111 }
3112 else
3113 {
3114 /* This conversion is implementation defined. */
3115 tmp[1] = (char) (ch + ('A' - 'a'));
3116 seq_to = charmap_find_value (charmap, &tmp[1], 1);
3117 if (seq_to == NULL)
3118 {
3119 if (!be_quiet)
3120 error (0, 0, _("\
3121%s: character `%s' not defined while needed as default value"),
3122 "LC_CTYPE", tmp);
3123 }
3124 else if (seq_to->nbytes != 1)
3125 {
3126 if (!be_quiet)
3127 error (0, 0, _("\
3128%s: character `%s' needed as default value not representable with one byte"),
3129 "LC_CTYPE", tmp);
3130 }
3131 else
3132 /* The index [0] is determined by the order of the
3133 `ctype_map_newP' calls in `ctype_startup'. */
3134 ctype->map256_collection[0][seq_from->bytes[0]]
3135 = seq_to->bytes[0];
19bc17a9 3136 }
f0a4b6b1
UD
3137
3138 /* No need to search. */
3139 ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
19bc17a9
RM
3140 }
3141 }
3142
4b10dd6c 3143 if (ctype->tomap_done[1] == 0)
19bc17a9
RM
3144 /* "If this keyword [tolower] is not specified, the mapping shall be
3145 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3146 {
19bc17a9
RM
3147 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3148 if (ctype->map_collection[0][cnt] != 0)
3149 ELEM (ctype, map_collection, [1],
3150 ctype->map_collection[0][cnt])
3151 = ctype->charnames[cnt];
4b10dd6c
UD
3152
3153 for (cnt = 0; cnt < 256; ++cnt)
3154 if (ctype->map256_collection[0][cnt] != 0)
85cb60ff 3155 ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
4b10dd6c
UD
3156 }
3157
3158 if (ctype->outdigits_act == 0)
3159 {
3160 for (cnt = 0; cnt < 10; ++cnt)
3161 {
3162 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3163 digits + cnt, 1);
3164
3165 if (ctype->mboutdigits[cnt] == NULL)
1b97149d
UD
3166 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3167 longnames[cnt],
3168 strlen (longnames[cnt]));
b9eb05d6 3169
1b97149d
UD
3170 if (ctype->mboutdigits[cnt] == NULL)
3171 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3172 uninames[cnt], 9);
b9eb05d6 3173
1b97149d 3174 if (ctype->mboutdigits[cnt] == NULL)
b9eb05d6 3175 {
1b97149d
UD
3176 /* Provide a replacement. */
3177 error (0, 0, _("\
3178no output digits defined and none of the standard names in the charmap"));
b9eb05d6 3179
1b97149d
UD
3180 ctype->mboutdigits[cnt] = obstack_alloc (&charmap->mem_pool,
3181 sizeof (struct charseq)
3182 + 1);
b9eb05d6 3183
1b97149d
UD
3184 /* This is better than nothing. */
3185 ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3186 ctype->mboutdigits[cnt]->nbytes = 1;
b9eb05d6 3187 }
1b97149d
UD
3188
3189 ctype->wcoutdigits[cnt] = L'0' + cnt;
4b10dd6c
UD
3190 }
3191
3192 ctype->outdigits_act = 10;
19bc17a9
RM
3193 }
3194}
3195
3196
3197static void
4b10dd6c
UD
3198allocate_arrays (struct locale_ctype_t *ctype, struct charmap_t *charmap,
3199 struct repertoire_t *repertoire)
19bc17a9
RM
3200{
3201 size_t idx;
a53e3292 3202 size_t width_table_size;
0e16ecfa
UD
3203 const void *key;
3204 size_t len;
3205 void *vdata;
3206 void *curs;
5d431a3e 3207
6d52618b
UD
3208 /* First we have to decide how we organize the arrays. It is easy
3209 for a one-byte character set. But multi-byte character set
3210 cannot be stored flat because the chars might be sparsely used.
3211 So we determine an optimal hashing function for the used
3212 characters.
3213
3214 We use a very trivial hashing function to store the sparse
3215 table. CH % TABSIZE is used as an index. To solve multiple hits
3216 we have N planes. This guarantees a fixed search time for a
42d7c593 3217 character [N / 2]. In the following code we determine the minimum
66ac0abe
UD
3218 value for TABSIZE * N, where TABSIZE >= 256.
3219
3220 Some people complained that this algorithm takes too long. Well,
3221 go on, improve it. But changing the step size is *not* an
3222 option. Some people changed this to use only sizes of prime
3223 numbers. Think again, do some math. We are looking for the
3224 optimal solution, not something which works in general. Unless
3225 somebody can provide a dynamic programming solution I think this
3226 implementation is as good as it can get. */
19bc17a9
RM
3227 size_t min_total = UINT_MAX;
3228 size_t act_size = 256;
3229
66ac0abe 3230 if (!be_quiet && ctype->charnames_act > 512)
c84142e8 3231 fputs (_("\
19bc17a9 3232Computing table size for character classes might take a while..."),
c84142e8 3233 stderr);
19bc17a9 3234
66ac0abe
UD
3235 /* While we want to have a small total size we are willing to use a
3236 little bit larger table if this reduces the number of layers.
3237 Therefore we add a little penalty to the number of planes.
3238 Maybe this constant has to be adjusted a bit. */
3239#define PENALTY 128
3240 do
19bc17a9
RM
3241 {
3242 size_t cnt[act_size];
3243 size_t act_planes = 1;
3244
3245 memset (cnt, '\0', sizeof cnt);
3246
3247 for (idx = 0; idx < 256; ++idx)
3248 cnt[idx] = 1;
3249
3250 for (idx = 0; idx < ctype->charnames_act; ++idx)
3251 if (ctype->charnames[idx] >= 256)
3252 {
3253 size_t nr = ctype->charnames[idx] % act_size;
3254
3255 if (++cnt[nr] > act_planes)
3256 {
3257 act_planes = cnt[nr];
66ac0abe 3258 if ((act_size + PENALTY) * act_planes >= min_total)
19bc17a9
RM
3259 break;
3260 }
3261 }
3262
66ac0abe 3263 if ((act_size + PENALTY) * act_planes < min_total)
19bc17a9 3264 {
66ac0abe 3265 min_total = (act_size + PENALTY) * act_planes;
19bc17a9
RM
3266 ctype->plane_size = act_size;
3267 ctype->plane_cnt = act_planes;
3268 }
3269
3270 ++act_size;
3271 }
66ac0abe 3272 while (act_size < min_total);
19bc17a9 3273
66ac0abe 3274 if (!be_quiet && ctype->charnames_act > 512)
c84142e8 3275 fputs (_(" done\n"), stderr);
19bc17a9 3276
75cd5204 3277
4a33c2f5
UD
3278 ctype->names = (uint32_t *) xcalloc (ctype->plane_size
3279 * ctype->plane_cnt,
3280 sizeof (uint32_t));
19bc17a9
RM
3281
3282 for (idx = 1; idx < 256; ++idx)
4a33c2f5 3283 ctype->names[idx] = idx;
19bc17a9
RM
3284
3285 /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */
4a33c2f5 3286 ctype->names[0] = 1;
19bc17a9
RM
3287
3288 for (idx = 256; idx < ctype->charnames_act; ++idx)
3289 {
3290 size_t nr = (ctype->charnames[idx] % ctype->plane_size);
3291 size_t depth = 0;
3292
4a33c2f5 3293 while (ctype->names[nr + depth * ctype->plane_size])
19bc17a9
RM
3294 ++depth;
3295 assert (depth < ctype->plane_cnt);
3296
4a33c2f5 3297 ctype->names[nr + depth * ctype->plane_size] = ctype->charnames[idx];
19bc17a9
RM
3298
3299 /* Now for faster access remember the index in the NAMES_B array. */
3300 ctype->charnames[idx] = nr + depth * ctype->plane_size;
3301 }
4a33c2f5 3302 ctype->names[0] = 0;
19bc17a9
RM
3303
3304
3305 /* You wonder about this amount of memory? This is only because some
3306 users do not manage to address the array with unsigned values or
3307 data types with range >= 256. '\200' would result in the array
3308 index -128. To help these poor people we duplicate the entries for
3309 128 up to 255 below the entry for \0. */
3310 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
3311 sizeof (char_class_t));
3312 ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
3313 * ctype->plane_cnt,
3314 sizeof (char_class32_t));
3315
4a33c2f5 3316 /* This is the array accessed using the multibyte string elements. */
4b10dd6c 3317 for (idx = 0; idx < 256; ++idx)
4a33c2f5 3318 ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
19bc17a9 3319
75cd5204
RM
3320 /* Mirror first 127 entries. We must take care that entry -1 is not
3321 mirrored because EOF == -1. */
3322 for (idx = 0; idx < 127; ++idx)
19bc17a9
RM
3323 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3324
3325 /* The 32 bit array contains all characters. */
3326 for (idx = 0; idx < ctype->class_collection_act; ++idx)
4a33c2f5 3327 ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
19bc17a9
RM
3328
3329 /* Room for table of mappings. */
49f2be5b
UD
3330 ctype->map = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3331 ctype->map32 = (uint32_t **) xmalloc (ctype->map_collection_nr
4a33c2f5 3332 * sizeof (uint32_t *));
19bc17a9
RM
3333
3334 /* Fill in all mappings. */
49f2be5b 3335 for (idx = 0; idx < 2; ++idx)
19bc17a9
RM
3336 {
3337 unsigned int idx2;
3338
3339 /* Allocate table. */
49f2be5b 3340 ctype->map[idx] = (uint32_t *) xmalloc ((256 + 128) * sizeof (uint32_t));
19bc17a9
RM
3341
3342 /* Copy values from collection. */
4b10dd6c 3343 for (idx2 = 0; idx2 < 256; ++idx2)
4a33c2f5 3344 ctype->map[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
19bc17a9 3345
75cd5204
RM
3346 /* Mirror first 127 entries. We must take care not to map entry
3347 -1 because EOF == -1. */
3348 for (idx2 = 0; idx2 < 127; ++idx2)
4a33c2f5 3349 ctype->map[idx][idx2] = ctype->map[idx][256 + idx2];
19bc17a9 3350
75cd5204 3351 /* EOF must map to EOF. */
4a33c2f5 3352 ctype->map[idx][127] = EOF;
49f2be5b 3353 }
a9c27b3e 3354
49f2be5b
UD
3355 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3356 {
3357 unsigned int idx2;
3358
3359 /* Allocate table. */
f1d8b804
UD
3360 ctype->map32[idx] = (uint32_t *) xmalloc (ctype->plane_size
3361 * ctype->plane_cnt
3362 * sizeof (uint32_t));
49f2be5b
UD
3363
3364 /* Copy default value (identity mapping). */
f1d8b804 3365 memcpy (ctype->map32[idx], ctype->names,
49f2be5b
UD
3366 ctype->plane_size * ctype->plane_cnt * sizeof (uint32_t));
3367
3368 /* Copy values from collection. */
3369 for (idx2 = 0; idx2 < 256; ++idx2)
a9c27b3e 3370 if (ctype->map_collection[idx][idx2] != 0)
f1d8b804
UD
3371 ctype->map32[idx][idx2] = ctype->map_collection[idx][idx2];
3372
3373 while (idx2 < ctype->map_collection_act[idx])
b06c53e7
UD
3374 {
3375 if (ctype->map_collection[idx][idx2] != 0)
450bf66e
UD
3376 ctype->map32[idx][ctype->charnames[idx2]] =
3377 ctype->map_collection[idx][idx2];
b06c53e7
UD
3378 ++idx2;
3379 }
19bc17a9
RM
3380 }
3381
3382 /* Extra array for class and map names. */
4b10dd6c
UD
3383 ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3384 * sizeof (uint32_t));
3385 ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3386 * sizeof (uint32_t));
75cd5204
RM
3387
3388 /* Array for width information. Because the expected width are very
3389 small we use only one single byte. This save space and we need
3390 not provide the information twice with both endianesses. */
5866b131
UD
3391 width_table_size = (ctype->plane_size * ctype->plane_cnt + 3) & ~3ul;
3392 ctype->width = (unsigned char *) xmalloc (width_table_size);
3393
0e16ecfa
UD
3394 /* Initialize with -1. */
3395 memset (ctype->width, '\xff', width_table_size);
4b10dd6c 3396 if (charmap->width_rules != NULL)
75cd5204
RM
3397 {
3398 size_t cnt;
3399
4b10dd6c 3400 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
827ff758
UD
3401 {
3402 unsigned char bytes[charmap->mb_cur_max];
3403 int nbytes = charmap->width_rules[cnt].from->nbytes;
3404
3405 /* We have the range of character for which the width is
3406 specified described using byte sequences of the multibyte
3407 charset. We have to convert this to UCS4 now. And we
3408 cannot simply convert the beginning and the end of the
3409 sequence, we have to iterate over the byte sequence and
3410 convert it for every single character. */
3411 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
3412
3413 while (nbytes < charmap->width_rules[cnt].to->nbytes
3414 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
3415 nbytes) <= 0)
75cd5204 3416 {
827ff758 3417 /* Find the UCS value for `bytes'. */
827ff758 3418 int inner;
76e680a8
UD
3419 uint32_t wch;
3420 struct charseq *seq =
3421 charmap_find_symbol (charmap, bytes, nbytes);
3422
3423 if (seq == NULL)
3424 wch = ILLEGAL_CHAR_VALUE;
3425 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
3426 wch = seq->ucs4;
3427 else
3428 wch = repertoire_find_value (ctype->repertoire, seq->name,
3429 strlen (seq->name));
827ff758
UD
3430
3431 if (wch != ILLEGAL_CHAR_VALUE)
3432 {
3433 /* Store the value. */
b1c9ad82 3434 size_t nr = wch % ctype->plane_size;
827ff758
UD
3435 size_t depth = 0;
3436
b1c9ad82 3437 while (ctype->names[nr + depth * ctype->plane_size] != wch)
0e16ecfa
UD
3438 {
3439 ++depth;
3440 assert (depth < ctype->plane_cnt);
3441 }
827ff758
UD
3442
3443 ctype->width[nr + depth * ctype->plane_size]
3444 = charmap->width_rules[cnt].width;
3445 }
3446
3447 /* "Increment" the bytes sequence. */
3448 inner = nbytes - 1;
3449 while (inner >= 0 && bytes[inner] == 0xff)
3450 --inner;
75cd5204 3451
827ff758
UD
3452 if (inner < 0)
3453 {
3454 /* We have to extend the byte sequence. */
3455 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
3456 break;
75cd5204 3457
827ff758
UD
3458 bytes[0] = 1;
3459 memset (&bytes[1], 0, nbytes);
3460 ++nbytes;
3461 }
3462 else
3463 {
3464 ++bytes[inner];
3465 while (++inner < nbytes)
3466 bytes[inner] = 0;
3467 }
75cd5204 3468 }
827ff758 3469 }
75cd5204 3470 }
0200214b 3471
0e16ecfa
UD
3472 /* Now set all the other characters of the character set to the
3473 default width. */
3474 curs = NULL;
3475 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
3476 {
3477 struct charseq *data = (struct charseq *) vdata;
3478 size_t nr;
3479 size_t depth;
3480
3481 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3482 data->ucs4 = repertoire_find_value (ctype->repertoire,
3483 data->name, len);
3484
3485 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
3486 {
3487 nr = data->ucs4 % ctype->plane_size;
3488 depth = 0;
3489
3490 while (ctype->names[nr + depth * ctype->plane_size] != data->ucs4)
3491 {
3492 ++depth;
3493 assert (depth < ctype->plane_cnt);
3494 }
3495
3496 if (ctype->width[nr + depth * ctype->plane_size]
3497 == (unsigned char) '\xff')
3498 ctype->width[nr + depth * ctype->plane_size] =
3499 charmap->width_default;
3500 }
3501 }
3502
4b10dd6c
UD
3503 /* Set MB_CUR_MAX. */
3504 ctype->mb_cur_max = charmap->mb_cur_max;
6990326c 3505
4b10dd6c
UD
3506 /* Now determine the table for the transliteration information.
3507
3508 XXX It is not yet clear to me whether it is worth implementing a
3509 complicated algorithm which uses a hash table to locate the entries.
3510 For now I'll use a simple array which can be searching using binary
3511 search. */
3512 if (ctype->translit_copy_locale != NULL)
3513 {
3514 /* Fold in the transliteration information from the locale mentioned
3515 in the `include' statement. */
3516 struct locale_ctype_t *here = ctype;
3517
3518 do
3519 {
3520 struct localedef_t *other = find_locale (LC_CTYPE,
3521 here->translit_copy_locale,
3522 repertoire->name, charmap);
3523
3524 if (other == NULL)
3525 {
3526 error (0, 0, _("\
3527%s: transliteration data from locale `%s' not available"),
3528 "LC_CTYPE", here->translit_copy_locale);
3529 break;
3530 }
3531
3532 here = other->categories[LC_CTYPE].ctype;
3533
3534 /* Enqueue the information if necessary. */
3535 if (here->translit != NULL)
3536 {
3537 struct translit_t *endp = here->translit;
3538 while (endp->next != NULL)
3539 endp = endp->next;
3540
3541 endp->next = ctype->translit;
3542 ctype->translit = here->translit;
3543 }
3544 }
3545 while (here->translit_copy_locale != NULL);
3546 }
3547
3548 if (ctype->translit != NULL)
3549 {
3550 /* First count how many entries we have. This is the upper limit
3551 since some entries from the included files might be overwritten. */
3552 size_t number = 0;
3553 size_t cnt;
3554 struct translit_t *runp = ctype->translit;
3555 struct translit_t **sorted;
3556 size_t from_len, to_len;
3557
3558 while (runp != NULL)
3559 {
3560 ++number;
3561 runp = runp->next;
3562 }
3563
3564 /* Next we allocate an array large enough and fill in the values. */
a9c27b3e
UD
3565 sorted = (struct translit_t **) alloca (number
3566 * sizeof (struct translit_t **));
4b10dd6c
UD
3567 runp = ctype->translit;
3568 number = 0;
3569 do
3570 {
3571 /* Search for the place where to insert this string.
3572 XXX Better use a real sorting algorithm later. */
3573 size_t idx = 0;
3574 int replace = 0;
3575
3576 while (idx < number)
3577 {
3578 int res = wcscmp ((const wchar_t *) sorted[idx]->from,
3579 (const wchar_t *) runp->from);
3580 if (res == 0)
3581 {
3582 replace = 1;
3583 break;
3584 }
3585 if (res > 0)
3586 break;
3587 ++idx;
3588 }
3589
3590 if (replace)
3591 sorted[idx] = runp;
3592 else
3593 {
3594 memmove (&sorted[idx + 1], &sorted[idx],
3595 (number - idx) * sizeof (struct translit_t *));
3596 sorted[idx] = runp;
3597 ++number;
3598 }
3599
3600 runp = runp->next;
3601 }
3602 while (runp != NULL);
3603
3604 /* The next step is putting all the possible transliteration
3605 strings in one memory block so that we can write it out.
3606 We need several different blocks:
9ca23765 3607 - index to the from-string array
4b10dd6c
UD
3608 - from-string array
3609 - index to the to-string array
3610 - to-string array.
4b10dd6c
UD
3611 */
3612 from_len = to_len = 0;
3613 for (cnt = 0; cnt < number; ++cnt)
3614 {
3615 struct translit_to_t *srunp;
3616 from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3617 srunp = sorted[cnt]->to;
3618 while (srunp != NULL)
3619 {
3620 to_len += wcslen ((const wchar_t *) srunp->str) + 1;
3621 srunp = srunp->next;
3622 }
3623 /* Plus one for the extra NUL character marking the end of
3624 the list for the current entry. */
3625 ++to_len;
3626 }
3627
3628 /* We can allocate the arrays for the results. */
4a33c2f5
UD
3629 ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
3630 ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
3631 ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
3632 ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
4b10dd6c
UD
3633
3634 from_len = 0;
3635 to_len = 0;
3636 for (cnt = 0; cnt < number; ++cnt)
3637 {
3638 size_t len;
3639 struct translit_to_t *srunp;
3640
4a33c2f5
UD
3641 ctype->translit_from_idx[cnt] = from_len;
3642 ctype->translit_to_idx[cnt] = to_len;
4b10dd6c
UD
3643
3644 len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4a33c2f5 3645 wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4b10dd6c
UD
3646 (const wchar_t *) sorted[cnt]->from, len);
3647 from_len += len;
3648
4a33c2f5 3649 ctype->translit_to_idx[cnt] = to_len;
4b10dd6c
UD
3650 srunp = sorted[cnt]->to;
3651 while (srunp != NULL)
3652 {
3653 len = wcslen ((const wchar_t *) srunp->str) + 1;
4a33c2f5 3654 wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4b10dd6c
UD
3655 (const wchar_t *) srunp->str, len);
3656 to_len += len;
3657 srunp = srunp->next;
3658 }
4a33c2f5 3659 ctype->translit_to_tbl[to_len++] = L'\0';
4b10dd6c 3660 }
4b10dd6c
UD
3661
3662 /* Store the information about the length. */
04fbc779 3663 ctype->translit_idx_size = number;
4b10dd6c
UD
3664 ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
3665 ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
3666 }
3667 else
3668 {
3669 /* Provide some dummy pointers since we have nothing to write out. */
3670 static uint32_t no_str = { 0 };
3671
4a33c2f5
UD
3672 ctype->translit_from_idx = &no_str;
3673 ctype->translit_from_tbl = &no_str;
3674 ctype->translit_to_tbl = &no_str;
4b10dd6c
UD
3675 ctype->translit_idx_size = 0;
3676 ctype->translit_from_tbl_size = 0;
3677 ctype->translit_to_tbl_size = 0;
3678 }
19bc17a9 3679}