]> git.ipfire.org Git - thirdparty/glibc.git/blame - locale/programs/ld-ctype.c
Update.
[thirdparty/glibc.git] / locale / programs / ld-ctype.c
CommitLineData
01ff9d0b 1/* Copyright (C) 1995-1999, 2000 Free Software Foundation, Inc.
c84142e8 2 This file is part of the GNU C Library.
4b10dd6c 3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
19bc17a9 4
c84142e8
UD
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
19bc17a9 9
c84142e8
UD
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
19bc17a9 14
c84142e8
UD
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
19bc17a9
RM
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
a68b0d31 24#include <alloca.h>
4b10dd6c 25#include <byteswap.h>
19bc17a9 26#include <endian.h>
4b10dd6c 27#include <errno.h>
19bc17a9 28#include <limits.h>
4b10dd6c
UD
29#include <obstack.h>
30#include <stdlib.h>
19bc17a9 31#include <string.h>
4b10dd6c
UD
32#include <wchar.h>
33#include <wctype.h>
34#include <sys/uio.h>
19bc17a9 35
4b10dd6c 36#include "charmap.h"
19bc17a9
RM
37#include "localeinfo.h"
38#include "langinfo.h"
4b10dd6c 39#include "linereader.h"
19bc17a9 40#include "locfile-token.h"
4b10dd6c
UD
41#include "locfile.h"
42#include "localedef.h"
19bc17a9 43
19bc17a9
RM
44#include <assert.h>
45
46
#ifdef PREDEFINED_CLASSES
/* These are the extra bits not in wctype.h since these are not preallocated
   classes.  The constants are unsigned: shifting a signed 1 into bit 31
   overflows `int', which is undefined behaviour.  */
# define _ISwspecial1	(1U << 29)
# define _ISwspecial2	(1U << 30)
# define _ISwspecial3	(1U << 31)
#endif
19bc17a9
RM
54
55
/* The bit used for representing a special class.  BITPOS is the distance
   of a class token from tok_upper; BIT and BITw feed that position to
   _ISbit and _ISwbit respectively.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (_ISbit (BITPOS (class)))
#define BITw(class) (_ISwbit (BITPOS (class)))

/* Expand to the object find_idx returns a pointer to for VALUE in the
   table CTYPE->COLLECTION.  IDX is placed verbatim behind the collection
   name and its `_max'/`_act' companions, so it is either empty or a
   subscript such as `[n]'.  The arguments and the whole expansion are
   parenthesised so the macro is safe in any expression context.  */
#define ELEM(ctype, collection, idx, value)				      \
  (*find_idx ((ctype), &(ctype)->collection idx,			      \
	      &(ctype)->collection##_max idx,				      \
	      &(ctype)->collection##_act idx, (value)))
64
19bc17a9
RM
65
/* Character class words are kept at 16 bits for now so that the output
   stays compatible with former implementations.  Once that compatibility
   is no longer needed the narrow type can be widened to 32 bits as well.  */
#define char_class_t	uint16_t
#define char_class32_t	uint32_t
4b10dd6c
UD
71
72
/* Types to describe a transliteration action.  An action has a from-string,
   which may consist of several characters, and a set of to-strings, each
   of which may consist of several characters as well.  All of them are
   32bit values since this is what the gconv functions use.

   This structure is one element in the chain of to-strings.  */
struct translit_to_t
{
  /* The replacement string itself.  */
  uint32_t *str;

  /* Following element of the chain.  */
  struct translit_to_t *next;
};
83
/* One transliteration entry: a from-string together with the chain of
   its to-strings.  Entries are linked through NEXT.  */
struct translit_t
{
  /* The string to be replaced.  */
  uint32_t *from;

  /* NOTE(review): presumably the file and line where the entry was
     defined -- confirm against the code filling these in.  */
  const char *fname;
  size_t lineno;

  /* Chain of replacement strings.  */
  struct translit_to_t *to;

  /* Following entry.  */
  struct translit_t *next;
};
19bc17a9 95
a673fbcb
UD
/* One element in the list hanging off `translit_ignore'.
   NOTE(review): FROM and TO look like the bounds of a character range and
   FNAME/LINENO like the place of definition -- confirm with the parser.  */
struct translit_ignore_t
{
  uint32_t from;
  uint32_t to;

  const char *fname;
  size_t lineno;

  /* Following element of the list.  */
  struct translit_ignore_t *next;
};
106
19bc17a9
RM
107
108/* The real definition of the struct for the LC_CTYPE locale. */
109struct locale_ctype_t
110{
4b10dd6c 111 uint32_t *charnames;
19bc17a9
RM
112 size_t charnames_max;
113 size_t charnames_act;
114
4b10dd6c
UD
115 struct repertoire_t *repertoire;
116
117 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
118#define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
ba1ffaa1 119 size_t nr_charclass;
19bc17a9 120 const char *classnames[MAX_NR_CHARCLASS];
4b10dd6c
UD
121 uint32_t last_class_char;
122 uint32_t class256_collection[256];
123 uint32_t *class_collection;
19bc17a9
RM
124 size_t class_collection_max;
125 size_t class_collection_act;
4b10dd6c
UD
126 uint32_t class_done;
127
128 struct charseq **mbdigits;
129 size_t mbdigits_act;
130 size_t mbdigits_max;
131 uint32_t *wcdigits;
132 size_t wcdigits_act;
133 size_t wcdigits_max;
134
135 struct charseq *mboutdigits[10];
136 uint32_t wcoutdigits[10];
137 size_t outdigits_act;
19bc17a9
RM
138
139 /* If the following number ever turns out to be too small simply
140 increase it. But I doubt it will. --drepper@gnu */
141#define MAX_NR_CHARMAP 16
142 const char *mapnames[MAX_NR_CHARMAP];
4b10dd6c
UD
143 uint32_t *map_collection[MAX_NR_CHARMAP];
144 uint32_t map256_collection[2][256];
9a0a462c
UD
145 size_t map_collection_max[MAX_NR_CHARMAP];
146 size_t map_collection_act[MAX_NR_CHARMAP];
19bc17a9
RM
147 size_t map_collection_nr;
148 size_t last_map_idx;
4b10dd6c
UD
149 int tomap_done[MAX_NR_CHARMAP];
150
151 /* Transliteration information. */
152 const char *translit_copy_locale;
153 const char *translit_copy_repertoire;
154 struct translit_t *translit;
a673fbcb
UD
155 struct translit_ignore_t *translit_ignore;
156
157 uint32_t *default_missing;
158 const char *default_missing_file;
159 size_t default_missing_lineno;
19bc17a9
RM
160
161 /* The arrays for the binary representation. */
4b10dd6c
UD
162 uint32_t plane_size;
163 uint32_t plane_cnt;
19bc17a9
RM
164 char_class_t *ctype_b;
165 char_class32_t *ctype32_b;
4a33c2f5
UD
166 uint32_t *names;
167 uint32_t **map;
49f2be5b 168 uint32_t **map32;
4b10dd6c
UD
169 uint32_t *class_name_ptr;
170 uint32_t *map_name_ptr;
75cd5204 171 unsigned char *width;
4b10dd6c 172 uint32_t mb_cur_max;
6990326c 173 const char *codeset_name;
4a33c2f5
UD
174 uint32_t translit_hash_size;
175 uint32_t translit_hash_layers;
176 uint32_t *translit_from_idx;
177 uint32_t *translit_from_tbl;
178 uint32_t *translit_to_idx;
179 uint32_t *translit_to_tbl;
4b10dd6c
UD
180 size_t translit_idx_size;
181 size_t translit_from_tbl_size;
182 size_t translit_to_tbl_size;
183
a673fbcb 184 struct obstack mempool;
19bc17a9
RM
185};
186
187
4b10dd6c
UD
/* Allocation hooks used by the obstack macros.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free


/* Prototypes for local functions.  */
static void ctype_startup (struct linereader *lr,
			   struct localedef_t *locale,
			   struct charmap_t *charmap,
			   int ignore_content);

static void ctype_class_new (struct linereader *lr,
			     struct locale_ctype_t *ctype,
			     const char *name);

static void ctype_map_new (struct linereader *lr,
			   struct locale_ctype_t *ctype,
			   const char *name,
			   struct charmap_t *charmap);

static uint32_t *find_idx (struct locale_ctype_t *ctype,
			   uint32_t **table,
			   size_t *max,
			   size_t *act,
			   unsigned int idx);

static void set_class_defaults (struct locale_ctype_t *ctype,
				struct charmap_t *charmap,
				struct repertoire_t *repertoire);

static void allocate_arrays (struct locale_ctype_t *ctype,
			     struct charmap_t *charmap,
			     struct repertoire_t *repertoire);
19bc17a9
RM
208
209
4b10dd6c
UD
/* Symbolic names tried for the ten default input digits when the plain
   digit character is not found in the charmap.  */
static const char *longnames[] =
{
  "zero",
  "one",
  "two",
  "three",
  "four",
  "five",
  "six",
  "seven",
  "eight",
  "nine"
};

/* The ten digit characters themselves, indexed by their value.  */
static const unsigned char digits[] = "0123456789";
216
217
218static void
19bc17a9 219ctype_startup (struct linereader *lr, struct localedef_t *locale,
4b10dd6c 220 struct charmap_t *charmap, int ignore_content)
19bc17a9
RM
221{
222 unsigned int cnt;
223 struct locale_ctype_t *ctype;
224
4b10dd6c 225 if (!ignore_content)
19bc17a9 226 {
4b10dd6c
UD
227 /* Allocate the needed room. */
228 locale->categories[LC_CTYPE].ctype = ctype =
229 (struct locale_ctype_t *) xcalloc (1, sizeof (struct locale_ctype_t));
230
231 /* We have seen no names yet. */
232 ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
233 ctype->charnames =
234 (unsigned int *) xmalloc (ctype->charnames_max
235 * sizeof (unsigned int));
236 for (cnt = 0; cnt < 256; ++cnt)
237 ctype->charnames[cnt] = cnt;
238 ctype->charnames_act = 256;
239
240 /* Fill character class information. */
241 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
242 /* The order of the following instructions determines the bit
243 positions! */
244 ctype_class_new (lr, ctype, "upper");
245 ctype_class_new (lr, ctype, "lower");
246 ctype_class_new (lr, ctype, "alpha");
247 ctype_class_new (lr, ctype, "digit");
248 ctype_class_new (lr, ctype, "xdigit");
249 ctype_class_new (lr, ctype, "space");
250 ctype_class_new (lr, ctype, "print");
251 ctype_class_new (lr, ctype, "graph");
252 ctype_class_new (lr, ctype, "blank");
253 ctype_class_new (lr, ctype, "cntrl");
254 ctype_class_new (lr, ctype, "punct");
255 ctype_class_new (lr, ctype, "alnum");
011ebfab 256#ifdef PREDEFINED_CLASSES
4b10dd6c
UD
257 /* The following are extensions from ISO 14652. */
258 ctype_class_new (lr, ctype, "left_to_right");
259 ctype_class_new (lr, ctype, "right_to_left");
260 ctype_class_new (lr, ctype, "num_terminator");
261 ctype_class_new (lr, ctype, "num_separator");
262 ctype_class_new (lr, ctype, "segment_separator");
263 ctype_class_new (lr, ctype, "block_separator");
264 ctype_class_new (lr, ctype, "direction_control");
265 ctype_class_new (lr, ctype, "sym_swap_layout");
266 ctype_class_new (lr, ctype, "char_shape_selector");
267 ctype_class_new (lr, ctype, "num_shape_selector");
268 ctype_class_new (lr, ctype, "non_spacing");
269 ctype_class_new (lr, ctype, "non_spacing_level3");
270 ctype_class_new (lr, ctype, "normal_connect");
271 ctype_class_new (lr, ctype, "r_connect");
272 ctype_class_new (lr, ctype, "no_connect");
273 ctype_class_new (lr, ctype, "no_connect-space");
274 ctype_class_new (lr, ctype, "vowel_connect");
011ebfab 275#endif
4b10dd6c
UD
276
277 ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
278 ctype->class_collection
279 = (uint32_t *) xcalloc (sizeof (unsigned long int),
280 ctype->class_collection_max);
281 ctype->class_collection_act = 256;
282
283 /* Fill character map information. */
4b10dd6c
UD
284 ctype->last_map_idx = MAX_NR_CHARMAP;
285 ctype_map_new (lr, ctype, "toupper", charmap);
286 ctype_map_new (lr, ctype, "tolower", charmap);
011ebfab 287#ifdef PREDEFINED_CLASSES
4b10dd6c 288 ctype_map_new (lr, ctype, "tosymmetric", charmap);
011ebfab 289#endif
4b10dd6c
UD
290
291 /* Fill first 256 entries in `toXXX' arrays. */
292 for (cnt = 0; cnt < 256; ++cnt)
293 {
294 ctype->map_collection[0][cnt] = cnt;
295 ctype->map_collection[1][cnt] = cnt;
9e2b7438 296#ifdef PREDEFINED_CLASSES
4b10dd6c 297 ctype->map_collection[2][cnt] = cnt;
9e2b7438 298#endif
4b10dd6c
UD
299 ctype->map256_collection[0][cnt] = cnt;
300 ctype->map256_collection[1][cnt] = cnt;
301 }
302
a673fbcb 303 obstack_init (&ctype->mempool);
19bc17a9
RM
304 }
305}
306
307
308void
4b10dd6c 309ctype_finish (struct localedef_t *locale, struct charmap_t *charmap)
19bc17a9
RM
310{
311 /* See POSIX.2, table 2-6 for the meaning of the following table. */
312#define NCLASS 12
313 static const struct
314 {
315 const char *name;
316 const char allow[NCLASS];
317 }
318 valid_table[NCLASS] =
319 {
320 /* The order is important. See token.h for more information.
321 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
322 { "upper", "--MX-XDDXXX-" },
323 { "lower", "--MX-XDDXXX-" },
324 { "alpha", "---X-XDDXXX-" },
325 { "digit", "XXX--XDDXXX-" },
326 { "xdigit", "-----XDDXXX-" },
327 { "space", "XXXXX------X" },
328 { "print", "---------X--" },
329 { "graph", "---------X--" },
330 { "blank", "XXXXXM-----X" },
331 { "cntrl", "XXXXX-XX--XX" },
332 { "punct", "XXXXX-DD-X-X" },
333 { "alnum", "-----XDDXXX-" }
334 };
335 size_t cnt;
336 int cls1, cls2;
4b10dd6c
UD
337 uint32_t space_value;
338 struct charseq *space_seq;
19bc17a9 339 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
4b10dd6c 340 int warned;
19bc17a9 341
b9eb05d6
UD
342 /* Now resolve copying and also handle completely missing definitions. */
343 if (ctype == NULL)
344 {
70e51ab9
UD
345 const char *repertoire_name;
346
b9eb05d6
UD
347 /* First see whether we were supposed to copy. If yes, find the
348 actual definition. */
349 if (locale->copy_name[LC_CTYPE] != NULL)
350 {
351 /* Find the copying locale. This has to happen transitively since
352 the locale we are copying from might also copying another one. */
353 struct localedef_t *from = locale;
354
355 do
356 from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
357 from->repertoire_name, charmap);
358 while (from->categories[LC_CTYPE].ctype == NULL
359 && from->copy_name[LC_CTYPE] != NULL);
360
361 ctype = locale->categories[LC_CTYPE].ctype
362 = from->categories[LC_CTYPE].ctype;
363 }
364
365 /* If there is still no definition issue an warning and create an
366 empty one. */
367 if (ctype == NULL)
368 {
f6ada7ad
UD
369 if (! be_quiet)
370 error (0, 0, _("No definition for %s category found"), "LC_CTYPE");
b9eb05d6
UD
371 ctype_startup (NULL, locale, charmap, 0);
372 ctype = locale->categories[LC_CTYPE].ctype;
373 }
70e51ab9
UD
374
375 /* Get the repertoire we have to use. */
376 repertoire_name = locale->repertoire_name ?: repertoire_global;
377 if (repertoire_name != NULL)
378 ctype->repertoire = repertoire_read (repertoire_name);
b9eb05d6
UD
379 }
380
db76d943
UD
381 /* We need the name of the currently used 8-bit character set to
382 make correct conversion between this 8-bit representation and the
383 ISO 10646 character set used internally for wide characters. */
384 ctype->codeset_name = charmap->code_set_name;
385 if (ctype->codeset_name == NULL)
386 {
387 if (! be_quiet)
388 error (0, 0, "no character set name specified in charmap");
389 ctype->codeset_name = "//UNKNOWN//";
390 }
391
19bc17a9 392 /* Set default value for classes not specified. */
4b10dd6c 393 set_class_defaults (ctype, charmap, ctype->repertoire);
19bc17a9
RM
394
395 /* Check according to table. */
42d7c593 396 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
19bc17a9 397 {
4b10dd6c 398 uint32_t tmp = ctype->class_collection[cnt];
19bc17a9 399
4b10dd6c
UD
400 if (tmp != 0)
401 {
402 for (cls1 = 0; cls1 < NCLASS; ++cls1)
403 if ((tmp & _ISwbit (cls1)) != 0)
404 for (cls2 = 0; cls2 < NCLASS; ++cls2)
405 if (valid_table[cls1].allow[cls2] != '-')
19bc17a9 406 {
4b10dd6c
UD
407 int eq = (tmp & _ISwbit (cls2)) != 0;
408 switch (valid_table[cls1].allow[cls2])
19bc17a9 409 {
4b10dd6c
UD
410 case 'M':
411 if (!eq)
412 {
413 uint32_t value = ctype->charnames[cnt];
414
415 if (!be_quiet)
416 error (0, 0, _("\
417character L'\\u%0*x' in class `%s' must be in class `%s'"),
418 value > 0xffff ? 8 : 4, value,
419 valid_table[cls1].name,
420 valid_table[cls2].name);
421 }
422 break;
423
424 case 'X':
425 if (eq)
426 {
427 uint32_t value = ctype->charnames[cnt];
428
429 if (!be_quiet)
430 error (0, 0, _("\
431character L'\\u%0*x' in class `%s' must not be in class `%s'"),
432 value > 0xffff ? 8 : 4, value,
433 valid_table[cls1].name,
434 valid_table[cls2].name);
435 }
436 break;
437
438 case 'D':
439 ctype->class_collection[cnt] |= _ISwbit (cls2);
440 break;
441
442 default:
443 error (5, 0, _("internal error in %s, line %u"),
444 __FUNCTION__, __LINE__);
19bc17a9 445 }
4b10dd6c
UD
446 }
447 }
448 }
449
450 for (cnt = 0; cnt < 256; ++cnt)
451 {
452 uint32_t tmp = ctype->class256_collection[cnt];
19bc17a9 453
4b10dd6c
UD
454 if (tmp != 0)
455 {
456 for (cls1 = 0; cls1 < NCLASS; ++cls1)
457 if ((tmp & _ISbit (cls1)) != 0)
458 for (cls2 = 0; cls2 < NCLASS; ++cls2)
459 if (valid_table[cls1].allow[cls2] != '-')
460 {
461 int eq = (tmp & _ISbit (cls2)) != 0;
462 switch (valid_table[cls1].allow[cls2])
19bc17a9 463 {
4b10dd6c
UD
464 case 'M':
465 if (!eq)
466 {
467 char buf[17];
468
5d431a3e 469 snprintf (buf, sizeof buf, "\\%Zo", cnt);
4b10dd6c
UD
470
471 if (!be_quiet)
472 error (0, 0, _("\
473character '%s' in class `%s' must be in class `%s'"),
474 buf, valid_table[cls1].name,
475 valid_table[cls2].name);
476 }
477 break;
478
479 case 'X':
480 if (eq)
481 {
482 char buf[17];
483
5d431a3e 484 snprintf (buf, sizeof buf, "\\%Zo", cnt);
4b10dd6c
UD
485
486 if (!be_quiet)
487 error (0, 0, _("\
488character '%s' in class `%s' must not be in class `%s'"),
489 buf, valid_table[cls1].name,
490 valid_table[cls2].name);
491 }
492 break;
493
494 case 'D':
495 ctype->class256_collection[cnt] |= _ISbit (cls2);
496 break;
497
498 default:
499 error (5, 0, _("internal error in %s, line %u"),
500 __FUNCTION__, __LINE__);
19bc17a9 501 }
4b10dd6c
UD
502 }
503 }
19bc17a9
RM
504 }
505
506 /* ... and now test <SP> as a special case. */
4b10dd6c
UD
507 space_value = repertoire_find_value (ctype->repertoire, "SP", 2);
508 if (space_value == ILLEGAL_CHAR_VALUE)
880f421f
UD
509 {
510 if (!be_quiet)
511 error (0, 0, _("character <SP> not defined in character map"));
512 }
c84142e8
UD
513 else if (((cnt = BITPOS (tok_space),
514 (ELEM (ctype, class_collection, , space_value)
4b10dd6c 515 & BITw (tok_space)) == 0)
c84142e8
UD
516 || (cnt = BITPOS (tok_blank),
517 (ELEM (ctype, class_collection, , space_value)
4b10dd6c 518 & BITw (tok_blank)) == 0)))
880f421f
UD
519 {
520 if (!be_quiet)
521 error (0, 0, _("<SP> character not in class `%s'"),
522 valid_table[cnt].name);
523 }
c84142e8
UD
524 else if (((cnt = BITPOS (tok_punct),
525 (ELEM (ctype, class_collection, , space_value)
4b10dd6c 526 & BITw (tok_punct)) != 0)
c84142e8
UD
527 || (cnt = BITPOS (tok_graph),
528 (ELEM (ctype, class_collection, , space_value)
4b10dd6c 529 & BITw (tok_graph))
880f421f
UD
530 != 0)))
531 {
532 if (!be_quiet)
533 error (0, 0, _("<SP> character must not be in class `%s'"),
534 valid_table[cnt].name);
535 }
19bc17a9 536 else
4b10dd6c
UD
537 ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
538
539 space_seq = charmap_find_value (charmap, "SP", 2);
540 if (space_seq == NULL || space_seq->nbytes != 1)
541 {
542 if (!be_quiet)
543 error (0, 0, _("character <SP> not defined in character map"));
544 }
545 else if (((cnt = BITPOS (tok_space),
546 (ctype->class256_collection[space_seq->bytes[0]]
547 & BIT (tok_space)) == 0)
548 || (cnt = BITPOS (tok_blank),
549 (ctype->class256_collection[space_seq->bytes[0]]
550 & BIT (tok_blank)) == 0)))
551 {
552 if (!be_quiet)
553 error (0, 0, _("<SP> character not in class `%s'"),
554 valid_table[cnt].name);
555 }
556 else if (((cnt = BITPOS (tok_punct),
557 (ctype->class256_collection[space_seq->bytes[0]]
558 & BIT (tok_punct)) != 0)
559 || (cnt = BITPOS (tok_graph),
560 (ctype->class256_collection[space_seq->bytes[0]]
561 & BIT (tok_graph)) != 0)))
562 {
563 if (!be_quiet)
564 error (0, 0, _("<SP> character must not be in class `%s'"),
565 valid_table[cnt].name);
566 }
567 else
568 ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
75cd5204
RM
569
570 /* Now that the tests are done make sure the name array contains all
571 characters which are handled in the WIDTH section of the
572 character set definition file. */
4b10dd6c
UD
573 if (charmap->width_rules != NULL)
574 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
75cd5204 575 {
827ff758
UD
576 unsigned char bytes[charmap->mb_cur_max];
577 int nbytes = charmap->width_rules[cnt].from->nbytes;
578
579 /* We have the range of character for which the width is
580 specified described using byte sequences of the multibyte
581 charset. We have to convert this to UCS4 now. And we
582 cannot simply convert the beginning and the end of the
583 sequence, we have to iterate over the byte sequence and
584 convert it for every single character. */
585 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
586
587 while (nbytes < charmap->width_rules[cnt].to->nbytes
588 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
589 nbytes) <= 0)
590 {
591 /* Find the UCS value for `bytes'. */
827ff758 592 int inner;
76e680a8
UD
593 uint32_t wch;
594 struct charseq *seq = charmap_find_symbol (charmap, bytes, nbytes);
595
596 if (seq == NULL)
597 wch = ILLEGAL_CHAR_VALUE;
598 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
599 wch = seq->ucs4;
600 else
601 wch = repertoire_find_value (ctype->repertoire, seq->name,
602 strlen (seq->name));
827ff758
UD
603
604 if (wch != ILLEGAL_CHAR_VALUE)
605 /* We are only interested in the side-effects of the
606 `find_idx' call. It will add appropriate entries in
607 the name array if this is necessary. */
608 (void) find_idx (ctype, NULL, NULL, NULL, wch);
609
610 /* "Increment" the bytes sequence. */
611 inner = nbytes - 1;
612 while (inner >= 0 && bytes[inner] == 0xff)
613 --inner;
614
615 if (inner < 0)
616 {
617 /* We have to extend the byte sequence. */
618 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
619 break;
620
621 bytes[0] = 1;
622 memset (&bytes[1], 0, nbytes);
623 ++nbytes;
624 }
625 else
626 {
627 ++bytes[inner];
628 while (++inner < nbytes)
629 bytes[inner] = 0;
630 }
631 }
4b10dd6c
UD
632 }
633
634 /* There must be a multiple of 10 digits. */
635 if (ctype->mbdigits_act % 10 != 0)
636 {
637 assert (ctype->mbdigits_act == ctype->wcdigits_act);
638 ctype->wcdigits_act -= ctype->mbdigits_act % 10;
639 ctype->mbdigits_act -= ctype->mbdigits_act % 10;
640 error (0, 0, _("`digit' category has not entries in groups of ten"));
641 }
642
643 /* Check the input digits. There must be a multiple of ten available.
42d7c593 644 In each group it could be that one or the other character is missing.
4b10dd6c
UD
645 In this case the whole group must be removed. */
646 cnt = 0;
647 while (cnt < ctype->mbdigits_act)
648 {
649 size_t inner;
650 for (inner = 0; inner < 10; ++inner)
651 if (ctype->mbdigits[cnt + inner] == NULL)
652 break;
653
654 if (inner == 10)
655 cnt += 10;
656 else
657 {
658 /* Remove the group. */
659 memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
660 ((ctype->wcdigits_act - cnt - 10)
661 * sizeof (ctype->mbdigits[0])));
662 ctype->mbdigits_act -= 10;
663 }
664 }
665
666 /* If no input digits are given use the default. */
667 if (ctype->mbdigits_act == 0)
668 {
669 if (ctype->mbdigits_max == 0)
670 {
671 ctype->mbdigits = obstack_alloc (&charmap->mem_pool,
672 10 * sizeof (struct charseq *));
673 ctype->mbdigits_max = 10;
674 }
675
676 for (cnt = 0; cnt < 10; ++cnt)
677 {
678 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
679 digits + cnt, 1);
680 if (ctype->mbdigits[cnt] == NULL)
681 {
682 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
683 longnames[cnt],
684 strlen (longnames[cnt]));
685 if (ctype->mbdigits[cnt] == NULL)
686 {
687 /* Hum, this ain't good. */
688 error (0, 0, _("\
689no input digits defined and none of the standard names in the charmap"));
690
691 ctype->mbdigits[cnt] = obstack_alloc (&charmap->mem_pool,
692 sizeof (struct charseq) + 1);
693
694 /* This is better than nothing. */
695 ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
696 ctype->mbdigits[cnt]->nbytes = 1;
697 }
698 }
699 }
700
701 ctype->mbdigits_act = 10;
702 }
703
704 /* Check the wide character input digits. There must be a multiple
42d7c593 705 of ten available. In each group it could be that one or the other
4b10dd6c
UD
706 character is missing. In this case the whole group must be
707 removed. */
708 cnt = 0;
709 while (cnt < ctype->wcdigits_act)
710 {
711 size_t inner;
712 for (inner = 0; inner < 10; ++inner)
713 if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
714 break;
715
716 if (inner == 10)
717 cnt += 10;
718 else
719 {
720 /* Remove the group. */
721 memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
722 ((ctype->wcdigits_act - cnt - 10)
723 * sizeof (ctype->wcdigits[0])));
724 ctype->wcdigits_act -= 10;
725 }
726 }
727
728 /* If no input digits are given use the default. */
729 if (ctype->wcdigits_act == 0)
730 {
731 if (ctype->wcdigits_max == 0)
732 {
733 ctype->wcdigits = obstack_alloc (&charmap->mem_pool,
734 10 * sizeof (uint32_t));
735 ctype->wcdigits_max = 10;
736 }
737
738 for (cnt = 0; cnt < 10; ++cnt)
739 ctype->wcdigits[cnt] = L'0' + cnt;
740
741 ctype->mbdigits_act = 10;
742 }
743
744 /* Check the outdigits. */
745 warned = 0;
746 for (cnt = 0; cnt < 10; ++cnt)
747 if (ctype->mboutdigits[cnt] == NULL)
748 {
749 static struct charseq replace[2];
750
751 if (!warned)
752 {
753 error (0, 0, _("\
754not all characters used in `outdigit' are available in the charmap"));
755 warned = 1;
756 }
757
758 replace[0].nbytes = 1;
759 replace[0].bytes[0] = '?';
760 replace[0].bytes[1] = '\0';
761 ctype->mboutdigits[cnt] = &replace[0];
762 }
763
764 warned = 0;
765 for (cnt = 0; cnt < 10; ++cnt)
766 if (ctype->wcoutdigits[cnt] == 0)
767 {
768 if (!warned)
769 {
770 error (0, 0, _("\
771not all characters used in `outdigit' are available in the repertoire"));
772 warned = 1;
773 }
774
775 ctype->wcoutdigits[cnt] = L'?';
75cd5204 776 }
19bc17a9
RM
777}
778
779
780void
4b10dd6c 781ctype_output (struct localedef_t *locale, struct charmap_t *charmap,
75cd5204 782 const char *output_path)
19bc17a9
RM
783{
784 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
785 const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
5491da0d 786 + (ctype->map_collection_nr - 2));
75cd5204
RM
787 struct iovec iov[2 + nelems + ctype->nr_charclass
788 + ctype->map_collection_nr];
19bc17a9 789 struct locale_file data;
4b10dd6c 790 uint32_t idx[nelems + 1];
75cd5204 791 size_t elem, cnt, offset, total;
4b10dd6c 792 char *cp;
19bc17a9
RM
793
794 /* Now prepare the output: Find the sizes of the table we can use. */
4b10dd6c 795 allocate_arrays (ctype, charmap, ctype->repertoire);
19bc17a9
RM
796
797 data.magic = LIMAGIC (LC_CTYPE);
798 data.n = nelems;
799 iov[0].iov_base = (void *) &data;
800 iov[0].iov_len = sizeof (data);
801
802 iov[1].iov_base = (void *) idx;
803 iov[1].iov_len = sizeof (idx);
804
805 idx[0] = iov[0].iov_len + iov[1].iov_len;
806 offset = 0;
807
808 for (elem = 0; elem < nelems; ++elem)
809 {
810 if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
811 switch (elem)
812 {
813#define CTYPE_DATA(name, base, len) \
814 case _NL_ITEM_INDEX (name): \
ce7a5ef4
RM
815 iov[2 + elem + offset].iov_base = (base); \
816 iov[2 + elem + offset].iov_len = (len); \
75cd5204
RM
817 if (elem + 1 < nelems) \
818 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
19bc17a9
RM
819 break
820
821 CTYPE_DATA (_NL_CTYPE_CLASS,
822 ctype->ctype_b,
823 (256 + 128) * sizeof (char_class_t));
824
4a33c2f5
UD
825 CTYPE_DATA (_NL_CTYPE_TOUPPER,
826 ctype->map[0],
f1d8b804 827 (256 + 128) * sizeof (uint32_t));
4a33c2f5
UD
828 CTYPE_DATA (_NL_CTYPE_TOLOWER,
829 ctype->map[1],
f1d8b804 830 (256 + 128) * sizeof (uint32_t));
19bc17a9 831
49f2be5b
UD
832 CTYPE_DATA (_NL_CTYPE_TOUPPER32,
833 ctype->map32[0],
f1d8b804 834 (ctype->plane_size * ctype->plane_cnt)
49f2be5b
UD
835 * sizeof (uint32_t));
836 CTYPE_DATA (_NL_CTYPE_TOLOWER32,
837 ctype->map32[1],
f1d8b804 838 (ctype->plane_size * ctype->plane_cnt)
49f2be5b
UD
839 * sizeof (uint32_t));
840
19bc17a9
RM
841 CTYPE_DATA (_NL_CTYPE_CLASS32,
842 ctype->ctype32_b,
843 (ctype->plane_size * ctype->plane_cnt
844 * sizeof (char_class32_t)));
845
4a33c2f5
UD
846 CTYPE_DATA (_NL_CTYPE_NAMES,
847 ctype->names, (ctype->plane_size * ctype->plane_cnt
848 * sizeof (uint32_t)));
849
850 CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_SIZE,
851 &ctype->translit_hash_size, sizeof (uint32_t));
852 CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_LAYERS,
853 &ctype->translit_hash_layers, sizeof (uint32_t));
854
855 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
856 ctype->translit_from_idx,
4b10dd6c
UD
857 ctype->translit_idx_size);
858
4a33c2f5
UD
859 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
860 ctype->translit_from_tbl,
4b10dd6c
UD
861 ctype->translit_from_tbl_size);
862
4a33c2f5
UD
863 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
864 ctype->translit_to_idx,
4b10dd6c
UD
865 ctype->translit_idx_size);
866
4a33c2f5
UD
867 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
868 ctype->translit_to_tbl, ctype->translit_to_tbl_size);
4b10dd6c 869
4a33c2f5 870 CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
4b10dd6c 871 &ctype->plane_size, sizeof (uint32_t));
4a33c2f5 872 CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
4b10dd6c 873 &ctype->plane_cnt, sizeof (uint32_t));
19bc17a9 874
75cd5204
RM
875 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
876 /* The class name array. */
877 total = 0;
878 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
879 {
880 iov[2 + elem + offset].iov_base
881 = (void *) ctype->classnames[cnt];
882 iov[2 + elem + offset].iov_len
883 = strlen (ctype->classnames[cnt]) + 1;
884 total += iov[2 + elem + offset].iov_len;
885 }
ce7a5ef4
RM
886 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
887 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
888 total += 1 + (4 - ((total + 1) % 4));
75cd5204 889
4b10dd6c 890 idx[elem + 1] = idx[elem] + total;
75cd5204
RM
891 break;
892
893 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
894 /* The class name array. */
895 total = 0;
896 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
897 {
898 iov[2 + elem + offset].iov_base
899 = (void *) ctype->mapnames[cnt];
900 iov[2 + elem + offset].iov_len
901 = strlen (ctype->mapnames[cnt]) + 1;
902 total += iov[2 + elem + offset].iov_len;
903 }
ce7a5ef4
RM
904 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
905 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
906 total += 1 + (4 - ((total + 1) % 4));
75cd5204 907
4b10dd6c 908 idx[elem + 1] = idx[elem] + total;
75cd5204 909 break;
19bc17a9
RM
910
911 CTYPE_DATA (_NL_CTYPE_WIDTH,
5866b131
UD
912 ctype->width,
913 (ctype->plane_size * ctype->plane_cnt + 3) & ~3ul);
19bc17a9 914
0200214b 915 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
4b10dd6c 916 &ctype->mb_cur_max, sizeof (uint32_t));
0200214b 917
ce7a5ef4
RM
918 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
919 total = strlen (ctype->codeset_name) + 1;
920 if (total % 4 == 0)
921 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
922 else
923 {
924 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
9756dfe1
UD
925 memset (mempcpy (iov[2 + elem + offset].iov_base,
926 ctype->codeset_name, total),
927 '\0', 4 - (total & 3));
ce7a5ef4
RM
928 total = (total + 3) & ~3;
929 }
930 iov[2 + elem + offset].iov_len = total;
4b10dd6c
UD
931 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
932 break;
933
4a33c2f5 934 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
4b10dd6c
UD
935 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
936 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
4a33c2f5
UD
937 *(uint32_t *) iov[2 + elem + offset].iov_base =
938 ctype->mbdigits_act / 10;
a9c27b3e 939 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
4b10dd6c
UD
940 break;
941
4a33c2f5 942 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
4b10dd6c
UD
943 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
944 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
4a33c2f5
UD
945 *(uint32_t *) iov[2 + elem + offset].iov_base =
946 ctype->wcdigits_act / 10;
a9c27b3e 947 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
4b10dd6c
UD
948 break;
949
950 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
951 /* Compute the length of all possible characters. For INDIGITS
952 there might be more than one. We simply concatenate all of
953 them with a NUL byte following. The NUL byte wouldn't be
954 necessary but it makes it easier for the user. */
955 total = 0;
956 for (cnt = elem - _NL_CTYPE_INDIGITS0_MB;
957 cnt < ctype->mbdigits_act; cnt += 10)
958 total += ctype->mbdigits[cnt]->nbytes + 1;
959 iov[2 + elem + offset].iov_base = (char *) alloca (total);
960 iov[2 + elem + offset].iov_len = total;
961
962 cp = iov[2 + elem + offset].iov_base;
963 for (cnt = elem - _NL_CTYPE_INDIGITS0_MB;
964 cnt < ctype->mbdigits_act; cnt += 10)
965 {
966 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
967 ctype->mbdigits[cnt]->nbytes);
968 *cp++ = '\0';
969 }
a9c27b3e 970 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
4b10dd6c
UD
971 break;
972
973 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
974 /* Compute the length of all possible characters. For INDIGITS
975 there might be more than one. We simply concatenate all of
976 them with a NUL byte following. The NUL byte wouldn't be
977 necessary but it makes it easier for the user. */
978 cnt = elem - _NL_CTYPE_OUTDIGIT0_MB;
979 total = ctype->mboutdigits[cnt]->nbytes + 1;
980 iov[2 + elem + offset].iov_base = (char *) alloca (total);
981 iov[2 + elem + offset].iov_len = total;
982
983 *(char *) mempcpy (iov[2 + elem + offset].iov_base,
984 ctype->mbdigits[cnt]->bytes,
985 ctype->mbdigits[cnt]->nbytes) = '\0';
a9c27b3e 986 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
4b10dd6c
UD
987 break;
988
4a33c2f5 989 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
4b10dd6c
UD
990 total = ctype->wcdigits_act / 10;
991
992 iov[2 + elem + offset].iov_base =
993 (uint32_t *) alloca (total * sizeof (uint32_t));
994 iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
995
4a33c2f5 996 for (cnt = elem - _NL_CTYPE_INDIGITS0_WC;
4b10dd6c
UD
997 cnt < ctype->wcdigits_act; cnt += 10)
998 ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
4a33c2f5 999 = ctype->wcdigits[cnt];
a9c27b3e 1000 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
4b10dd6c
UD
1001 break;
1002
4a33c2f5
UD
1003 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1004 cnt = elem - _NL_CTYPE_OUTDIGIT0_WC;
4b10dd6c
UD
1005 iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
1006 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
a9c27b3e 1007 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
4b10dd6c
UD
1008 break;
1009
19bc17a9
RM
1010 default:
1011 assert (! "unknown CTYPE element");
1012 }
1013 else
1014 {
1015 /* Handle extra maps. */
5491da0d 1016 size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) + 2;
19bc17a9 1017
49f2be5b 1018 iov[2 + elem + offset].iov_base = ctype->map32[nr];
75cd5204 1019 iov[2 + elem + offset].iov_len = ((ctype->plane_size
f1d8b804 1020 * ctype->plane_cnt)
4b10dd6c 1021 * sizeof (uint32_t));
19bc17a9 1022
4b10dd6c 1023 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
75cd5204 1024 }
19bc17a9 1025 }
19bc17a9 1026
75cd5204
RM
1027 assert (2 + elem + offset == (nelems + ctype->nr_charclass
1028 + ctype->map_collection_nr + 2));
19bc17a9 1029
75cd5204 1030 write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
19bc17a9
RM
1031}
1032
1033
4b10dd6c
UD
1034/* Local functions. */
1035static void
1036ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1037 const char *name)
19bc17a9 1038{
4b10dd6c 1039 size_t cnt;
19bc17a9 1040
4b10dd6c
UD
1041 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1042 if (strcmp (ctype->classnames[cnt], name) == 0)
1043 break;
19bc17a9 1044
4b10dd6c
UD
1045 if (cnt < ctype->nr_charclass)
1046 {
1047 lr_error (lr, _("character class `%s' already defined"), name);
1048 return;
1049 }
19bc17a9 1050
4b10dd6c
UD
1051 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1052 /* Exit code 2 is prescribed in P1003.2b. */
1053 error (2, 0, _("\
5d431a3e 1054implementation limit: no more than %Zd character classes allowed"),
4b10dd6c 1055 MAX_NR_CHARCLASS);
19bc17a9 1056
4b10dd6c 1057 ctype->classnames[ctype->nr_charclass++] = name;
19bc17a9
RM
1058}
1059
1060
4b10dd6c
UD
1061static void
1062ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1063 const char *name, struct charmap_t *charmap)
19bc17a9 1064{
4b10dd6c 1065 size_t max_chars = 0;
ba1ffaa1 1066 size_t cnt;
19bc17a9 1067
4b10dd6c 1068 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
19bc17a9 1069 {
4b10dd6c
UD
1070 if (strcmp (ctype->mapnames[cnt], name) == 0)
1071 break;
1072
1073 if (max_chars < ctype->map_collection_max[cnt])
1074 max_chars = ctype->map_collection_max[cnt];
19bc17a9
RM
1075 }
1076
4b10dd6c
UD
1077 if (cnt < ctype->map_collection_nr)
1078 {
1079 lr_error (lr, _("character map `%s' already defined"), name);
1080 return;
1081 }
19bc17a9 1082
4b10dd6c
UD
1083 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1084 /* Exit code 2 is prescribed in P1003.2b. */
1085 error (2, 0, _("\
1086implementation limit: no more than %d character maps allowed"),
1087 MAX_NR_CHARMAP);
19bc17a9 1088
4b10dd6c
UD
1089 ctype->mapnames[cnt] = name;
1090
1091 if (max_chars == 0)
1092 ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1093 else
1094 ctype->map_collection_max[cnt] = max_chars;
1095
1096 ctype->map_collection[cnt] = (uint32_t *)
5866b131 1097 xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
4b10dd6c 1098 ctype->map_collection_act[cnt] = 256;
19bc17a9 1099
4b10dd6c 1100 ++ctype->map_collection_nr;
19bc17a9
RM
1101}
1102
1103
4b10dd6c 1104/* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
42d7c593 1105 is possible if we only want to extend the name array. */
4b10dd6c
UD
1106static uint32_t *
1107find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1108 size_t *act, uint32_t idx)
19bc17a9 1109{
4b10dd6c 1110 size_t cnt;
19bc17a9 1111
4b10dd6c
UD
1112 if (idx < 256)
1113 return table == NULL ? NULL : &(*table)[idx];
19bc17a9 1114
4b10dd6c
UD
1115 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1116 if (ctype->charnames[cnt] == idx)
1117 break;
19bc17a9 1118
4b10dd6c
UD
1119 /* We have to distinguish two cases: the name is found or not. */
1120 if (cnt == ctype->charnames_act)
1121 {
1122 /* Extend the name array. */
1123 if (ctype->charnames_act == ctype->charnames_max)
1124 {
1125 ctype->charnames_max *= 2;
5866b131 1126 ctype->charnames = (uint32_t *)
4b10dd6c 1127 xrealloc (ctype->charnames,
5866b131 1128 sizeof (uint32_t) * ctype->charnames_max);
4b10dd6c
UD
1129 }
1130 ctype->charnames[ctype->charnames_act++] = idx;
1131 }
19bc17a9 1132
4b10dd6c
UD
1133 if (table == NULL)
1134 /* We have done everything we are asked to do. */
1135 return NULL;
19bc17a9 1136
4b10dd6c
UD
1137 if (cnt >= *act)
1138 {
1139 if (cnt >= *max)
1140 {
1141 size_t old_max = *max;
1142 do
1143 *max *= 2;
1144 while (*max <= cnt);
19bc17a9 1145
4b10dd6c 1146 *table =
5866b131 1147 (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
4b10dd6c
UD
1148 memset (&(*table)[old_max], '\0',
1149 (*max - old_max) * sizeof (uint32_t));
1150 }
19bc17a9 1151
76e680a8 1152 *act = cnt + 1;
4b10dd6c 1153 }
19bc17a9 1154
4b10dd6c 1155 return &(*table)[cnt];
19bc17a9
RM
1156}
1157
1158
4b10dd6c
UD
1159static int
1160get_character (struct token *now, struct charmap_t *charmap,
1161 struct repertoire_t *repertoire,
1162 struct charseq **seqp, uint32_t *wchp)
19bc17a9 1163{
4b10dd6c
UD
1164 if (now->tok == tok_bsymbol)
1165 {
1166 /* This will hopefully be the normal case. */
1167 *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1168 now->val.str.lenmb);
1169 *seqp = charmap_find_value (charmap, now->val.str.startmb,
1170 now->val.str.lenmb);
1171 }
1172 else if (now->tok == tok_ucs4)
1173 {
f0a4b6b1
UD
1174 char utmp[10];
1175
1176 snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1177 *seqp = charmap_find_value (charmap, utmp, 9);
1178
1179 if (*seqp == NULL)
1180 *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
19bc17a9 1181
4b10dd6c
UD
1182 if (*seqp == NULL)
1183 {
1184 /* Compute the value in the charmap from the UCS value. */
1185 const char *symbol = repertoire_find_symbol (repertoire,
1186 now->val.ucs4);
19bc17a9 1187
4b10dd6c
UD
1188 if (symbol == NULL)
1189 *seqp = NULL;
1190 else
1191 *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
19bc17a9 1192
4b10dd6c
UD
1193 if (*seqp == NULL)
1194 {
1195 /* Insert a negative entry. */
1196 static const struct charseq negative
1197 = { .ucs4 = ILLEGAL_CHAR_VALUE };
5866b131
UD
1198 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1199 sizeof (uint32_t));
4b10dd6c
UD
1200 *newp = now->val.ucs4;
1201
5866b131 1202 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
4b10dd6c
UD
1203 (void *) &negative);
1204 }
1205 else
1206 (*seqp)->ucs4 = now->val.ucs4;
1207 }
1208 else if ((*seqp)->ucs4 != now->val.ucs4)
1209 *seqp = NULL;
19bc17a9 1210
4b10dd6c
UD
1211 *wchp = now->val.ucs4;
1212 }
1213 else if (now->tok == tok_charcode)
1214 {
1215 /* We must map from the byte code to UCS4. */
1216 *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1217 now->val.str.lenmb);
19bc17a9 1218
4b10dd6c
UD
1219 if (*seqp == NULL)
1220 *wchp = ILLEGAL_CHAR_VALUE;
1221 else
1222 {
1223 if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1224 (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1225 strlen ((*seqp)->name));
1226 *wchp = (*seqp)->ucs4;
1227 }
1228 }
1229 else
1230 return 1;
19bc17a9
RM
1231
1232 return 0;
1233}
1234
1235
4b10dd6c
UD
1236/* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>'. */
1237static void
1238charclass_symbolic_ellipsis (struct linereader *ldfile,
1239 struct locale_ctype_t *ctype,
1240 struct charmap_t *charmap,
1241 struct repertoire_t *repertoire,
1242 struct token *now,
1243 const char *last_str,
1244 unsigned long int class256_bit,
1245 unsigned long int class_bit, int base,
1246 int ignore_content, int handle_digits)
19bc17a9 1247{
4b10dd6c
UD
1248 const char *nowstr = now->val.str.startmb;
1249 char tmp[now->val.str.lenmb + 1];
1250 const char *cp;
1251 char *endp;
1252 unsigned long int from;
1253 unsigned long int to;
19bc17a9 1254
4b10dd6c
UD
1255 /* We have to compute the ellipsis values using the symbolic names. */
1256 assert (last_str != NULL);
1257
1258 if (strlen (last_str) != now->val.str.lenmb)
19bc17a9 1259 {
4b10dd6c
UD
1260 invalid_range:
1261 lr_error (ldfile,
549b3c3a 1262 _("`%s' and `%.*s' are no valid names for symbolic range"),
f6ada7ad 1263 last_str, (int) now->val.str.lenmb, nowstr);
4b10dd6c 1264 return;
19bc17a9
RM
1265 }
1266
4b10dd6c
UD
1267 if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1268 /* Nothing to do, the names are the same. */
1269 return;
19bc17a9 1270
4b10dd6c
UD
1271 for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1272 ;
19bc17a9 1273
4b10dd6c
UD
1274 errno = 0;
1275 from = strtoul (cp, &endp, base);
1276 if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1277 goto invalid_range;
19bc17a9 1278
4b10dd6c 1279 to = strtoul (nowstr + (cp - last_str), &endp, base);
549b3c3a
UD
1280 if ((to == UINT_MAX && errno == ERANGE)
1281 || (endp - nowstr) != now->val.str.lenmb || from >= to)
4b10dd6c 1282 goto invalid_range;
19bc17a9 1283
4b10dd6c
UD
1284 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1285 if (!ignore_content)
1286 {
1287 now->val.str.startmb = tmp;
1288 while (++from <= to)
1289 {
1290 struct charseq *seq;
1291 uint32_t wch;
19bc17a9 1292
4b10dd6c
UD
1293 sprintf (tmp, (base == 10 ? "%.*s%0*d" : "%.*s%0*X"), cp - last_str,
1294 last_str, now->val.str.lenmb - (cp - last_str), from);
19bc17a9 1295
4b10dd6c
UD
1296 get_character (now, charmap, repertoire, &seq, &wch);
1297
1298 if (seq != NULL && seq->nbytes == 1)
1299 /* Yep, we can store information about this byte sequence. */
1300 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
19bc17a9 1301
4b10dd6c
UD
1302 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1303 /* We have the UCS4 position. */
1304 *find_idx (ctype, &ctype->class_collection,
1305 &ctype->class_collection_max,
1306 &ctype->class_collection_act, wch) |= class_bit;
19bc17a9 1307
4b10dd6c
UD
1308 if (handle_digits == 1)
1309 {
1310 /* We must store the digit values. */
1311 if (ctype->mbdigits_act == ctype->mbdigits_max)
1312 {
1313 ctype->mbdigits_max *= 2;
1314 ctype->mbdigits = xrealloc (ctype->mbdigits,
1315 (ctype->mbdigits_max
1316 * sizeof (char *)));
1317 ctype->wcdigits_max *= 2;
1318 ctype->wcdigits = xrealloc (ctype->wcdigits,
1319 (ctype->wcdigits_max
1320 * sizeof (uint32_t)));
1321 }
1322
1323 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1324 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1325 }
1326 else if (handle_digits == 2)
1327 {
1328 /* We must store the digit values. */
1329 if (ctype->outdigits_act >= 10)
1330 {
1331 lr_error (ldfile, _("\
1332%s: field `%s' does not contain exactly ten entries"),
1333 "LC_CTYPE", "outdigit");
1334 return;
1335 }
1336
1337 ctype->mboutdigits[ctype->outdigits_act] = seq;
1338 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1339 ++ctype->outdigits_act;
1340 }
1341 }
1342 }
19bc17a9
RM
1343}
1344
1345
4b10dd6c
UD
1346/* Ellipsis like in `<U1234>..<U2345>'. */
1347static void
1348charclass_ucs4_ellipsis (struct linereader *ldfile,
1349 struct locale_ctype_t *ctype,
1350 struct charmap_t *charmap,
1351 struct repertoire_t *repertoire,
1352 struct token *now, uint32_t last_wch,
1353 unsigned long int class256_bit,
1354 unsigned long int class_bit, int ignore_content,
1355 int handle_digits)
19bc17a9 1356{
4b10dd6c 1357 if (last_wch > now->val.ucs4)
19bc17a9 1358 {
4b10dd6c
UD
1359 lr_error (ldfile, _("\
1360to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1361 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1362 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
19bc17a9
RM
1363 return;
1364 }
1365
4b10dd6c
UD
1366 if (!ignore_content)
1367 while (++last_wch <= now->val.ucs4)
1368 {
1369 /* We have to find out whether there is a byte sequence corresponding
1370 to this UCS4 value. */
f0a4b6b1
UD
1371 struct charseq *seq;
1372 char utmp[10];
1373
1374 snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1375 seq = charmap_find_value (charmap, utmp, 9);
1376
1377 if (seq == NULL)
1378 /* Try looking in the repertoire map. */
1379 seq = repertoire_find_seq (repertoire, last_wch);
19bc17a9 1380
4b10dd6c
UD
1381 /* If this is the first time we look for this sequence create a new
1382 entry. */
1383 if (seq == NULL)
1384 {
f0a4b6b1
UD
1385 static const struct charseq negative
1386 = { .ucs4 = ILLEGAL_CHAR_VALUE };
19bc17a9 1387
f0a4b6b1
UD
1388 /* Find the symbolic name for this UCS4 value. */
1389 if (repertoire != NULL)
4b10dd6c 1390 {
f0a4b6b1
UD
1391 const char *symbol = repertoire_find_symbol (repertoire,
1392 last_wch);
5866b131
UD
1393 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1394 sizeof (uint32_t));
f0a4b6b1
UD
1395 *newp = last_wch;
1396
1397 if (symbol != NULL)
1398 /* We have a name, now search the multibyte value. */
1399 seq = charmap_find_value (charmap, symbol, strlen (symbol));
1400
1401 if (seq == NULL)
1402 /* We have to create a fake entry. */
1403 seq = (struct charseq *) &negative;
1404 else
1405 seq->ucs4 = last_wch;
1406
5866b131
UD
1407 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1408 seq);
4b10dd6c
UD
1409 }
1410 else
f0a4b6b1
UD
1411 /* We have to create a fake entry. */
1412 seq = (struct charseq *) &negative;
4b10dd6c
UD
1413 }
1414
1415 /* We have a name, now search the multibyte value. */
1416 if (seq->ucs4 == last_wch && seq->nbytes == 1)
1417 /* Yep, we can store information about this byte sequence. */
1418 ctype->class256_collection[(size_t) seq->bytes[0]]
1419 |= class256_bit;
1420
1421 /* And of course we have the UCS4 position. */
5866b131 1422 if (class_bit != 0)
4b10dd6c
UD
1423 *find_idx (ctype, &ctype->class_collection,
1424 &ctype->class_collection_max,
1425 &ctype->class_collection_act, last_wch) |= class_bit;
1426
1427 if (handle_digits == 1)
1428 {
1429 /* We must store the digit values. */
1430 if (ctype->mbdigits_act == ctype->mbdigits_max)
1431 {
1432 ctype->mbdigits_max *= 2;
1433 ctype->mbdigits = xrealloc (ctype->mbdigits,
1434 (ctype->mbdigits_max
1435 * sizeof (char *)));
1436 ctype->wcdigits_max *= 2;
1437 ctype->wcdigits = xrealloc (ctype->wcdigits,
1438 (ctype->wcdigits_max
1439 * sizeof (uint32_t)));
1440 }
1441
1442 ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1443 ? seq : NULL);
1444 ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1445 }
1446 else if (handle_digits == 2)
1447 {
1448 /* We must store the digit values. */
1449 if (ctype->outdigits_act >= 10)
1450 {
1451 lr_error (ldfile, _("\
1452%s: field `%s' does not contain exactly ten entries"),
1453 "LC_CTYPE", "outdigit");
1454 return;
1455 }
19bc17a9 1456
4b10dd6c
UD
1457 ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1458 ? seq : NULL);
1459 ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1460 ++ctype->outdigits_act;
1461 }
1462 }
19bc17a9
RM
1463}
1464
1465
4b10dd6c 1466/* Ellipsis as in `/xea/x12.../xea/x34'. */
19bc17a9 1467static void
4b10dd6c
UD
1468charclass_charcode_ellipsis (struct linereader *ldfile,
1469 struct locale_ctype_t *ctype,
1470 struct charmap_t *charmap,
1471 struct repertoire_t *repertoire,
1472 struct token *now, char *last_charcode,
1473 uint32_t last_charcode_len,
1474 unsigned long int class256_bit,
1475 unsigned long int class_bit, int ignore_content,
1476 int handle_digits)
19bc17a9 1477{
4b10dd6c
UD
1478 /* First check whether the to-value is larger. */
1479 if (now->val.charcode.nbytes != last_charcode_len)
1480 {
1481 lr_error (ldfile, _("\
1482start end end character sequence of range must have the same length"));
1483 return;
1484 }
19bc17a9 1485
4b10dd6c 1486 if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
19bc17a9 1487 {
4b10dd6c
UD
1488 lr_error (ldfile, _("\
1489to-value character sequence is smaller than from-value sequence"));
19bc17a9
RM
1490 return;
1491 }
1492
4b10dd6c
UD
1493 if (!ignore_content)
1494 {
1495 do
1496 {
1497 /* Increment the byte sequence value. */
1498 struct charseq *seq;
1499 uint32_t wch;
1500 int i;
1501
1502 for (i = last_charcode_len - 1; i >= 0; --i)
1503 if (++last_charcode[i] != 0)
1504 break;
1505
1506 if (last_charcode_len == 1)
1507 /* Of course we have the charcode value. */
1508 ctype->class256_collection[(size_t) last_charcode[0]]
1509 |= class256_bit;
1510
1511 /* Find the symbolic name. */
1512 seq = charmap_find_symbol (charmap, last_charcode,
1513 last_charcode_len);
1514 if (seq != NULL)
1515 {
1516 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1517 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1518 strlen (seq->name));
f0a4b6b1 1519 wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
4b10dd6c
UD
1520
1521 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1522 *find_idx (ctype, &ctype->class_collection,
1523 &ctype->class_collection_max,
1524 &ctype->class_collection_act, wch) |= class_bit;
1525 }
1526 else
1527 wch = ILLEGAL_CHAR_VALUE;
19bc17a9 1528
4b10dd6c
UD
1529 if (handle_digits == 1)
1530 {
1531 /* We must store the digit values. */
1532 if (ctype->mbdigits_act == ctype->mbdigits_max)
1533 {
1534 ctype->mbdigits_max *= 2;
1535 ctype->mbdigits = xrealloc (ctype->mbdigits,
1536 (ctype->mbdigits_max
1537 * sizeof (char *)));
1538 ctype->wcdigits_max *= 2;
1539 ctype->wcdigits = xrealloc (ctype->wcdigits,
1540 (ctype->wcdigits_max
1541 * sizeof (uint32_t)));
1542 }
1543
1544 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1545 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1546 seq->nbytes = last_charcode_len;
1547
1548 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1549 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1550 }
1551 else if (handle_digits == 2)
1552 {
1553 struct charseq *seq;
1554 /* We must store the digit values. */
1555 if (ctype->outdigits_act >= 10)
1556 {
1557 lr_error (ldfile, _("\
1558%s: field `%s' does not contain exactly ten entries"),
1559 "LC_CTYPE", "outdigit");
1560 return;
1561 }
1562
1563 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1564 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1565 seq->nbytes = last_charcode_len;
1566
1567 ctype->mboutdigits[ctype->outdigits_act] = seq;
1568 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1569 ++ctype->outdigits_act;
1570 }
1571 }
1572 while (memcmp (last_charcode, now->val.charcode.bytes,
1573 last_charcode_len) != 0);
1574 }
19bc17a9
RM
1575}
1576
1577
4b10dd6c
UD
1578/* Read one transliteration entry. */
1579static uint32_t *
1580read_widestring (struct linereader *ldfile, struct token *now,
1581 struct charmap_t *charmap, struct repertoire_t *repertoire)
19bc17a9 1582{
4b10dd6c 1583 uint32_t *wstr;
19bc17a9 1584
4b10dd6c
UD
1585 if (now->tok == tok_default_missing)
1586 /* The special name "" will denote this case. */
5866b131 1587 wstr = ((uint32_t *) { 0 });
4b10dd6c 1588 else if (now->tok == tok_bsymbol)
19bc17a9 1589 {
4b10dd6c 1590 /* Get the value from the repertoire. */
a673fbcb 1591 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
4b10dd6c
UD
1592 wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1593 now->val.str.lenmb);
1594 if (wstr[0] == ILLEGAL_CHAR_VALUE)
f0a4b6b1
UD
1595 {
1596 /* We cannot proceed, we don't know the UCS4 value. */
1597 free (wstr);
1598 return NULL;
1599 }
4b10dd6c
UD
1600
1601 wstr[1] = 0;
19bc17a9 1602 }
4b10dd6c 1603 else if (now->tok == tok_ucs4)
19bc17a9 1604 {
a673fbcb 1605 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
4b10dd6c
UD
1606 wstr[0] = now->val.ucs4;
1607 wstr[1] = 0;
1608 }
1609 else if (now->tok == tok_charcode)
1610 {
1611 /* Argh, we have to convert to the symbol name first and then to the
1612 UCS4 value. */
1613 struct charseq *seq = charmap_find_symbol (charmap,
1614 now->val.str.startmb,
1615 now->val.str.lenmb);
1616 if (seq == NULL)
1617 /* Cannot find the UCS4 value. */
1618 return NULL;
1619
1620 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1621 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1622 strlen (seq->name));
1623 if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1624 /* We cannot proceed, we don't know the UCS4 value. */
1625 return NULL;
1626
a673fbcb 1627 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
4b10dd6c
UD
1628 wstr[0] = seq->ucs4;
1629 wstr[1] = 0;
1630 }
1631 else if (now->tok == tok_string)
1632 {
1633 wstr = now->val.str.startwc;
a673fbcb 1634 if (wstr == NULL || wstr[0] == 0)
4b10dd6c
UD
1635 return NULL;
1636 }
1637 else
1638 {
1639 if (now->tok != tok_eol && now->tok != tok_eof)
1640 lr_ignore_rest (ldfile, 0);
1641 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1642 return (uint32_t *) -1l;
19bc17a9
RM
1643 }
1644
4b10dd6c
UD
1645 return wstr;
1646}
19bc17a9 1647
19bc17a9 1648
4b10dd6c
UD
1649static void
1650read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1651 struct token *now, struct charmap_t *charmap,
1652 struct repertoire_t *repertoire)
1653{
1654 uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1655 struct translit_t *result;
1656 struct translit_to_t **top;
a673fbcb 1657 struct obstack *ob = &ctype->mempool;
4b10dd6c
UD
1658 int first;
1659 int ignore;
1660
1661 if (from_wstr == NULL)
1662 /* There is no valid from string. */
1663 return;
19bc17a9 1664
4b10dd6c
UD
1665 result = (struct translit_t *) obstack_alloc (ob,
1666 sizeof (struct translit_t));
1667 result->from = from_wstr;
a673fbcb
UD
1668 result->fname = ldfile->fname;
1669 result->lineno = ldfile->lineno;
4b10dd6c
UD
1670 result->next = NULL;
1671 result->to = NULL;
1672 top = &result->to;
1673 first = 1;
1674 ignore = 0;
1675
1676 while (1)
1677 {
1678 uint32_t *to_wstr;
1679
1680 /* Next we have one or more transliterations. They are
1681 separated by semicolons. */
1682 now = lr_token (ldfile, charmap, repertoire);
1683
1684 if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1685 {
1686 /* One string read. */
1687 const uint32_t zero = 0;
1688
1689 if (!ignore)
1690 {
1691 obstack_grow (ob, &zero, 4);
1692 to_wstr = obstack_finish (ob);
1693
1694 *top = obstack_alloc (ob, sizeof (struct translit_to_t));
1695 (*top)->str = to_wstr;
1696 (*top)->next = NULL;
1697 }
1698
1699 if (now->tok == tok_eol)
1700 {
1701 result->next = ctype->translit;
1702 ctype->translit = result;
1703 return;
1704 }
1705
1706 if (!ignore)
1707 top = &(*top)->next;
1708 ignore = 0;
1709 }
1710 else
1711 {
1712 to_wstr = read_widestring (ldfile, now, charmap, repertoire);
1713 if (to_wstr == (uint32_t *) -1l)
1714 {
1715 /* An error occurred. */
1716 obstack_free (ob, result);
1717 return;
1718 }
1719
1720 if (to_wstr == NULL)
1721 ignore = 1;
1722 else
1723 /* This value is usable. */
1724 obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
19bc17a9 1725
4b10dd6c
UD
1726 first = 0;
1727 }
1728 }
19bc17a9
RM
1729}
1730
1731
a673fbcb
UD
1732static void
1733read_translit_ignore_entry (struct linereader *ldfile,
1734 struct locale_ctype_t *ctype,
1735 struct charmap_t *charmap,
1736 struct repertoire_t *repertoire)
1737{
1738 /* We expect a semicolon-separated list of characters we ignore. We are
1739 only interested in the wide character definitions. These must be
1740 single characters, possibly defining a range when an ellipsis is used. */
1741 while (1)
1742 {
1743 struct token *now = lr_token (ldfile, charmap, repertoire);
1744 struct translit_ignore_t *newp;
1745 uint32_t from;
1746
1747 if (now->tok == tok_eol || now->tok == tok_eof)
1748 {
1749 lr_error (ldfile,
1750 _("premature end of `translit_ignore' definition"));
1751 return;
1752 }
1753
1754 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1755 {
1756 lr_error (ldfile, _("syntax error"));
1757 lr_ignore_rest (ldfile, 0);
1758 return;
1759 }
1760
1761 if (now->tok == tok_ucs4)
1762 from = now->val.ucs4;
1763 else
f0a4b6b1
UD
1764 /* Try to get the value. */
1765 from = repertoire_find_value (repertoire, now->val.str.startmb,
1766 now->val.str.lenmb);
a673fbcb
UD
1767
1768 if (from == ILLEGAL_CHAR_VALUE)
1769 {
1770 lr_error (ldfile, "invalid character name");
1771 newp = NULL;
1772 }
1773 else
1774 {
1775 newp = (struct translit_ignore_t *)
1776 obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
1777 newp->from = from;
1778 newp->to = from;
1779
1780 newp->next = ctype->translit_ignore;
1781 ctype->translit_ignore = newp;
1782 }
1783
1784 /* Now we expect either a semicolon, an ellipsis, or the end of the
1785 line. */
1786 now = lr_token (ldfile, charmap, repertoire);
1787
1788 if (now->tok == tok_ellipsis2)
1789 {
1790 /* XXX Should we bother implementing `....'? `...' certainly
1791 will not be implemented. */
1792 uint32_t to;
1793
1794 now = lr_token (ldfile, charmap, repertoire);
1795
1796 if (now->tok == tok_eol || now->tok == tok_eof)
1797 {
1798 lr_error (ldfile,
1799 _("premature end of `translit_ignore' definition"));
1800 return;
1801 }
1802
1803 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1804 {
1805 lr_error (ldfile, _("syntax error"));
1806 lr_ignore_rest (ldfile, 0);
1807 return;
1808 }
1809
1810 if (now->tok == tok_ucs4)
1811 to = now->val.ucs4;
1812 else
f0a4b6b1
UD
1813 /* Try to get the value. */
1814 to = repertoire_find_value (repertoire, now->val.str.startmb,
1815 now->val.str.lenmb);
a673fbcb
UD
1816
1817 if (to == ILLEGAL_CHAR_VALUE)
1818 lr_error (ldfile, "invalid character name");
1819 else
1820 {
1821 /* Make sure the `to'-value is larger. */
1822 if (to >= from)
1823 newp->to = to;
1824 else
1825 lr_error (ldfile, _("\
1826to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1827 (to | from) < 65536 ? 4 : 8, to,
1828 (to | from) < 65536 ? 4 : 8, from);
1829 }
1830
1831 /* And the next token. */
1832 now = lr_token (ldfile, charmap, repertoire);
1833 }
1834
1835 if (now->tok == tok_eol || now->tok == tok_eof)
1836 /* We are done. */
1837 return;
1838
1839 if (now->tok == tok_semicolon)
1840 /* Next round. */
1841 continue;
1842
1843 /* If we come here something is wrong. */
1844 lr_error (ldfile, _("syntax error"));
1845 lr_ignore_rest (ldfile, 0);
1846 return;
1847 }
1848}
1849
1850
4b10dd6c
UD
1851/* The parser for the LC_CTYPE section of the locale definition. */
1852void
1853ctype_read (struct linereader *ldfile, struct localedef_t *result,
1854 struct charmap_t *charmap, const char *repertoire_name,
1855 int ignore_content)
19bc17a9 1856{
4b10dd6c
UD
1857 struct repertoire_t *repertoire = NULL;
1858 struct locale_ctype_t *ctype;
1859 struct token *now;
1860 enum token_t nowtok;
19bc17a9 1861 size_t cnt;
4b10dd6c
UD
1862 struct charseq *last_seq;
1863 uint32_t last_wch = 0;
1864 enum token_t last_token;
1865 enum token_t ellipsis_token;
1866 char last_charcode[16];
1867 size_t last_charcode_len = 0;
1868 const char *last_str = NULL;
1869 int mapidx;
19bc17a9 1870
4b10dd6c
UD
1871 /* Get the repertoire we have to use. */
1872 if (repertoire_name != NULL)
1873 repertoire = repertoire_read (repertoire_name);
19bc17a9 1874
4b10dd6c
UD
1875 /* The rest of the line containing `LC_CTYPE' must be free. */
1876 lr_ignore_rest (ldfile, 1);
19bc17a9 1877
4b10dd6c
UD
1878
1879 do
19bc17a9 1880 {
4b10dd6c
UD
1881 now = lr_token (ldfile, charmap, NULL);
1882 nowtok = now->tok;
19bc17a9 1883 }
4b10dd6c 1884 while (nowtok == tok_eol);
19bc17a9 1885
4b10dd6c
UD
1886 /* If we see `copy' now we are almost done. */
1887 if (nowtok == tok_copy)
1888 {
01ff9d0b
UD
1889 handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_ctype,
1890 LC_CTYPE, "LC_CTYPE", ignore_content);
4b10dd6c
UD
1891 return;
1892 }
75cd5204 1893
4b10dd6c
UD
1894 /* Prepare the data structures. */
1895 ctype_startup (ldfile, result, charmap, ignore_content);
1896 ctype = result->categories[LC_CTYPE].ctype;
1897
1898 /* Remember the repertoire we use. */
1899 if (!ignore_content)
1900 ctype->repertoire = repertoire;
1901
1902 while (1)
19bc17a9 1903 {
4b10dd6c
UD
1904 unsigned long int class_bit = 0;
1905 unsigned long int class256_bit = 0;
1906 int handle_digits = 0;
1907
1908 /* Of course we don't proceed beyond the end of file. */
1909 if (nowtok == tok_eof)
1910 break;
1911
1912 /* Ingore empty lines. */
1913 if (nowtok == tok_eol)
19bc17a9 1914 {
4b10dd6c
UD
1915 now = lr_token (ldfile, charmap, NULL);
1916 nowtok = now->tok;
1917 continue;
1918 }
19bc17a9 1919
4b10dd6c
UD
1920 switch (nowtok)
1921 {
5491da0d
UD
1922 case tok_charclass:
1923 now = lr_token (ldfile, charmap, NULL);
1924 while (now->tok == tok_ident || now->tok == tok_string)
1925 {
1926 ctype_class_new (ldfile, ctype, now->val.str.startmb);
1927 now = lr_token (ldfile, charmap, NULL);
1928 if (now->tok != tok_semicolon)
1929 break;
1930 now = lr_token (ldfile, charmap, NULL);
1931 }
1932 if (now->tok != tok_eol)
1933 SYNTAX_ERROR (_("\
1934%s: syntax error in definition of new character class"), "LC_CTYPE");
1935 break;
1936
1937 case tok_charconv:
1938 now = lr_token (ldfile, charmap, NULL);
1939 while (now->tok == tok_ident || now->tok == tok_string)
1940 {
1941 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
1942 now = lr_token (ldfile, charmap, NULL);
1943 if (now->tok != tok_semicolon)
1944 break;
1945 now = lr_token (ldfile, charmap, NULL);
1946 }
1947 if (now->tok != tok_eol)
1948 SYNTAX_ERROR (_("\
1949%s: syntax error in definition of new character map"), "LC_CTYPE");
1950 break;
1951
4b10dd6c 1952 case tok_class:
b9eb05d6
UD
1953 /* Ignore the rest of the line if we don't need the input of
1954 this line. */
1955 if (ignore_content)
1956 {
1957 lr_ignore_rest (ldfile, 0);
1958 break;
1959 }
1960
4b10dd6c
UD
1961 /* We simply forget the `class' keyword and use the following
1962 operand to determine the bit. */
1963 now = lr_token (ldfile, charmap, NULL);
1964 if (now->tok == tok_ident || now->tok == tok_string)
1965 {
87372aa9 1966 /* Must can be one of the predefined class names. */
4b10dd6c
UD
1967 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1968 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
1969 break;
1970 if (cnt >= ctype->nr_charclass)
1971 {
011ebfab 1972#ifdef PREDEFINED_CLASSES
4b10dd6c
UD
1973 if (now->val.str.lenmb == 8
1974 && memcmp ("special1", now->val.str.startmb, 8) == 0)
1975 class_bit = _ISwspecial1;
1976 else if (now->val.str.lenmb == 8
1977 && memcmp ("special2", now->val.str.startmb, 8) == 0)
1978 class_bit = _ISwspecial2;
1979 else if (now->val.str.lenmb == 8
1980 && memcmp ("special3", now->val.str.startmb, 8) == 0)
1981 class_bit = _ISwspecial3;
1982 else
011ebfab 1983#endif
4b10dd6c 1984 {
87372aa9
UD
1985 /* OK, it's a new class. */
1986 ctype_class_new (ldfile, ctype, now->val.str.startmb);
4b10dd6c 1987
87372aa9 1988 class_bit = _ISwbit (ctype->nr_charclass - 1);
4b10dd6c
UD
1989 }
1990 }
1991 else
7f653277
UD
1992 {
1993 class_bit = _ISwbit (cnt);
4b10dd6c 1994
7f653277
UD
1995 free (now->val.str.startmb);
1996 }
4b10dd6c
UD
1997 }
1998 else if (now->tok == tok_digit)
1999 goto handle_tok_digit;
2000 else if (now->tok < tok_upper || now->tok > tok_blank)
2001 goto err_label;
2002 else
2003 {
2004 class_bit = BITw (now->tok);
2005 class256_bit = BIT (now->tok);
2006 }
2007
2008 /* The next character must be a semicolon. */
2009 now = lr_token (ldfile, charmap, NULL);
2010 if (now->tok != tok_semicolon)
2011 goto err_label;
2012 goto read_charclass;
2013
2014 case tok_upper:
2015 case tok_lower:
2016 case tok_alpha:
2017 case tok_alnum:
2018 case tok_space:
2019 case tok_cntrl:
2020 case tok_punct:
2021 case tok_graph:
2022 case tok_print:
2023 case tok_xdigit:
2024 case tok_blank:
b9eb05d6
UD
2025 /* Ignore the rest of the line if we don't need the input of
2026 this line. */
2027 if (ignore_content)
2028 {
2029 lr_ignore_rest (ldfile, 0);
2030 break;
2031 }
2032
4b10dd6c
UD
2033 class_bit = BITw (now->tok);
2034 class256_bit = BIT (now->tok);
2035 handle_digits = 0;
2036 read_charclass:
2037 ctype->class_done |= class_bit;
2038 last_token = tok_none;
2039 ellipsis_token = tok_none;
2040 now = lr_token (ldfile, charmap, NULL);
2041 while (now->tok != tok_eol && now->tok != tok_eof)
2042 {
2043 uint32_t wch;
2044 struct charseq *seq;
2045
2046 if (ellipsis_token == tok_none)
2047 {
2048 if (get_character (now, charmap, repertoire, &seq, &wch))
2049 goto err_label;
2050
2051 if (!ignore_content && seq != NULL && seq->nbytes == 1)
2052 /* Yep, we can store information about this byte
2053 sequence. */
2054 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2055
2056 if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2057 && class_bit != 0)
2058 /* We have the UCS4 position. */
2059 *find_idx (ctype, &ctype->class_collection,
2060 &ctype->class_collection_max,
2061 &ctype->class_collection_act, wch) |= class_bit;
2062
2063 last_token = now->tok;
549b3c3a 2064 /* Terminate the string. */
9e2b7438
UD
2065 if (last_token == tok_bsymbol)
2066 {
2067 now->val.str.startmb[now->val.str.lenmb] = '\0';
2068 last_str = now->val.str.startmb;
2069 }
2070 else
2071 last_str = NULL;
4b10dd6c
UD
2072 last_seq = seq;
2073 last_wch = wch;
2074 memcpy (last_charcode, now->val.charcode.bytes, 16);
2075 last_charcode_len = now->val.charcode.nbytes;
2076
2077 if (!ignore_content && handle_digits == 1)
2078 {
2079 /* We must store the digit values. */
2080 if (ctype->mbdigits_act == ctype->mbdigits_max)
2081 {
b9eb05d6 2082 ctype->mbdigits_max += 10;
4b10dd6c
UD
2083 ctype->mbdigits = xrealloc (ctype->mbdigits,
2084 (ctype->mbdigits_max
2085 * sizeof (char *)));
b9eb05d6 2086 ctype->wcdigits_max += 10;
4b10dd6c
UD
2087 ctype->wcdigits = xrealloc (ctype->wcdigits,
2088 (ctype->wcdigits_max
2089 * sizeof (uint32_t)));
2090 }
2091
2092 ctype->mbdigits[ctype->mbdigits_act++] = seq;
2093 ctype->wcdigits[ctype->wcdigits_act++] = wch;
2094 }
2095 else if (!ignore_content && handle_digits == 2)
2096 {
2097 /* We must store the digit values. */
2098 if (ctype->outdigits_act >= 10)
2099 {
2100 lr_error (ldfile, _("\
2101%s: field `%s' does not contain exactly ten entries"),
2102 "LC_CTYPE", "outdigit");
2103 goto err_label;
2104 }
2105
2106 ctype->mboutdigits[ctype->outdigits_act] = seq;
2107 ctype->wcoutdigits[ctype->outdigits_act] = wch;
2108 ++ctype->outdigits_act;
2109 }
2110 }
2111 else
2112 {
2113 /* Now it gets complicated. We have to resolve the
2114 ellipsis problem. First we must distinguish between
2115 the different kind of ellipsis and this must match the
2116 tokens we have seen. */
2117 assert (last_token != tok_none);
2118
2119 if (last_token != now->tok)
2120 {
2121 lr_error (ldfile, _("\
2122ellipsis range must be marked by two operands of same type"));
2123 lr_ignore_rest (ldfile, 0);
2124 break;
2125 }
2126
2127 if (last_token == tok_bsymbol)
2128 {
2129 if (ellipsis_token == tok_ellipsis3)
2130 lr_error (ldfile, _("with symbolic name range values \
2131the absolute ellipsis `...' must not be used"));
2132
2133 charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2134 repertoire, now, last_str,
2135 class256_bit, class_bit,
2136 (ellipsis_token
2137 == tok_ellipsis4
2138 ? 10 : 16),
2139 ignore_content,
2140 handle_digits);
2141 }
2142 else if (last_token == tok_ucs4)
2143 {
2144 if (ellipsis_token != tok_ellipsis2)
2145 lr_error (ldfile, _("\
2146with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2147
2148 charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2149 repertoire, now, last_wch,
2150 class256_bit, class_bit,
2151 ignore_content, handle_digits);
2152 }
2153 else
2154 {
2155 assert (last_token == tok_charcode);
2156
2157 if (ellipsis_token != tok_ellipsis3)
2158 lr_error (ldfile, _("\
2159with character code range values one must use the absolute ellipsis `...'"));
2160
2161 charclass_charcode_ellipsis (ldfile, ctype, charmap,
2162 repertoire, now,
2163 last_charcode,
2164 last_charcode_len,
2165 class256_bit, class_bit,
2166 ignore_content,
2167 handle_digits);
2168 }
2169
2170 /* Now we have used the last value. */
2171 last_token = tok_none;
2172 }
2173
2174 /* Next we expect a semicolon or the end of the line. */
2175 now = lr_token (ldfile, charmap, NULL);
2176 if (now->tok == tok_eol || now->tok == tok_eof)
2177 break;
2178
2179 if (last_token != tok_none
2180 && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4)
2181 {
2182 ellipsis_token = now->tok;
2183 now = lr_token (ldfile, charmap, NULL);
2184 continue;
2185 }
2186
2187 if (now->tok != tok_semicolon)
2188 goto err_label;
2189
2190 /* And get the next character. */
2191 now = lr_token (ldfile, charmap, NULL);
2192
2193 ellipsis_token = tok_none;
2194 }
2195 break;
2196
2197 case tok_digit:
b9eb05d6
UD
2198 /* Ignore the rest of the line if we don't need the input of
2199 this line. */
2200 if (ignore_content)
42d7c593
UD
2201 {
2202 lr_ignore_rest (ldfile, 0);
2203 break;
2204 }
b9eb05d6 2205
4b10dd6c
UD
2206 handle_tok_digit:
2207 class_bit = _ISwdigit;
2208 class256_bit = _ISdigit;
2209 handle_digits = 1;
2210 goto read_charclass;
2211
2212 case tok_outdigit:
b9eb05d6
UD
2213 /* Ignore the rest of the line if we don't need the input of
2214 this line. */
2215 if (ignore_content)
2216 {
2217 lr_ignore_rest (ldfile, 0);
2218 break;
2219 }
2220
4b10dd6c
UD
2221 if (ctype->outdigits_act != 0)
2222 lr_error (ldfile, _("\
2223%s: field `%s' declared more than once"),
2224 "LC_CTYPE", "outdigit");
2225 class_bit = 0;
2226 class256_bit = 0;
2227 handle_digits = 2;
2228 goto read_charclass;
2229
2230 case tok_toupper:
b9eb05d6
UD
2231 /* Ignore the rest of the line if we don't need the input of
2232 this line. */
2233 if (ignore_content)
2234 {
2235 lr_ignore_rest (ldfile, 0);
2236 break;
2237 }
2238
4b10dd6c
UD
2239 mapidx = 0;
2240 goto read_mapping;
2241
2242 case tok_tolower:
b9eb05d6
UD
2243 /* Ignore the rest of the line if we don't need the input of
2244 this line. */
2245 if (ignore_content)
2246 {
2247 lr_ignore_rest (ldfile, 0);
2248 break;
2249 }
2250
4b10dd6c
UD
2251 mapidx = 1;
2252 goto read_mapping;
2253
2254 case tok_map:
b9eb05d6
UD
2255 /* Ignore the rest of the line if we don't need the input of
2256 this line. */
2257 if (ignore_content)
2258 {
2259 lr_ignore_rest (ldfile, 0);
2260 break;
2261 }
2262
4b10dd6c
UD
2263 /* We simply forget the `map' keyword and use the following
2264 operand to determine the mapping. */
2265 now = lr_token (ldfile, charmap, NULL);
2266 if (now->tok == tok_ident || now->tok == tok_string)
2267 {
2268 size_t cnt;
2269
2270 for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2271 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2272 break;
2273
7f653277
UD
2274 if (cnt < ctype->map_collection_nr)
2275 free (now->val.str.startmb);
2276 else
87372aa9
UD
2277 /* OK, it's a new map. */
2278 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2279
2280 mapidx = cnt;
4b10dd6c
UD
2281 }
2282 else if (now->tok < tok_toupper || now->tok > tok_tolower)
2283 goto err_label;
2284 else
2285 mapidx = now->tok - tok_toupper;
2286
2287 now = lr_token (ldfile, charmap, NULL);
2288 /* This had better be a semicolon. */
2289 if (now->tok != tok_semicolon)
2290 goto err_label;
2291
2292 read_mapping:
2293 /* Test whether this mapping was already defined. */
2294 if (ctype->tomap_done[mapidx])
2295 {
2296 lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2297 ctype->mapnames[mapidx]);
2298 lr_ignore_rest (ldfile, 0);
2299 break;
2300 }
2301 ctype->tomap_done[mapidx] = 1;
2302
2303 now = lr_token (ldfile, charmap, NULL);
2304 while (now->tok != tok_eol && now->tok != tok_eof)
2305 {
2306 struct charseq *from_seq;
2307 uint32_t from_wch;
2308 struct charseq *to_seq;
2309 uint32_t to_wch;
2310
2311 /* Every pair starts with an opening brace. */
2312 if (now->tok != tok_open_brace)
2313 goto err_label;
2314
2315 /* Next comes the from-value. */
2316 now = lr_token (ldfile, charmap, NULL);
2317 if (get_character (now, charmap, repertoire, &from_seq,
2318 &from_wch) != 0)
2319 goto err_label;
2320
2321 /* The next is a comma. */
2322 now = lr_token (ldfile, charmap, NULL);
2323 if (now->tok != tok_comma)
2324 goto err_label;
2325
2326 /* And the other value. */
2327 now = lr_token (ldfile, charmap, NULL);
2328 if (get_character (now, charmap, repertoire, &to_seq,
2329 &to_wch) != 0)
2330 goto err_label;
2331
2332 /* And the last thing is the closing brace. */
2333 now = lr_token (ldfile, charmap, NULL);
2334 if (now->tok != tok_close_brace)
2335 goto err_label;
2336
2337 if (!ignore_content)
2338 {
2339 if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2340 && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2341 /* We can use this value. */
2342 ctype->map256_collection[mapidx][from_seq->bytes[0]]
2343 = to_seq->bytes[0];
2344
2345 if (from_wch != ILLEGAL_CHAR_VALUE
2346 && to_wch != ILLEGAL_CHAR_VALUE)
2347 /* Both correct values. */
2348 *find_idx (ctype, &ctype->map_collection[mapidx],
2349 &ctype->map_collection_max[mapidx],
2350 &ctype->map_collection_act[mapidx],
2351 from_wch) = to_wch;
2352 }
2353
2354 /* Now comes a semicolon or the end of the line/file. */
2355 now = lr_token (ldfile, charmap, NULL);
2356 if (now->tok == tok_semicolon)
2357 now = lr_token (ldfile, charmap, NULL);
2358 }
2359 break;
2360
2361 case tok_translit_start:
b9eb05d6
UD
2362 /* Ignore the rest of the line if we don't need the input of
2363 this line. */
2364 if (ignore_content)
2365 {
2366 lr_ignore_rest (ldfile, 0);
2367 break;
2368 }
2369
4b10dd6c
UD
2370 /* The rest of the line better should be empty. */
2371 lr_ignore_rest (ldfile, 1);
2372
2373 /* We count here the number of allocated entries in the `translit'
2374 array. */
2375 cnt = 0;
2376
2377 /* We proceed until we see the `translit_end' token. */
2378 while (now = lr_token (ldfile, charmap, repertoire),
2379 now->tok != tok_translit_end && now->tok != tok_eof)
2380 {
2381 if (now->tok == tok_eol)
2382 /* Ignore empty lines. */
2383 continue;
2384
2385 if (now->tok == tok_translit_end)
2386 {
2387 lr_ignore_rest (ldfile, 0);
2388 break;
2389 }
2390
2391 if (now->tok == tok_include)
2392 {
2393 /* We have to include locale. */
2394 const char *locale_name;
2395 const char *repertoire_name;
2396
2397 now = lr_token (ldfile, charmap, NULL);
2398 /* This should be a string or an identifier. In any
2399 case something to name a locale. */
2400 if (now->tok != tok_string && now->tok != tok_ident)
2401 {
2402 translit_syntax:
2403 lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2404 lr_ignore_rest (ldfile, 0);
2405 continue;
2406 }
2407 locale_name = now->val.str.startmb;
2408
2409 /* Next should be a semicolon. */
2410 now = lr_token (ldfile, charmap, NULL);
2411 if (now->tok != tok_semicolon)
2412 goto translit_syntax;
2413
2414 /* Now the repertoire name. */
2415 now = lr_token (ldfile, charmap, NULL);
2416 if ((now->tok != tok_string && now->tok != tok_ident)
2417 || now->val.str.startmb == NULL)
2418 goto translit_syntax;
2419 repertoire_name = now->val.str.startmb;
2420
2421 /* We must not have more than one `include'. */
2422 if (ctype->translit_copy_locale != NULL)
2423 {
2424 lr_error (ldfile, _("\
2425%s: only one `include' instruction allowed"), "LC_CTYPE");
2426 lr_ignore_rest (ldfile, 0);
2427 continue;
2428 }
2429
2430 ctype->translit_copy_locale = locale_name;
2431 ctype->translit_copy_repertoire = repertoire_name;
2432
2433 /* The rest of the line must be empty. */
2434 lr_ignore_rest (ldfile, 1);
a673fbcb
UD
2435
2436 /* Make sure the locale is read. */
2437 add_to_readlist (LC_CTYPE, ctype->translit_copy_locale,
2438 repertoire_name, 1);
2439 continue;
2440 }
2441 else if (now->tok == tok_default_missing)
2442 {
2443 uint32_t *wstr;
2444
2445 /* We expect a single character or string as the
2446 argument. */
2447 now = lr_token (ldfile, charmap, NULL);
2448 wstr = read_widestring (ldfile, now, charmap, repertoire);
2449
2450 if (wstr != NULL)
2451 {
2452 if (ctype->default_missing != NULL)
2453 {
2454 lr_error (ldfile, _("\
2455%s: duplicate `default_missing' definition"), "LC_CTYPE");
2456 error_at_line (0, 0, ctype->default_missing_file,
2457 ctype->default_missing_lineno,
2458 _("previous definition was here"));
2459 }
2460 else
2461 {
2462 ctype->default_missing = wstr;
2463 ctype->default_missing_file = ldfile->fname;
2464 ctype->default_missing_lineno = ldfile->lineno;
2465 }
2466 }
2467 lr_ignore_rest (ldfile, 1);
2468 continue;
2469 }
2470 else if (now->tok == tok_translit_ignore)
2471 {
2472 read_translit_ignore_entry (ldfile, ctype, charmap,
2473 repertoire);
4b10dd6c
UD
2474 continue;
2475 }
2476
2477 read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2478 }
2479 break;
2480
2481 case tok_ident:
b9eb05d6
UD
2482 /* Ignore the rest of the line if we don't need the input of
2483 this line. */
2484 if (ignore_content)
2485 {
2486 lr_ignore_rest (ldfile, 0);
2487 break;
2488 }
2489
4b10dd6c
UD
2490 /* This could mean one of several things. First test whether
2491 it's a character class name. */
2492 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2493 if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2494 break;
2495 if (cnt < ctype->nr_charclass)
2496 {
2497 class_bit = _ISwbit (cnt);
2498 class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2499 free (now->val.str.startmb);
2500 goto read_charclass;
2501 }
5491da0d
UD
2502 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2503 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2504 break;
2505 if (cnt < ctype->map_collection_nr)
2506 {
2507 mapidx = cnt;
2508 free (now->val.str.startmb);
2509 goto read_mapping;
2510 }
011ebfab 2511#ifdef PREDEFINED_CLASSES
4b10dd6c
UD
2512 if (strcmp (now->val.str.startmb, "special1") == 0)
2513 {
2514 class_bit = _ISwspecial1;
2515 free (now->val.str.startmb);
2516 goto read_charclass;
2517 }
2518 if (strcmp (now->val.str.startmb, "special2") == 0)
2519 {
2520 class_bit = _ISwspecial2;
2521 free (now->val.str.startmb);
2522 goto read_charclass;
2523 }
2524 if (strcmp (now->val.str.startmb, "special3") == 0)
2525 {
2526 class_bit = _ISwspecial3;
2527 free (now->val.str.startmb);
2528 goto read_charclass;
2529 }
2530 if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2531 {
2532 mapidx = 2;
2533 goto read_mapping;
2534 }
011ebfab 2535#endif
4b10dd6c
UD
2536 break;
2537
2538 case tok_end:
2539 /* Next we assume `LC_CTYPE'. */
2540 now = lr_token (ldfile, charmap, NULL);
2541 if (now->tok == tok_eof)
2542 break;
2543 if (now->tok == tok_eol)
2544 lr_error (ldfile, _("%s: incomplete `END' line"),
2545 "LC_CTYPE");
2546 else if (now->tok != tok_lc_ctype)
2547 lr_error (ldfile, _("\
2548%1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2549 lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2550 return;
2551
2552 default:
2553 err_label:
2554 if (now->tok != tok_eof)
2555 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
19bc17a9
RM
2556 }
2557
4b10dd6c
UD
2558 /* Prepare for the next round. */
2559 now = lr_token (ldfile, charmap, NULL);
2560 nowtok = now->tok;
19bc17a9
RM
2561 }
2562
4b10dd6c
UD
2563 /* When we come here we reached the end of the file. */
2564 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
19bc17a9
RM
2565}
2566
2567
2568static void
4b10dd6c
UD
2569set_class_defaults (struct locale_ctype_t *ctype, struct charmap_t *charmap,
2570 struct repertoire_t *repertoire)
19bc17a9 2571{
4b10dd6c
UD
2572 size_t cnt;
2573
19bc17a9
RM
2574 /* These function defines the default values for the classes and conversions
2575 according to POSIX.2 2.5.2.1.
2576 It may seem that the order of these if-blocks is arbitrary but it is NOT.
2577 Don't move them unless you know what you do! */
2578
4b10dd6c 2579 void set_default (int bitpos, int from, int to)
19bc17a9
RM
2580 {
2581 char tmp[2];
2582 int ch;
4b10dd6c
UD
2583 int bit = _ISbit (bitpos);
2584 int bitw = _ISwbit (bitpos);
19bc17a9
RM
2585 /* Define string. */
2586 strcpy (tmp, "?");
2587
2588 for (ch = from; ch <= to; ++ch)
2589 {
4b10dd6c 2590 struct charseq *seq;
19bc17a9
RM
2591 tmp[0] = ch;
2592
4b10dd6c
UD
2593 seq = charmap_find_value (charmap, tmp, 1);
2594 if (seq == NULL)
2595 {
2596 if (!be_quiet)
2597 error (0, 0, _("\
2598%s: character `%s' not defined in charmap while needed as default value"),
2599 "LC_CTYPE", tmp);
19bc17a9 2600 }
4b10dd6c
UD
2601 else if (seq->nbytes != 1)
2602 error (0, 0, _("\
2603%s: character `%s' in charmap not representable with one byte"),
2604 "LC_CTYPE", tmp);
19bc17a9 2605 else
4b10dd6c 2606 ctype->class256_collection[seq->bytes[0]] |= bit;
f0a4b6b1
UD
2607
2608 /* No need to search here, the ASCII value is also the Unicode
2609 value. */
2610 ELEM (ctype, class_collection, , ch) |= bitw;
19bc17a9
RM
2611 }
2612 }
2613
2614 /* Set default values if keyword was not present. */
4b10dd6c 2615 if ((ctype->class_done & BITw (tok_upper)) == 0)
19bc17a9
RM
2616 /* "If this keyword [lower] is not specified, the lowercase letters
2617 `A' through `Z', ..., shall automatically belong to this class,
2618 with implementation defined character values." [P1003.2, 2.5.2.1] */
4b10dd6c 2619 set_default (BITPOS (tok_upper), 'A', 'Z');
19bc17a9 2620
4b10dd6c 2621 if ((ctype->class_done & BITw (tok_lower)) == 0)
19bc17a9
RM
2622 /* "If this keyword [lower] is not specified, the lowercase letters
2623 `a' through `z', ..., shall automatically belong to this class,
2624 with implementation defined character values." [P1003.2, 2.5.2.1] */
4b10dd6c 2625 set_default (BITPOS (tok_lower), 'a', 'z');
19bc17a9 2626
4b10dd6c 2627 if ((ctype->class_done & BITw (tok_alpha)) == 0)
19bc17a9
RM
2628 {
2629 /* Table 2-6 in P1003.2 says that characters in class `upper' or
2630 class `lower' *must* be in class `alpha'. */
2631 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
96f0d1f5
UD
2632 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
2633
2634 for (cnt = 0; cnt < 256; ++cnt)
2635 if ((ctype->class256_collection[cnt] & mask) != 0)
2636 ctype->class256_collection[cnt] |= BIT (tok_alpha);
19bc17a9
RM
2637
2638 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
96f0d1f5
UD
2639 if ((ctype->class_collection[cnt] & maskw) != 0)
2640 ctype->class_collection[cnt] |= BITw (tok_alpha);
19bc17a9
RM
2641 }
2642
4b10dd6c 2643 if ((ctype->class_done & BITw (tok_digit)) == 0)
19bc17a9
RM
2644 /* "If this keyword [digit] is not specified, the digits `0' through
2645 `9', ..., shall automatically belong to this class, with
2646 implementation-defined character values." [P1003.2, 2.5.2.1] */
4b10dd6c 2647 set_default (BITPOS (tok_digit), '0', '9');
19bc17a9
RM
2648
2649 /* "Only characters specified for the `alpha' and `digit' keyword
2650 shall be specified. Characters specified for the keyword `alpha'
2651 and `digit' are automatically included in this class. */
2652 {
2653 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
96f0d1f5
UD
2654 unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
2655
2656 for (cnt = 0; cnt < 256; ++cnt)
2657 if ((ctype->class256_collection[cnt] & mask) != 0)
2658 ctype->class256_collection[cnt] |= BIT (tok_alnum);
19bc17a9
RM
2659
2660 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
96f0d1f5
UD
2661 if ((ctype->class_collection[cnt] & maskw) != 0)
2662 ctype->class_collection[cnt] |= BITw (tok_alnum);
19bc17a9
RM
2663 }
2664
4b10dd6c 2665 if ((ctype->class_done & BITw (tok_space)) == 0)
19bc17a9
RM
2666 /* "If this keyword [space] is not specified, the characters <space>,
2667 <form-feed>, <newline>, <carriage-return>, <tab>, and
2668 <vertical-tab>, ..., shall automatically belong to this class,
2669 with implementation-defined character values." [P1003.2, 2.5.2.1] */
2670 {
4b10dd6c 2671 struct charseq *seq;
19bc17a9 2672
4b10dd6c 2673 seq = charmap_find_value (charmap, "space", 5);
f0a4b6b1
UD
2674 if (seq == NULL)
2675 seq = charmap_find_value (charmap, "U00000020", 9);
4b10dd6c 2676 if (seq == NULL)
880f421f
UD
2677 {
2678 if (!be_quiet)
2679 error (0, 0, _("\
4b10dd6c
UD
2680%s: character `%s' not defined while needed as default value"),
2681 "LC_CTYPE", "<space>");
2682 }
2683 else if (seq->nbytes != 1)
2684 error (0, 0, _("\
2685%s: character `%s' in charmap not representable with one byte"),
2686 "LC_CTYPE", "<space>");
2687 else
2688 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2689
f0a4b6b1
UD
2690 /* No need to search. */
2691 ELEM (ctype, class_collection, , L' ') |= BIT (tok_space);
19bc17a9 2692
4b10dd6c 2693 seq = charmap_find_value (charmap, "form-feed", 9);
f0a4b6b1
UD
2694 if (seq == NULL)
2695 seq = charmap_find_value (charmap, "U0000000C", 9);
4b10dd6c 2696 if (seq == NULL)
880f421f
UD
2697 {
2698 if (!be_quiet)
2699 error (0, 0, _("\
4b10dd6c
UD
2700%s: character `%s' not defined while needed as default value"),
2701 "LC_CTYPE", "<form-feed>");
2702 }
2703 else if (seq->nbytes != 1)
2704 error (0, 0, _("\
2705%s: character `%s' in charmap not representable with one byte"),
2706 "LC_CTYPE", "<form-feed>");
2707 else
2708 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2709
f0a4b6b1
UD
2710 /* No need to search. */
2711 ELEM (ctype, class_collection, , L'\f') |= BIT (tok_space);
4b10dd6c 2712
19bc17a9 2713
4b10dd6c 2714 seq = charmap_find_value (charmap, "newline", 7);
f0a4b6b1
UD
2715 if (seq == NULL)
2716 seq = charmap_find_value (charmap, "U0000000A", 9);
4b10dd6c 2717 if (seq == NULL)
880f421f
UD
2718 {
2719 if (!be_quiet)
2720 error (0, 0, _("\
19bc17a9 2721character `%s' not defined while needed as default value"),
4b10dd6c
UD
2722 "<newline>");
2723 }
2724 else if (seq->nbytes != 1)
2725 error (0, 0, _("\
2726%s: character `%s' in charmap not representable with one byte"),
2727 "LC_CTYPE", "<newline>");
2728 else
2729 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2730
f0a4b6b1
UD
2731 /* No need to search. */
2732 ELEM (ctype, class_collection, , L'\n') |= BIT (tok_space);
4b10dd6c 2733
19bc17a9 2734
4b10dd6c 2735 seq = charmap_find_value (charmap, "carriage-return", 15);
f0a4b6b1
UD
2736 if (seq == NULL)
2737 seq = charmap_find_value (charmap, "U0000000D", 9);
4b10dd6c 2738 if (seq == NULL)
880f421f
UD
2739 {
2740 if (!be_quiet)
2741 error (0, 0, _("\
4b10dd6c
UD
2742%s: character `%s' not defined while needed as default value"),
2743 "LC_CTYPE", "<carriage-return>");
2744 }
2745 else if (seq->nbytes != 1)
2746 error (0, 0, _("\
2747%s: character `%s' in charmap not representable with one byte"),
2748 "LC_CTYPE", "<carriage-return>");
2749 else
2750 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2751
f0a4b6b1
UD
2752 /* No need to search. */
2753 ELEM (ctype, class_collection, , L'\r') |= BIT (tok_space);
4b10dd6c 2754
19bc17a9 2755
4b10dd6c 2756 seq = charmap_find_value (charmap, "tab", 3);
f0a4b6b1
UD
2757 if (seq == NULL)
2758 seq = charmap_find_value (charmap, "U00000009", 9);
4b10dd6c 2759 if (seq == NULL)
880f421f
UD
2760 {
2761 if (!be_quiet)
2762 error (0, 0, _("\
4b10dd6c
UD
2763%s: character `%s' not defined while needed as default value"),
2764 "LC_CTYPE", "<tab>");
2765 }
2766 else if (seq->nbytes != 1)
2767 error (0, 0, _("\
2768%s: character `%s' in charmap not representable with one byte"),
2769 "LC_CTYPE", "<tab>");
2770 else
2771 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2772
f0a4b6b1
UD
2773 /* No need to search. */
2774 ELEM (ctype, class_collection, , L'\t') |= BIT (tok_space);
4b10dd6c 2775
4b10dd6c
UD
2776
2777 seq = charmap_find_value (charmap, "vertical-tab", 12);
f0a4b6b1
UD
2778 if (seq == NULL)
2779 seq = charmap_find_value (charmap, "U0000000B", 9);
4b10dd6c
UD
2780 if (seq == NULL)
2781 {
2782 if (!be_quiet)
2783 error (0, 0, _("\
2784%s: character `%s' not defined while needed as default value"),
2785 "LC_CTYPE", "<vertical-tab>");
2786 }
2787 else if (seq->nbytes != 1)
2788 error (0, 0, _("\
2789%s: character `%s' in charmap not representable with one byte"),
2790 "LC_CTYPE", "<vertical-tab>");
2791 else
2792 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
f0a4b6b1
UD
2793
2794 /* No need to search. */
2795 ELEM (ctype, class_collection, , L'\v') |= BIT (tok_space);
19bc17a9
RM
2796 }
2797
4b10dd6c 2798 if ((ctype->class_done & BITw (tok_xdigit)) == 0)
19bc17a9
RM
2799 /* "If this keyword is not specified, the digits `0' to `9', the
2800 uppercase letters `A' through `F', and the lowercase letters `a'
2801 through `f', ..., shell automatically belong to this class, with
2802 implementation defined character values." [P1003.2, 2.5.2.1] */
2803 {
4b10dd6c
UD
2804 set_default (BITPOS (tok_xdigit), '0', '9');
2805 set_default (BITPOS (tok_xdigit), 'A', 'F');
2806 set_default (BITPOS (tok_xdigit), 'a', 'f');
19bc17a9
RM
2807 }
2808
4b10dd6c 2809 if ((ctype->class_done & BITw (tok_blank)) == 0)
19bc17a9
RM
2810 /* "If this keyword [blank] is unspecified, the characters <space> and
2811 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
2812 {
4b10dd6c 2813 struct charseq *seq;
19bc17a9 2814
4b10dd6c 2815 seq = charmap_find_value (charmap, "space", 5);
f0a4b6b1
UD
2816 if (seq == NULL)
2817 seq = charmap_find_value (charmap, "U00000020", 9);
4b10dd6c 2818 if (seq == NULL)
880f421f
UD
2819 {
2820 if (!be_quiet)
2821 error (0, 0, _("\
4b10dd6c
UD
2822%s: character `%s' not defined while needed as default value"),
2823 "LC_CTYPE", "<space>");
2824 }
2825 else if (seq->nbytes != 1)
2826 error (0, 0, _("\
2827%s: character `%s' in charmap not representable with one byte"),
2828 "LC_CTYPE", "<space>");
2829 else
2830 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
2831
f0a4b6b1
UD
2832 /* No need to search. */
2833 ELEM (ctype, class_collection, , L' ') |= BIT (tok_blank);
4b10dd6c 2834
4b10dd6c
UD
2835
2836 seq = charmap_find_value (charmap, "tab", 3);
f0a4b6b1
UD
2837 if (seq == NULL)
2838 seq = charmap_find_value (charmap, "U00000009", 9);
4b10dd6c
UD
2839 if (seq == NULL)
2840 {
2841 if (!be_quiet)
2842 error (0, 0, _("\
2843%s: character `%s' not defined while needed as default value"),
2844 "LC_CTYPE", "<tab>");
2845 }
2846 else if (seq->nbytes != 1)
2847 error (0, 0, _("\
2848%s: character `%s' in charmap not representable with one byte"),
2849 "LC_CTYPE", "<tab>");
2850 else
2851 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
f0a4b6b1
UD
2852
2853 /* No need to search. */
2854 ELEM (ctype, class_collection, , L'\t') |= BIT (tok_blank);
19bc17a9
RM
2855 }
2856
4b10dd6c 2857 if ((ctype->class_done & BITw (tok_graph)) == 0)
19bc17a9
RM
2858 /* "If this keyword [graph] is not specified, characters specified for
2859 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
2860 shall belong to this character class." [P1003.2, 2.5.2.1] */
2861 {
2862 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
2863 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
2864 size_t cnt;
2865
2866 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2867 if ((ctype->class_collection[cnt] & mask) != 0)
2868 ctype->class_collection[cnt] |= BIT (tok_graph);
4b10dd6c
UD
2869
2870 for (cnt = 0; cnt < 256; ++cnt)
2871 if ((ctype->class256_collection[cnt] & mask) != 0)
2872 ctype->class256_collection[cnt] |= BIT (tok_graph);
19bc17a9
RM
2873 }
2874
4b10dd6c 2875 if ((ctype->class_done & BITw (tok_print)) == 0)
19bc17a9
RM
2876 /* "If this keyword [print] is not provided, characters specified for
2877 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
2878 and the <space> character shall belong to this character class."
2879 [P1003.2, 2.5.2.1] */
2880 {
2881 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
2882 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
2883 size_t cnt;
4b10dd6c 2884 struct charseq *seq;
19bc17a9
RM
2885
2886 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2887 if ((ctype->class_collection[cnt] & mask) != 0)
2888 ctype->class_collection[cnt] |= BIT (tok_print);
2889
4b10dd6c
UD
2890 for (cnt = 0; cnt < 256; ++cnt)
2891 if ((ctype->class256_collection[cnt] & mask) != 0)
2892 ctype->class256_collection[cnt] |= BIT (tok_print);
2893
2894
4b10dd6c 2895 seq = charmap_find_value (charmap, "space", 5);
f0a4b6b1
UD
2896 if (seq == NULL)
2897 seq = charmap_find_value (charmap, "U00000020", 9);
4b10dd6c
UD
2898 if (seq == NULL)
2899 {
2900 if (!be_quiet)
2901 error (0, 0, _("\
2902%s: character `%s' not defined while needed as default value"),
2903 "LC_CTYPE", "<space>");
2904 }
2905 else if (seq->nbytes != 1)
2906 error (0, 0, _("\
2907%s: character `%s' in charmap not representable with one byte"),
2908 "LC_CTYPE", "<space>");
2909 else
2910 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
f0a4b6b1
UD
2911
2912 /* No need to search. */
2913 ELEM (ctype, class_collection, , L' ') |= BIT (tok_print);
19bc17a9
RM
2914 }
2915
4b10dd6c 2916 if (ctype->tomap_done[0] == 0)
6d52618b 2917 /* "If this keyword [toupper] is not specified, the lowercase letters
19bc17a9
RM
2918 `a' through `z', and their corresponding uppercase letters `A' to
2919 `Z', ..., shall automatically be included, with implementation-
2920 defined character values." [P1003.2, 2.5.2.1] */
2921 {
2922 char tmp[4];
2923 int ch;
2924
2925 strcpy (tmp, "<?>");
2926
2927 for (ch = 'a'; ch <= 'z'; ++ch)
2928 {
4b10dd6c 2929 struct charseq *seq_from, *seq_to;
19bc17a9
RM
2930
2931 tmp[1] = (char) ch;
2932
4b10dd6c
UD
2933 seq_from = charmap_find_value (charmap, &tmp[1], 1);
2934 if (seq_from == NULL)
19bc17a9 2935 {
880f421f
UD
2936 if (!be_quiet)
2937 error (0, 0, _("\
4b10dd6c
UD
2938%s: character `%s' not defined while needed as default value"),
2939 "LC_CTYPE", tmp);
2940 }
2941 else if (seq_from->nbytes != 1)
2942 {
2943 if (!be_quiet)
2944 error (0, 0, _("\
2945%s: character `%s' needed as default value not representable with one byte"),
2946 "LC_CTYPE", tmp);
2947 }
2948 else
2949 {
2950 /* This conversion is implementation defined. */
2951 tmp[1] = (char) (ch + ('A' - 'a'));
2952 seq_to = charmap_find_value (charmap, &tmp[1], 1);
2953 if (seq_to == NULL)
2954 {
2955 if (!be_quiet)
2956 error (0, 0, _("\
2957%s: character `%s' not defined while needed as default value"),
2958 "LC_CTYPE", tmp);
2959 }
2960 else if (seq_to->nbytes != 1)
2961 {
2962 if (!be_quiet)
2963 error (0, 0, _("\
2964%s: character `%s' needed as default value not representable with one byte"),
2965 "LC_CTYPE", tmp);
2966 }
2967 else
2968 /* The index [0] is determined by the order of the
2969 `ctype_map_newP' calls in `ctype_startup'. */
2970 ctype->map256_collection[0][seq_from->bytes[0]]
2971 = seq_to->bytes[0];
19bc17a9 2972 }
f0a4b6b1
UD
2973
2974 /* No need to search. */
2975 ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
19bc17a9
RM
2976 }
2977 }
2978
4b10dd6c 2979 if (ctype->tomap_done[1] == 0)
19bc17a9
RM
2980 /* "If this keyword [tolower] is not specified, the mapping shall be
2981 the reverse mapping of the one specified to `toupper'." [P1003.2] */
2982 {
19bc17a9
RM
2983 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
2984 if (ctype->map_collection[0][cnt] != 0)
2985 ELEM (ctype, map_collection, [1],
2986 ctype->map_collection[0][cnt])
2987 = ctype->charnames[cnt];
4b10dd6c
UD
2988
2989 for (cnt = 0; cnt < 256; ++cnt)
2990 if (ctype->map256_collection[0][cnt] != 0)
85cb60ff 2991 ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
4b10dd6c
UD
2992 }
2993
2994 if (ctype->outdigits_act == 0)
2995 {
2996 for (cnt = 0; cnt < 10; ++cnt)
2997 {
2998 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
2999 digits + cnt, 1);
3000
3001 if (ctype->mboutdigits[cnt] == NULL)
3002 {
3003 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3004 longnames[cnt],
3005 strlen (longnames[cnt]));
3006
3007 if (ctype->mboutdigits[cnt] == NULL)
3008 {
3009 /* Provide a replacement. */
3010 error (0, 0, _("\
3011no output digits defined and none of the standard names in the charmap"));
3012
3013 ctype->mboutdigits[cnt] = obstack_alloc (&charmap->mem_pool,
3014 sizeof (struct charseq) + 1);
3015
3016 /* This is better than nothing. */
3017 ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3018 ctype->mboutdigits[cnt]->nbytes = 1;
3019 }
3020 }
b9eb05d6
UD
3021
3022 ctype->wcoutdigits[cnt] = repertoire_find_value (repertoire,
3023 digits + cnt, 1);
3024
3025 if (ctype->wcoutdigits[cnt] == ILLEGAL_CHAR_VALUE)
3026 {
3027 ctype->wcoutdigits[cnt] = repertoire_find_value (repertoire,
3028 longnames[cnt],
3029 strlen (longnames[cnt]));
3030
3031 if (ctype->wcoutdigits[cnt] == ILLEGAL_CHAR_VALUE)
3032 {
3033 /* Provide a replacement. */
3034 error (0, 0, _("\
3035no output digits defined and none of the standard names in the repertoire"));
3036
3037 /* This is better than nothing. */
3038 ctype->wcoutdigits[cnt] = (uint32_t) digits[cnt];
3039 }
3040 }
4b10dd6c
UD
3041 }
3042
3043 ctype->outdigits_act = 10;
19bc17a9
RM
3044 }
3045}
3046
3047
3048static void
4b10dd6c
UD
3049allocate_arrays (struct locale_ctype_t *ctype, struct charmap_t *charmap,
3050 struct repertoire_t *repertoire)
19bc17a9
RM
3051{
3052 size_t idx;
a53e3292 3053 size_t width_table_size;
5d431a3e 3054
6d52618b
UD
3055 /* First we have to decide how we organize the arrays. It is easy
3056 for a one-byte character set. But multi-byte character set
3057 cannot be stored flat because the chars might be sparsely used.
3058 So we determine an optimal hashing function for the used
3059 characters.
3060
3061 We use a very trivial hashing function to store the sparse
3062 table. CH % TABSIZE is used as an index. To solve multiple hits
3063 we have N planes. This guarantees a fixed search time for a
42d7c593 3064 character [N / 2]. In the following code we determine the minimum
66ac0abe
UD
3065 value for TABSIZE * N, where TABSIZE >= 256.
3066
3067 Some people complained that this algorithm takes too long. Well,
3068 go on, improve it. But changing the step size is *not* an
3069 option. Some people changed this to use only sizes of prime
3070 numbers. Think again, do some math. We are looking for the
3071 optimal solution, not something which works in general. Unless
3072 somebody can provide a dynamic programming solution I think this
3073 implementation is as good as it can get. */
19bc17a9
RM
3074 size_t min_total = UINT_MAX;
3075 size_t act_size = 256;
3076
66ac0abe 3077 if (!be_quiet && ctype->charnames_act > 512)
c84142e8 3078 fputs (_("\
19bc17a9 3079Computing table size for character classes might take a while..."),
c84142e8 3080 stderr);
19bc17a9 3081
66ac0abe
UD
3082 /* While we want to have a small total size we are willing to use a
3083 little bit larger table if this reduces the number of layers.
3084 Therefore we add a little penalty to the number of planes.
3085 Maybe this constant has to be adjusted a bit. */
3086#define PENALTY 128
3087 do
19bc17a9
RM
3088 {
3089 size_t cnt[act_size];
3090 size_t act_planes = 1;
3091
3092 memset (cnt, '\0', sizeof cnt);
3093
3094 for (idx = 0; idx < 256; ++idx)
3095 cnt[idx] = 1;
3096
3097 for (idx = 0; idx < ctype->charnames_act; ++idx)
3098 if (ctype->charnames[idx] >= 256)
3099 {
3100 size_t nr = ctype->charnames[idx] % act_size;
3101
3102 if (++cnt[nr] > act_planes)
3103 {
3104 act_planes = cnt[nr];
66ac0abe 3105 if ((act_size + PENALTY) * act_planes >= min_total)
19bc17a9
RM
3106 break;
3107 }
3108 }
3109
66ac0abe 3110 if ((act_size + PENALTY) * act_planes < min_total)
19bc17a9 3111 {
66ac0abe 3112 min_total = (act_size + PENALTY) * act_planes;
19bc17a9
RM
3113 ctype->plane_size = act_size;
3114 ctype->plane_cnt = act_planes;
3115 }
3116
3117 ++act_size;
3118 }
66ac0abe 3119 while (act_size < min_total);
19bc17a9 3120
66ac0abe 3121 if (!be_quiet && ctype->charnames_act > 512)
c84142e8 3122 fputs (_(" done\n"), stderr);
19bc17a9 3123
75cd5204 3124
4a33c2f5
UD
3125 ctype->names = (uint32_t *) xcalloc (ctype->plane_size
3126 * ctype->plane_cnt,
3127 sizeof (uint32_t));
19bc17a9
RM
3128
3129 for (idx = 1; idx < 256; ++idx)
4a33c2f5 3130 ctype->names[idx] = idx;
19bc17a9
RM
3131
3132 /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */
4a33c2f5 3133 ctype->names[0] = 1;
19bc17a9
RM
3134
3135 for (idx = 256; idx < ctype->charnames_act; ++idx)
3136 {
3137 size_t nr = (ctype->charnames[idx] % ctype->plane_size);
3138 size_t depth = 0;
3139
4a33c2f5 3140 while (ctype->names[nr + depth * ctype->plane_size])
19bc17a9
RM
3141 ++depth;
3142 assert (depth < ctype->plane_cnt);
3143
4a33c2f5 3144 ctype->names[nr + depth * ctype->plane_size] = ctype->charnames[idx];
19bc17a9
RM
3145
3146 /* Now for faster access remember the index in the NAMES_B array. */
3147 ctype->charnames[idx] = nr + depth * ctype->plane_size;
3148 }
4a33c2f5 3149 ctype->names[0] = 0;
19bc17a9
RM
3150
3151
3152 /* You wonder about this amount of memory? This is only because some
3153 users do not manage to address the array with unsigned values or
3154 data types with range >= 256. '\200' would result in the array
3155 index -128. To help these poor people we duplicate the entries for
3156 128 up to 255 below the entry for \0. */
3157 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
3158 sizeof (char_class_t));
3159 ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
3160 * ctype->plane_cnt,
3161 sizeof (char_class32_t));
3162
4a33c2f5 3163 /* This is the array accessed using the multibyte string elements. */
4b10dd6c 3164 for (idx = 0; idx < 256; ++idx)
4a33c2f5 3165 ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
19bc17a9 3166
75cd5204
RM
3167 /* Mirror first 127 entries. We must take care that entry -1 is not
3168 mirrored because EOF == -1. */
3169 for (idx = 0; idx < 127; ++idx)
19bc17a9
RM
3170 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3171
3172 /* The 32 bit array contains all characters. */
3173 for (idx = 0; idx < ctype->class_collection_act; ++idx)
4a33c2f5 3174 ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
19bc17a9
RM
3175
3176 /* Room for table of mappings. */
49f2be5b
UD
3177 ctype->map = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3178 ctype->map32 = (uint32_t **) xmalloc (ctype->map_collection_nr
4a33c2f5 3179 * sizeof (uint32_t *));
19bc17a9
RM
3180
3181 /* Fill in all mappings. */
49f2be5b 3182 for (idx = 0; idx < 2; ++idx)
19bc17a9
RM
3183 {
3184 unsigned int idx2;
3185
3186 /* Allocate table. */
49f2be5b 3187 ctype->map[idx] = (uint32_t *) xmalloc ((256 + 128) * sizeof (uint32_t));
19bc17a9
RM
3188
3189 /* Copy values from collection. */
4b10dd6c 3190 for (idx2 = 0; idx2 < 256; ++idx2)
4a33c2f5 3191 ctype->map[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
19bc17a9 3192
75cd5204
RM
3193 /* Mirror first 127 entries. We must take care not to map entry
3194 -1 because EOF == -1. */
3195 for (idx2 = 0; idx2 < 127; ++idx2)
4a33c2f5 3196 ctype->map[idx][idx2] = ctype->map[idx][256 + idx2];
19bc17a9 3197
75cd5204 3198 /* EOF must map to EOF. */
4a33c2f5 3199 ctype->map[idx][127] = EOF;
49f2be5b 3200 }
a9c27b3e 3201
49f2be5b
UD
3202 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3203 {
3204 unsigned int idx2;
3205
3206 /* Allocate table. */
f1d8b804
UD
3207 ctype->map32[idx] = (uint32_t *) xmalloc (ctype->plane_size
3208 * ctype->plane_cnt
3209 * sizeof (uint32_t));
49f2be5b
UD
3210
3211 /* Copy default value (identity mapping). */
f1d8b804 3212 memcpy (ctype->map32[idx], ctype->names,
49f2be5b
UD
3213 ctype->plane_size * ctype->plane_cnt * sizeof (uint32_t));
3214
3215 /* Copy values from collection. */
3216 for (idx2 = 0; idx2 < 256; ++idx2)
a9c27b3e 3217 if (ctype->map_collection[idx][idx2] != 0)
f1d8b804
UD
3218 ctype->map32[idx][idx2] = ctype->map_collection[idx][idx2];
3219
3220 while (idx2 < ctype->map_collection_act[idx])
b06c53e7
UD
3221 {
3222 if (ctype->map_collection[idx][idx2] != 0)
450bf66e
UD
3223 ctype->map32[idx][ctype->charnames[idx2]] =
3224 ctype->map_collection[idx][idx2];
b06c53e7
UD
3225 ++idx2;
3226 }
19bc17a9
RM
3227 }
3228
3229 /* Extra array for class and map names. */
4b10dd6c
UD
3230 ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3231 * sizeof (uint32_t));
3232 ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3233 * sizeof (uint32_t));
75cd5204
RM
3234
3235 /* Array for width information. Because the expected width are very
3236 small we use only one single byte. This save space and we need
3237 not provide the information twice with both endianesses. */
5866b131
UD
3238 width_table_size = (ctype->plane_size * ctype->plane_cnt + 3) & ~3ul;
3239 ctype->width = (unsigned char *) xmalloc (width_table_size);
3240
75cd5204 3241 /* Initialize with default width value. */
5866b131 3242 memset (ctype->width, charmap->width_default, width_table_size);
4b10dd6c 3243 if (charmap->width_rules != NULL)
75cd5204
RM
3244 {
3245 size_t cnt;
3246
4b10dd6c 3247 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
827ff758
UD
3248 {
3249 unsigned char bytes[charmap->mb_cur_max];
3250 int nbytes = charmap->width_rules[cnt].from->nbytes;
3251
3252 /* We have the range of character for which the width is
3253 specified described using byte sequences of the multibyte
3254 charset. We have to convert this to UCS4 now. And we
3255 cannot simply convert the beginning and the end of the
3256 sequence, we have to iterate over the byte sequence and
3257 convert it for every single character. */
3258 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
3259
3260 while (nbytes < charmap->width_rules[cnt].to->nbytes
3261 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
3262 nbytes) <= 0)
75cd5204 3263 {
827ff758 3264 /* Find the UCS value for `bytes'. */
827ff758 3265 int inner;
76e680a8
UD
3266 uint32_t wch;
3267 struct charseq *seq =
3268 charmap_find_symbol (charmap, bytes, nbytes);
3269
3270 if (seq == NULL)
3271 wch = ILLEGAL_CHAR_VALUE;
3272 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
3273 wch = seq->ucs4;
3274 else
3275 wch = repertoire_find_value (ctype->repertoire, seq->name,
3276 strlen (seq->name));
827ff758
UD
3277
3278 if (wch != ILLEGAL_CHAR_VALUE)
3279 {
3280 /* Store the value. */
b1c9ad82 3281 size_t nr = wch % ctype->plane_size;
827ff758
UD
3282 size_t depth = 0;
3283
b1c9ad82 3284 while (ctype->names[nr + depth * ctype->plane_size] != wch)
827ff758
UD
3285 ++depth;
3286 assert (depth < ctype->plane_cnt);
3287
3288 ctype->width[nr + depth * ctype->plane_size]
3289 = charmap->width_rules[cnt].width;
3290 }
3291
3292 /* "Increment" the bytes sequence. */
3293 inner = nbytes - 1;
3294 while (inner >= 0 && bytes[inner] == 0xff)
3295 --inner;
75cd5204 3296
827ff758
UD
3297 if (inner < 0)
3298 {
3299 /* We have to extend the byte sequence. */
3300 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
3301 break;
75cd5204 3302
827ff758
UD
3303 bytes[0] = 1;
3304 memset (&bytes[1], 0, nbytes);
3305 ++nbytes;
3306 }
3307 else
3308 {
3309 ++bytes[inner];
3310 while (++inner < nbytes)
3311 bytes[inner] = 0;
3312 }
75cd5204 3313 }
827ff758 3314 }
75cd5204 3315 }
0200214b 3316
4b10dd6c
UD
3317 /* Set MB_CUR_MAX. */
3318 ctype->mb_cur_max = charmap->mb_cur_max;
6990326c 3319
4b10dd6c
UD
3320 /* Now determine the table for the transliteration information.
3321
3322 XXX It is not yet clear to me whether it is worth implementing a
3323 complicated algorithm which uses a hash table to locate the entries.
3324 For now I'll use a simple array which can be searching using binary
3325 search. */
3326 if (ctype->translit_copy_locale != NULL)
3327 {
3328 /* Fold in the transliteration information from the locale mentioned
3329 in the `include' statement. */
3330 struct locale_ctype_t *here = ctype;
3331
3332 do
3333 {
3334 struct localedef_t *other = find_locale (LC_CTYPE,
3335 here->translit_copy_locale,
3336 repertoire->name, charmap);
3337
3338 if (other == NULL)
3339 {
3340 error (0, 0, _("\
3341%s: transliteration data from locale `%s' not available"),
3342 "LC_CTYPE", here->translit_copy_locale);
3343 break;
3344 }
3345
3346 here = other->categories[LC_CTYPE].ctype;
3347
3348 /* Enqueue the information if necessary. */
3349 if (here->translit != NULL)
3350 {
3351 struct translit_t *endp = here->translit;
3352 while (endp->next != NULL)
3353 endp = endp->next;
3354
3355 endp->next = ctype->translit;
3356 ctype->translit = here->translit;
3357 }
3358 }
3359 while (here->translit_copy_locale != NULL);
3360 }
3361
3362 if (ctype->translit != NULL)
3363 {
3364 /* First count how many entries we have. This is the upper limit
3365 since some entries from the included files might be overwritten. */
3366 size_t number = 0;
3367 size_t cnt;
3368 struct translit_t *runp = ctype->translit;
3369 struct translit_t **sorted;
3370 size_t from_len, to_len;
3371
3372 while (runp != NULL)
3373 {
3374 ++number;
3375 runp = runp->next;
3376 }
3377
3378 /* Next we allocate an array large enough and fill in the values. */
a9c27b3e
UD
3379 sorted = (struct translit_t **) alloca (number
3380 * sizeof (struct translit_t **));
4b10dd6c
UD
3381 runp = ctype->translit;
3382 number = 0;
3383 do
3384 {
3385 /* Search for the place where to insert this string.
3386 XXX Better use a real sorting algorithm later. */
3387 size_t idx = 0;
3388 int replace = 0;
3389
3390 while (idx < number)
3391 {
3392 int res = wcscmp ((const wchar_t *) sorted[idx]->from,
3393 (const wchar_t *) runp->from);
3394 if (res == 0)
3395 {
3396 replace = 1;
3397 break;
3398 }
3399 if (res > 0)
3400 break;
3401 ++idx;
3402 }
3403
3404 if (replace)
3405 sorted[idx] = runp;
3406 else
3407 {
3408 memmove (&sorted[idx + 1], &sorted[idx],
3409 (number - idx) * sizeof (struct translit_t *));
3410 sorted[idx] = runp;
3411 ++number;
3412 }
3413
3414 runp = runp->next;
3415 }
3416 while (runp != NULL);
3417
3418 /* The next step is putting all the possible transliteration
3419 strings in one memory block so that we can write it out.
3420 We need several different blocks:
3421 - index to the tfromstring array
3422 - from-string array
3423 - index to the to-string array
3424 - to-string array.
3425 And this all must be available for both endianes variants.
3426 */
3427 from_len = to_len = 0;
3428 for (cnt = 0; cnt < number; ++cnt)
3429 {
3430 struct translit_to_t *srunp;
3431 from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3432 srunp = sorted[cnt]->to;
3433 while (srunp != NULL)
3434 {
3435 to_len += wcslen ((const wchar_t *) srunp->str) + 1;
3436 srunp = srunp->next;
3437 }
3438 /* Plus one for the extra NUL character marking the end of
3439 the list for the current entry. */
3440 ++to_len;
3441 }
3442
3443 /* We can allocate the arrays for the results. */
4a33c2f5
UD
3444 ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
3445 ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
3446 ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
3447 ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
4b10dd6c
UD
3448
3449 from_len = 0;
3450 to_len = 0;
3451 for (cnt = 0; cnt < number; ++cnt)
3452 {
3453 size_t len;
3454 struct translit_to_t *srunp;
3455
4a33c2f5
UD
3456 ctype->translit_from_idx[cnt] = from_len;
3457 ctype->translit_to_idx[cnt] = to_len;
4b10dd6c
UD
3458
3459 len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4a33c2f5 3460 wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4b10dd6c
UD
3461 (const wchar_t *) sorted[cnt]->from, len);
3462 from_len += len;
3463
4a33c2f5 3464 ctype->translit_to_idx[cnt] = to_len;
4b10dd6c
UD
3465 srunp = sorted[cnt]->to;
3466 while (srunp != NULL)
3467 {
3468 len = wcslen ((const wchar_t *) srunp->str) + 1;
4a33c2f5 3469 wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4b10dd6c
UD
3470 (const wchar_t *) srunp->str, len);
3471 to_len += len;
3472 srunp = srunp->next;
3473 }
4a33c2f5 3474 ctype->translit_to_tbl[to_len++] = L'\0';
4b10dd6c 3475 }
4b10dd6c
UD
3476
3477 /* Store the information about the length. */
3478 ctype->translit_idx_size = number * sizeof (uint32_t);
3479 ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
3480 ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
3481 }
3482 else
3483 {
3484 /* Provide some dummy pointers since we have nothing to write out. */
3485 static uint32_t no_str = { 0 };
3486
4a33c2f5
UD
3487 ctype->translit_from_idx = &no_str;
3488 ctype->translit_from_tbl = &no_str;
3489 ctype->translit_to_tbl = &no_str;
4b10dd6c
UD
3490 ctype->translit_idx_size = 0;
3491 ctype->translit_from_tbl_size = 0;
3492 ctype->translit_to_tbl_size = 0;
3493 }
19bc17a9 3494}