]> git.ipfire.org Git - thirdparty/glibc.git/blame - locale/programs/ld-ctype.c
Clean up locale file alignment handling.
[thirdparty/glibc.git] / locale / programs / ld-ctype.c
CommitLineData
/* Copyright (C) 1995-2013 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.  */
19bc17a9
RM
17
18#ifdef HAVE_CONFIG_H
19# include <config.h>
20#endif
21
a68b0d31 22#include <alloca.h>
4b10dd6c 23#include <byteswap.h>
19bc17a9 24#include <endian.h>
4b10dd6c 25#include <errno.h>
19bc17a9 26#include <limits.h>
4b10dd6c
UD
27#include <obstack.h>
28#include <stdlib.h>
19bc17a9 29#include <string.h>
4b10dd6c
UD
30#include <wchar.h>
31#include <wctype.h>
e054f494 32#include <stdint.h>
4b10dd6c 33#include <sys/uio.h>
19bc17a9 34
f2b98f97 35#include "localedef.h"
4b10dd6c 36#include "charmap.h"
19bc17a9
RM
37#include "localeinfo.h"
38#include "langinfo.h"
4b10dd6c 39#include "linereader.h"
19bc17a9 40#include "locfile-token.h"
4b10dd6c 41#include "locfile.h"
19bc17a9 42
19bc17a9
RM
43#include <assert.h>
44
45
#ifdef PREDEFINED_CLASSES
/* These are the extra bits not in wctype.h since these are not preallocated
   classes.  The constants are unsigned: `1 << 31' would shift into the
   sign bit of an int, which is undefined behaviour.  */
# define _ISwspecial1	(UINT32_C (1) << 29)
# define _ISwspecial2	(UINT32_C (1) << 30)
# define _ISwspecial3	(UINT32_C (1) << 31)
#endif
19bc17a9
RM
53
54
/* The bit used for representing a special class.  BITPOS maps a class
   token to its zero-based position relative to tok_upper; BIT and BITw
   turn that position into the mask used in the 8-bit tables (_ISbit) and
   in the wide character tables (_ISwbit) respectively.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (_ISbit (BITPOS (class)))
#define BITw(class) (_ISwbit (BITPOS (class)))

/* Yield an lvalue for the entry belonging to the wide character VALUE in
   the table CTYPE->COLLECTION.  IDX is pasted after the member names, so
   it is either empty or an array subscript selecting one of several
   tables.  The lookup is done by find_idx, which is handed the table
   together with its `_max' and `_act' bookkeeping members.  */
#define ELEM(ctype, collection, idx, value)				      \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,    \
	     &ctype->collection##_act idx, value)
63
19bc17a9
RM
64
/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t uint16_t
/* Element type of the 32-bit class table (see `ctype32_b').  */
#define char_class32_t uint32_t
4b10dd6c
UD
70
71
/* Type to describe a transliteration action.  We have a possibly
   multiple character from-string and a set of multiple character
   to-strings.  All are 32bit values since this is what is used in
   the gconv functions.  */
struct translit_to_t
{
  /* One replacement string, stored as 32-bit values.  */
  uint32_t *str;

  /* Next alternative replacement, or NULL at the end of the list.  */
  struct translit_to_t *next;
};
82
/* One transliteration entry: a from-string together with the list of
   its to-strings.  Entries are chained through `next'.  */
struct translit_t
{
  /* The string to be replaced, stored as 32-bit values.  */
  uint32_t *from;

  /* File name and line number recorded with this entry.  */
  const char *fname;
  size_t lineno;

  /* List of replacement strings.  */
  struct translit_to_t *to;

  struct translit_t *next;
};
19bc17a9 94
a673fbcb
UD
/* One entry of the translit_ignore list.  ctype_finish orders the list
   by the `from' member.  */
struct translit_ignore_t
{
  /* NOTE(review): from/to/step presumably describe a range of wide
     characters and the increment between its members -- confirm against
     the code which fills in these entries.  */
  uint32_t from;
  uint32_t to;
  uint32_t step;

  /* File name and line number recorded with this entry.  */
  const char *fname;
  size_t lineno;

  struct translit_ignore_t *next;
};
106
19bc17a9 107
/* Type to describe a transliteration include statement.  */
struct translit_include_t
{
  /* Names of the locale and of the repertoire given in the statement.  */
  const char *copy_locale;
  const char *copy_repertoire;

  struct translit_include_t *next;
};
116
117
601d2942
UD
118/* Sparse table of uint32_t. */
119#define TABLE idx_table
120#define ELEMENT uint32_t
bd75759f 121#define DEFAULT ((uint32_t) ~0)
1ecbb381 122#define NO_ADD_LOCALE
601d2942
UD
123#include "3level.h"
124
1ecbb381
RS
125#define TABLE wcwidth_table
126#define ELEMENT uint8_t
127#define DEFAULT 0xff
128#include "3level.h"
129
130#define TABLE wctrans_table
131#define ELEMENT int32_t
132#define DEFAULT 0
133#define wctrans_table_add wctrans_table_add_internal
134#include "3level.h"
135#undef wctrans_table_add
136/* The wctrans_table must actually store the difference between the
137 desired result and the argument. */
138static inline void
139wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
140{
141 wctrans_table_add_internal (t, wc, mapped_wc - wc);
142}
143
/* Construction of sparse 3-level tables.
   See wchar-lookup.h for their structure and the meaning of p and q.  */

struct wctype_table
{
  /* Parameters.  */
  unsigned int p;
  unsigned int q;
  /* Working representation.  For each level: number of elements
     allocated, number of elements in use, and the data itself.  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;
  size_t level3_alloc;
  size_t level3_size;
  uint32_t *level3;
  size_t result_size;
};

static void add_locale_wctype_table (struct locale_file *file,
				     struct wctype_table *t);
601d2942 167
19bc17a9
RM
168/* The real definition of the struct for the LC_CTYPE locale. */
169struct locale_ctype_t
170{
4b10dd6c 171 uint32_t *charnames;
19bc17a9
RM
172 size_t charnames_max;
173 size_t charnames_act;
04ea3b0f 174 /* An index lookup table, to speedup find_idx. */
601d2942 175 struct idx_table charnames_idx;
19bc17a9 176
4b10dd6c
UD
177 struct repertoire_t *repertoire;
178
179 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
180#define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
ba1ffaa1 181 size_t nr_charclass;
19bc17a9 182 const char *classnames[MAX_NR_CHARCLASS];
4b10dd6c
UD
183 uint32_t last_class_char;
184 uint32_t class256_collection[256];
185 uint32_t *class_collection;
19bc17a9
RM
186 size_t class_collection_max;
187 size_t class_collection_act;
4b10dd6c 188 uint32_t class_done;
ef446144 189 uint32_t class_offset;
4b10dd6c
UD
190
191 struct charseq **mbdigits;
192 size_t mbdigits_act;
193 size_t mbdigits_max;
194 uint32_t *wcdigits;
195 size_t wcdigits_act;
196 size_t wcdigits_max;
197
198 struct charseq *mboutdigits[10];
199 uint32_t wcoutdigits[10];
200 size_t outdigits_act;
19bc17a9
RM
201
202 /* If the following number ever turns out to be too small simply
203 increase it. But I doubt it will. --drepper@gnu */
204#define MAX_NR_CHARMAP 16
205 const char *mapnames[MAX_NR_CHARMAP];
4b10dd6c
UD
206 uint32_t *map_collection[MAX_NR_CHARMAP];
207 uint32_t map256_collection[2][256];
9a0a462c
UD
208 size_t map_collection_max[MAX_NR_CHARMAP];
209 size_t map_collection_act[MAX_NR_CHARMAP];
19bc17a9
RM
210 size_t map_collection_nr;
211 size_t last_map_idx;
4b10dd6c 212 int tomap_done[MAX_NR_CHARMAP];
ef446144 213 uint32_t map_offset;
4b10dd6c
UD
214
215 /* Transliteration information. */
02fb3d17 216 struct translit_include_t *translit_include;
4b10dd6c 217 struct translit_t *translit;
a673fbcb 218 struct translit_ignore_t *translit_ignore;
a8e4c924 219 uint32_t ntranslit_ignore;
a673fbcb
UD
220
221 uint32_t *default_missing;
222 const char *default_missing_file;
223 size_t default_missing_lineno;
19bc17a9 224
f0c7c524 225 uint32_t to_nonascii;
8a449450 226 uint32_t nonascii_case;
f0c7c524 227
19bc17a9 228 /* The arrays for the binary representation. */
19bc17a9
RM
229 char_class_t *ctype_b;
230 char_class32_t *ctype32_b;
4c7d276e
UD
231 uint32_t **map_b;
232 uint32_t **map32_b;
8fb81470 233 uint32_t **class_b;
1ecbb381
RS
234 struct wctype_table *class_3level;
235 struct wctrans_table *map_3level;
4b10dd6c
UD
236 uint32_t *class_name_ptr;
237 uint32_t *map_name_ptr;
1ecbb381 238 struct wcwidth_table width;
4b10dd6c 239 uint32_t mb_cur_max;
6990326c 240 const char *codeset_name;
4a33c2f5
UD
241 uint32_t *translit_from_idx;
242 uint32_t *translit_from_tbl;
243 uint32_t *translit_to_idx;
244 uint32_t *translit_to_tbl;
04fbc779 245 uint32_t translit_idx_size;
4b10dd6c
UD
246 size_t translit_from_tbl_size;
247 size_t translit_to_tbl_size;
248
a673fbcb 249 struct obstack mempool;
19bc17a9
RM
250};
251
47e8b443 252
bd75759f
AJ
/* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
   whether 'int' is 16 bit, 32 bit, or 64 bit.  */
#define EMPTY ((uint32_t) ~0)
256
19bc17a9 257
4b10dd6c
UD
/* Allocation hooks used by the obstack macros (see obstack_init in
   ctype_startup): chunks are obtained with xmalloc and released with
   free.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
260
261
/* Prototypes for local functions.  */
static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
			   const struct charmap_t *charmap,
			   struct localedef_t *copy_locale,
			   int ignore_content);
static void ctype_class_new (struct linereader *lr,
			     struct locale_ctype_t *ctype, const char *name);
static void ctype_map_new (struct linereader *lr,
			   struct locale_ctype_t *ctype,
			   const char *name, const struct charmap_t *charmap);
static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
			   size_t *max, size_t *act, uint32_t idx);
static void set_class_defaults (struct locale_ctype_t *ctype,
				const struct charmap_t *charmap,
				struct repertoire_t *repertoire);
static void allocate_arrays (struct locale_ctype_t *ctype,
			     const struct charmap_t *charmap,
			     struct repertoire_t *repertoire);
19bc17a9
RM
280
281
4b10dd6c
UD
/* Symbolic names of the ten decimal digits, indexed by digit value.
   ctype_finish tries them when a digit cannot be found in the charmap
   by its one-character name.  */
static const char *longnames[] =
{
  "zero", "one", "two", "three", "four",
  "five", "six", "seven", "eight", "nine"
};
/* The ten decimal digits spelled as `U' plus eight hex digits, indexed
   by digit value.  */
static const char *uninames[] =
{
  "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
  "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
};
/* The ten decimal digits as single bytes, indexed by digit value.  */
static const unsigned char digits[] = "0123456789";
293
294
295static void
19bc17a9 296ctype_startup (struct linereader *lr, struct localedef_t *locale,
47e8b443
UD
297 const struct charmap_t *charmap,
298 struct localedef_t *copy_locale, int ignore_content)
19bc17a9
RM
299{
300 unsigned int cnt;
301 struct locale_ctype_t *ctype;
302
a6bd56c7 303 if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
19bc17a9 304 {
a6bd56c7
UD
305 if (copy_locale == NULL)
306 {
307 /* Allocate the needed room. */
308 locale->categories[LC_CTYPE].ctype = ctype =
309 (struct locale_ctype_t *) xcalloc (1,
310 sizeof (struct locale_ctype_t));
311
312 /* We have seen no names yet. */
313 ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
08ffcf34
RS
314 ctype->charnames = (uint32_t *) xmalloc (ctype->charnames_max
315 * sizeof (uint32_t));
a6bd56c7
UD
316 for (cnt = 0; cnt < 256; ++cnt)
317 ctype->charnames[cnt] = cnt;
318 ctype->charnames_act = 256;
601d2942 319 idx_table_init (&ctype->charnames_idx);
a6bd56c7
UD
320
321 /* Fill character class information. */
322 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
323 /* The order of the following instructions determines the bit
324 positions! */
325 ctype_class_new (lr, ctype, "upper");
326 ctype_class_new (lr, ctype, "lower");
327 ctype_class_new (lr, ctype, "alpha");
328 ctype_class_new (lr, ctype, "digit");
329 ctype_class_new (lr, ctype, "xdigit");
330 ctype_class_new (lr, ctype, "space");
331 ctype_class_new (lr, ctype, "print");
332 ctype_class_new (lr, ctype, "graph");
333 ctype_class_new (lr, ctype, "blank");
334 ctype_class_new (lr, ctype, "cntrl");
335 ctype_class_new (lr, ctype, "punct");
336 ctype_class_new (lr, ctype, "alnum");
011ebfab 337#ifdef PREDEFINED_CLASSES
a6bd56c7
UD
338 /* The following are extensions from ISO 14652. */
339 ctype_class_new (lr, ctype, "left_to_right");
340 ctype_class_new (lr, ctype, "right_to_left");
341 ctype_class_new (lr, ctype, "num_terminator");
342 ctype_class_new (lr, ctype, "num_separator");
343 ctype_class_new (lr, ctype, "segment_separator");
344 ctype_class_new (lr, ctype, "block_separator");
345 ctype_class_new (lr, ctype, "direction_control");
346 ctype_class_new (lr, ctype, "sym_swap_layout");
347 ctype_class_new (lr, ctype, "char_shape_selector");
348 ctype_class_new (lr, ctype, "num_shape_selector");
349 ctype_class_new (lr, ctype, "non_spacing");
350 ctype_class_new (lr, ctype, "non_spacing_level3");
351 ctype_class_new (lr, ctype, "normal_connect");
352 ctype_class_new (lr, ctype, "r_connect");
353 ctype_class_new (lr, ctype, "no_connect");
354 ctype_class_new (lr, ctype, "no_connect-space");
355 ctype_class_new (lr, ctype, "vowel_connect");
011ebfab 356#endif
4b10dd6c 357
a6bd56c7
UD
358 ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
359 ctype->class_collection
360 = (uint32_t *) xcalloc (sizeof (unsigned long int),
361 ctype->class_collection_max);
362 ctype->class_collection_act = 256;
4b10dd6c 363
a6bd56c7
UD
364 /* Fill character map information. */
365 ctype->last_map_idx = MAX_NR_CHARMAP;
366 ctype_map_new (lr, ctype, "toupper", charmap);
367 ctype_map_new (lr, ctype, "tolower", charmap);
011ebfab 368#ifdef PREDEFINED_CLASSES
a6bd56c7 369 ctype_map_new (lr, ctype, "tosymmetric", charmap);
011ebfab 370#endif
4b10dd6c 371
a6bd56c7
UD
372 /* Fill first 256 entries in `toXXX' arrays. */
373 for (cnt = 0; cnt < 256; ++cnt)
374 {
375 ctype->map_collection[0][cnt] = cnt;
376 ctype->map_collection[1][cnt] = cnt;
9e2b7438 377#ifdef PREDEFINED_CLASSES
a6bd56c7 378 ctype->map_collection[2][cnt] = cnt;
9e2b7438 379#endif
a6bd56c7
UD
380 ctype->map256_collection[0][cnt] = cnt;
381 ctype->map256_collection[1][cnt] = cnt;
382 }
4b10dd6c 383
cb2eab1f
UD
384 if (enc_not_ascii_compatible)
385 ctype->to_nonascii = 1;
386
a6bd56c7
UD
387 obstack_init (&ctype->mempool);
388 }
389 else
390 ctype = locale->categories[LC_CTYPE].ctype =
391 copy_locale->categories[LC_CTYPE].ctype;
19bc17a9
RM
392 }
393}
394
395
396void
47e8b443 397ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
19bc17a9
RM
398{
399 /* See POSIX.2, table 2-6 for the meaning of the following table. */
400#define NCLASS 12
401 static const struct
402 {
403 const char *name;
404 const char allow[NCLASS];
405 }
406 valid_table[NCLASS] =
407 {
408 /* The order is important. See token.h for more information.
409 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
410 { "upper", "--MX-XDDXXX-" },
411 { "lower", "--MX-XDDXXX-" },
412 { "alpha", "---X-XDDXXX-" },
413 { "digit", "XXX--XDDXXX-" },
414 { "xdigit", "-----XDDXXX-" },
415 { "space", "XXXXX------X" },
416 { "print", "---------X--" },
417 { "graph", "---------X--" },
418 { "blank", "XXXXXM-----X" },
419 { "cntrl", "XXXXX-XX--XX" },
420 { "punct", "XXXXX-DD-X-X" },
421 { "alnum", "-----XDDXXX-" }
422 };
423 size_t cnt;
424 int cls1, cls2;
4b10dd6c
UD
425 uint32_t space_value;
426 struct charseq *space_seq;
19bc17a9 427 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
4b10dd6c 428 int warned;
0e16ecfa
UD
429 const void *key;
430 size_t len;
431 void *vdata;
432 void *curs;
19bc17a9 433
b9eb05d6
UD
434 /* Now resolve copying and also handle completely missing definitions. */
435 if (ctype == NULL)
436 {
70e51ab9
UD
437 const char *repertoire_name;
438
b9eb05d6
UD
439 /* First see whether we were supposed to copy. If yes, find the
440 actual definition. */
441 if (locale->copy_name[LC_CTYPE] != NULL)
442 {
443 /* Find the copying locale. This has to happen transitively since
444 the locale we are copying from might also copying another one. */
445 struct localedef_t *from = locale;
446
447 do
448 from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
449 from->repertoire_name, charmap);
450 while (from->categories[LC_CTYPE].ctype == NULL
451 && from->copy_name[LC_CTYPE] != NULL);
452
453 ctype = locale->categories[LC_CTYPE].ctype
454 = from->categories[LC_CTYPE].ctype;
455 }
456
457 /* If there is still no definition issue an warning and create an
458 empty one. */
459 if (ctype == NULL)
460 {
f6ada7ad 461 if (! be_quiet)
f2b98f97
UD
462 WITH_CUR_LOCALE (error (0, 0, _("\
463No definition for %s category found"), "LC_CTYPE"));
a6bd56c7 464 ctype_startup (NULL, locale, charmap, NULL, 0);
b9eb05d6
UD
465 ctype = locale->categories[LC_CTYPE].ctype;
466 }
70e51ab9
UD
467
468 /* Get the repertoire we have to use. */
469 repertoire_name = locale->repertoire_name ?: repertoire_global;
470 if (repertoire_name != NULL)
471 ctype->repertoire = repertoire_read (repertoire_name);
b9eb05d6
UD
472 }
473
db76d943
UD
474 /* We need the name of the currently used 8-bit character set to
475 make correct conversion between this 8-bit representation and the
476 ISO 10646 character set used internally for wide characters. */
477 ctype->codeset_name = charmap->code_set_name;
478 if (ctype->codeset_name == NULL)
479 {
480 if (! be_quiet)
f2b98f97
UD
481 WITH_CUR_LOCALE (error (0, 0, _("\
482No character set name specified in charmap")));
db76d943
UD
483 ctype->codeset_name = "//UNKNOWN//";
484 }
485
19bc17a9 486 /* Set default value for classes not specified. */
4b10dd6c 487 set_class_defaults (ctype, charmap, ctype->repertoire);
19bc17a9
RM
488
489 /* Check according to table. */
42d7c593 490 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
19bc17a9 491 {
4b10dd6c 492 uint32_t tmp = ctype->class_collection[cnt];
19bc17a9 493
4b10dd6c
UD
494 if (tmp != 0)
495 {
496 for (cls1 = 0; cls1 < NCLASS; ++cls1)
497 if ((tmp & _ISwbit (cls1)) != 0)
498 for (cls2 = 0; cls2 < NCLASS; ++cls2)
499 if (valid_table[cls1].allow[cls2] != '-')
19bc17a9 500 {
4b10dd6c
UD
501 int eq = (tmp & _ISwbit (cls2)) != 0;
502 switch (valid_table[cls1].allow[cls2])
19bc17a9 503 {
4b10dd6c
UD
504 case 'M':
505 if (!eq)
506 {
507 uint32_t value = ctype->charnames[cnt];
508
509 if (!be_quiet)
f2b98f97 510 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 511character L'\\u%0*x' in class `%s' must be in class `%s'"),
f2b98f97
UD
512 value > 0xffff ? 8 : 4,
513 value,
514 valid_table[cls1].name,
515 valid_table[cls2].name));
4b10dd6c
UD
516 }
517 break;
518
519 case 'X':
520 if (eq)
521 {
522 uint32_t value = ctype->charnames[cnt];
523
524 if (!be_quiet)
f2b98f97 525 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 526character L'\\u%0*x' in class `%s' must not be in class `%s'"),
f2b98f97
UD
527 value > 0xffff ? 8 : 4,
528 value,
529 valid_table[cls1].name,
530 valid_table[cls2].name));
4b10dd6c
UD
531 }
532 break;
533
534 case 'D':
535 ctype->class_collection[cnt] |= _ISwbit (cls2);
536 break;
537
538 default:
f2b98f97
UD
539 WITH_CUR_LOCALE (error (5, 0, _("\
540internal error in %s, line %u"), __FUNCTION__, __LINE__));
19bc17a9 541 }
4b10dd6c
UD
542 }
543 }
544 }
545
546 for (cnt = 0; cnt < 256; ++cnt)
547 {
548 uint32_t tmp = ctype->class256_collection[cnt];
19bc17a9 549
4b10dd6c
UD
550 if (tmp != 0)
551 {
552 for (cls1 = 0; cls1 < NCLASS; ++cls1)
553 if ((tmp & _ISbit (cls1)) != 0)
554 for (cls2 = 0; cls2 < NCLASS; ++cls2)
555 if (valid_table[cls1].allow[cls2] != '-')
556 {
557 int eq = (tmp & _ISbit (cls2)) != 0;
558 switch (valid_table[cls1].allow[cls2])
19bc17a9 559 {
4b10dd6c
UD
560 case 'M':
561 if (!eq)
562 {
563 char buf[17];
564
5d431a3e 565 snprintf (buf, sizeof buf, "\\%Zo", cnt);
4b10dd6c
UD
566
567 if (!be_quiet)
f2b98f97 568 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 569character '%s' in class `%s' must be in class `%s'"),
f2b98f97
UD
570 buf,
571 valid_table[cls1].name,
572 valid_table[cls2].name));
4b10dd6c
UD
573 }
574 break;
575
576 case 'X':
577 if (eq)
578 {
579 char buf[17];
580
5d431a3e 581 snprintf (buf, sizeof buf, "\\%Zo", cnt);
4b10dd6c
UD
582
583 if (!be_quiet)
f2b98f97 584 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 585character '%s' in class `%s' must not be in class `%s'"),
f2b98f97
UD
586 buf,
587 valid_table[cls1].name,
588 valid_table[cls2].name));
4b10dd6c
UD
589 }
590 break;
591
592 case 'D':
593 ctype->class256_collection[cnt] |= _ISbit (cls2);
594 break;
595
596 default:
f2b98f97
UD
597 WITH_CUR_LOCALE (error (5, 0, _("\
598internal error in %s, line %u"), __FUNCTION__, __LINE__));
19bc17a9 599 }
4b10dd6c
UD
600 }
601 }
19bc17a9
RM
602 }
603
604 /* ... and now test <SP> as a special case. */
a0dc5206
UD
605 space_value = 32;
606 if (((cnt = BITPOS (tok_space),
607 (ELEM (ctype, class_collection, , space_value)
608 & BITw (tok_space)) == 0)
609 || (cnt = BITPOS (tok_blank),
610 (ELEM (ctype, class_collection, , space_value)
611 & BITw (tok_blank)) == 0)))
880f421f
UD
612 {
613 if (!be_quiet)
f2b98f97
UD
614 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
615 valid_table[cnt].name));
880f421f 616 }
c84142e8
UD
617 else if (((cnt = BITPOS (tok_punct),
618 (ELEM (ctype, class_collection, , space_value)
4b10dd6c 619 & BITw (tok_punct)) != 0)
c84142e8
UD
620 || (cnt = BITPOS (tok_graph),
621 (ELEM (ctype, class_collection, , space_value)
4b10dd6c 622 & BITw (tok_graph))
880f421f
UD
623 != 0)))
624 {
625 if (!be_quiet)
f2b98f97
UD
626 WITH_CUR_LOCALE (error (0, 0, _("\
627<SP> character must not be in class `%s'"),
628 valid_table[cnt].name));
880f421f 629 }
19bc17a9 630 else
4b10dd6c
UD
631 ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
632
633 space_seq = charmap_find_value (charmap, "SP", 2);
ce177a84 634 if (space_seq == NULL)
45c95239
UD
635 space_seq = charmap_find_value (charmap, "space", 5);
636 if (space_seq == NULL)
1b97149d 637 space_seq = charmap_find_value (charmap, "U00000020", 9);
4b10dd6c
UD
638 if (space_seq == NULL || space_seq->nbytes != 1)
639 {
640 if (!be_quiet)
f2b98f97
UD
641 WITH_CUR_LOCALE (error (0, 0, _("\
642character <SP> not defined in character map")));
4b10dd6c
UD
643 }
644 else if (((cnt = BITPOS (tok_space),
645 (ctype->class256_collection[space_seq->bytes[0]]
646 & BIT (tok_space)) == 0)
647 || (cnt = BITPOS (tok_blank),
648 (ctype->class256_collection[space_seq->bytes[0]]
649 & BIT (tok_blank)) == 0)))
650 {
651 if (!be_quiet)
f2b98f97
UD
652 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
653 valid_table[cnt].name));
4b10dd6c
UD
654 }
655 else if (((cnt = BITPOS (tok_punct),
656 (ctype->class256_collection[space_seq->bytes[0]]
657 & BIT (tok_punct)) != 0)
658 || (cnt = BITPOS (tok_graph),
659 (ctype->class256_collection[space_seq->bytes[0]]
660 & BIT (tok_graph)) != 0)))
661 {
662 if (!be_quiet)
f2b98f97
UD
663 WITH_CUR_LOCALE (error (0, 0, _("\
664<SP> character must not be in class `%s'"),
665 valid_table[cnt].name));
4b10dd6c
UD
666 }
667 else
668 ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
75cd5204 669
8a449450
UD
670 /* Check whether all single-byte characters make to their upper/lowercase
671 equivalent according to the ASCII rules. */
672 for (cnt = 'A'; cnt <= 'Z'; ++cnt)
673 {
674 uint32_t uppval = ctype->map256_collection[0][cnt];
675 uint32_t lowval = ctype->map256_collection[1][cnt];
676 uint32_t lowuppval = ctype->map256_collection[0][lowval];
677 uint32_t lowlowval = ctype->map256_collection[1][lowval];
678
679 if (uppval != cnt
680 || lowval != cnt + 0x20
681 || lowuppval != cnt
682 || lowlowval != cnt + 0x20)
683 ctype->nonascii_case = 1;
684 }
685 for (cnt = 0; cnt < 256; ++cnt)
686 if (cnt < 'A' || (cnt > 'Z' && cnt < 'a') || cnt > 'z')
687 if (ctype->map256_collection[0][cnt] != cnt
688 || ctype->map256_collection[1][cnt] != cnt)
689 ctype->nonascii_case = 1;
690
75cd5204
RM
691 /* Now that the tests are done make sure the name array contains all
692 characters which are handled in the WIDTH section of the
693 character set definition file. */
4b10dd6c
UD
694 if (charmap->width_rules != NULL)
695 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
75cd5204 696 {
827ff758
UD
697 unsigned char bytes[charmap->mb_cur_max];
698 int nbytes = charmap->width_rules[cnt].from->nbytes;
699
700 /* We have the range of character for which the width is
701 specified described using byte sequences of the multibyte
702 charset. We have to convert this to UCS4 now. And we
703 cannot simply convert the beginning and the end of the
704 sequence, we have to iterate over the byte sequence and
705 convert it for every single character. */
706 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
707
708 while (nbytes < charmap->width_rules[cnt].to->nbytes
709 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
710 nbytes) <= 0)
711 {
712 /* Find the UCS value for `bytes'. */
827ff758 713 int inner;
76e680a8 714 uint32_t wch;
701666b7
UD
715 struct charseq *seq
716 = charmap_find_symbol (charmap, (char *) bytes, nbytes);
76e680a8
UD
717
718 if (seq == NULL)
719 wch = ILLEGAL_CHAR_VALUE;
720 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
721 wch = seq->ucs4;
722 else
723 wch = repertoire_find_value (ctype->repertoire, seq->name,
724 strlen (seq->name));
827ff758
UD
725
726 if (wch != ILLEGAL_CHAR_VALUE)
727 /* We are only interested in the side-effects of the
728 `find_idx' call. It will add appropriate entries in
729 the name array if this is necessary. */
730 (void) find_idx (ctype, NULL, NULL, NULL, wch);
731
732 /* "Increment" the bytes sequence. */
733 inner = nbytes - 1;
734 while (inner >= 0 && bytes[inner] == 0xff)
735 --inner;
736
737 if (inner < 0)
738 {
739 /* We have to extend the byte sequence. */
740 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
741 break;
742
743 bytes[0] = 1;
744 memset (&bytes[1], 0, nbytes);
745 ++nbytes;
746 }
747 else
748 {
749 ++bytes[inner];
750 while (++inner < nbytes)
751 bytes[inner] = 0;
752 }
753 }
4b10dd6c
UD
754 }
755
0e16ecfa
UD
756 /* Now set all the other characters of the character set to the
757 default width. */
758 curs = NULL;
759 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
760 {
761 struct charseq *data = (struct charseq *) vdata;
762
763 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
764 data->ucs4 = repertoire_find_value (ctype->repertoire,
765 data->name, len);
766
767 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
768 (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
769 }
770
4b10dd6c
UD
771 /* There must be a multiple of 10 digits. */
772 if (ctype->mbdigits_act % 10 != 0)
773 {
774 assert (ctype->mbdigits_act == ctype->wcdigits_act);
775 ctype->wcdigits_act -= ctype->mbdigits_act % 10;
776 ctype->mbdigits_act -= ctype->mbdigits_act % 10;
f2b98f97
UD
777 WITH_CUR_LOCALE (error (0, 0, _("\
778`digit' category has not entries in groups of ten")));
4b10dd6c
UD
779 }
780
781 /* Check the input digits. There must be a multiple of ten available.
42d7c593 782 In each group it could be that one or the other character is missing.
4b10dd6c
UD
783 In this case the whole group must be removed. */
784 cnt = 0;
785 while (cnt < ctype->mbdigits_act)
786 {
787 size_t inner;
788 for (inner = 0; inner < 10; ++inner)
789 if (ctype->mbdigits[cnt + inner] == NULL)
790 break;
791
792 if (inner == 10)
793 cnt += 10;
794 else
795 {
796 /* Remove the group. */
797 memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
798 ((ctype->wcdigits_act - cnt - 10)
799 * sizeof (ctype->mbdigits[0])));
800 ctype->mbdigits_act -= 10;
801 }
802 }
803
804 /* If no input digits are given use the default. */
805 if (ctype->mbdigits_act == 0)
806 {
807 if (ctype->mbdigits_max == 0)
808 {
47e8b443 809 ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
4b10dd6c
UD
810 10 * sizeof (struct charseq *));
811 ctype->mbdigits_max = 10;
812 }
813
814 for (cnt = 0; cnt < 10; ++cnt)
815 {
816 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
701666b7 817 (char *) digits + cnt, 1);
4b10dd6c
UD
818 if (ctype->mbdigits[cnt] == NULL)
819 {
820 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
821 longnames[cnt],
822 strlen (longnames[cnt]));
823 if (ctype->mbdigits[cnt] == NULL)
824 {
825 /* Hum, this ain't good. */
f2b98f97
UD
826 WITH_CUR_LOCALE (error (0, 0, _("\
827no input digits defined and none of the standard names in the charmap")));
4b10dd6c 828
47e8b443 829 ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
4b10dd6c
UD
830 sizeof (struct charseq) + 1);
831
832 /* This is better than nothing. */
833 ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
834 ctype->mbdigits[cnt]->nbytes = 1;
835 }
836 }
837 }
838
839 ctype->mbdigits_act = 10;
840 }
841
842 /* Check the wide character input digits. There must be a multiple
42d7c593 843 of ten available. In each group it could be that one or the other
4b10dd6c
UD
844 character is missing. In this case the whole group must be
845 removed. */
846 cnt = 0;
847 while (cnt < ctype->wcdigits_act)
848 {
849 size_t inner;
850 for (inner = 0; inner < 10; ++inner)
851 if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
852 break;
853
854 if (inner == 10)
855 cnt += 10;
856 else
857 {
858 /* Remove the group. */
859 memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
860 ((ctype->wcdigits_act - cnt - 10)
861 * sizeof (ctype->wcdigits[0])));
862 ctype->wcdigits_act -= 10;
863 }
864 }
865
866 /* If no input digits are given use the default. */
867 if (ctype->wcdigits_act == 0)
868 {
869 if (ctype->wcdigits_max == 0)
870 {
47e8b443 871 ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
4b10dd6c
UD
872 10 * sizeof (uint32_t));
873 ctype->wcdigits_max = 10;
874 }
875
876 for (cnt = 0; cnt < 10; ++cnt)
877 ctype->wcdigits[cnt] = L'0' + cnt;
878
879 ctype->mbdigits_act = 10;
880 }
881
882 /* Check the outdigits. */
883 warned = 0;
884 for (cnt = 0; cnt < 10; ++cnt)
885 if (ctype->mboutdigits[cnt] == NULL)
886 {
887 static struct charseq replace[2];
888
889 if (!warned)
890 {
f2b98f97
UD
891 WITH_CUR_LOCALE (error (0, 0, _("\
892not all characters used in `outdigit' are available in the charmap")));
4b10dd6c
UD
893 warned = 1;
894 }
895
896 replace[0].nbytes = 1;
897 replace[0].bytes[0] = '?';
898 replace[0].bytes[1] = '\0';
899 ctype->mboutdigits[cnt] = &replace[0];
900 }
901
902 warned = 0;
903 for (cnt = 0; cnt < 10; ++cnt)
904 if (ctype->wcoutdigits[cnt] == 0)
905 {
906 if (!warned)
907 {
f2b98f97
UD
908 WITH_CUR_LOCALE (error (0, 0, _("\
909not all characters used in `outdigit' are available in the repertoire")));
4b10dd6c
UD
910 warned = 1;
911 }
912
913 ctype->wcoutdigits[cnt] = L'?';
75cd5204 914 }
a8e4c924
UD
915
916 /* Sort the entries in the translit_ignore list. */
917 if (ctype->translit_ignore != NULL)
918 {
919 struct translit_ignore_t *firstp = ctype->translit_ignore;
920 struct translit_ignore_t *runp;
921
922 ctype->ntranslit_ignore = 1;
923
924 for (runp = firstp->next; runp != NULL; runp = runp->next)
925 {
926 struct translit_ignore_t *lastp = NULL;
927 struct translit_ignore_t *cmpp;
928
929 ++ctype->ntranslit_ignore;
930
931 for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
932 if (runp->from < cmpp->from)
933 break;
934
935 runp->next = lastp;
936 if (lastp == NULL)
937 firstp = runp;
938 }
939
940 ctype->translit_ignore = firstp;
941 }
19bc17a9
RM
942}
943
944
945void
47e8b443 946ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
75cd5204 947 const char *output_path)
19bc17a9
RM
948{
949 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
a9706118 950 const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
4c7d276e 951 + ctype->nr_charclass + ctype->map_collection_nr);
1ecbb381 952 struct locale_file file;
1d96d74d 953 uint32_t default_missing_len;
1ecbb381 954 size_t elem, cnt;
19bc17a9
RM
955
956 /* Now prepare the output: Find the sizes of the table we can use. */
4b10dd6c 957 allocate_arrays (ctype, charmap, ctype->repertoire);
19bc17a9 958
1ecbb381
RS
959 default_missing_len = (ctype->default_missing
960 ? wcslen ((wchar_t *) ctype->default_missing)
961 : 0);
19bc17a9 962
1ecbb381 963 init_locale_data (&file, nelems);
19bc17a9
RM
964 for (elem = 0; elem < nelems; ++elem)
965 {
a9706118 966 if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
19bc17a9
RM
967 switch (elem)
968 {
c6df09ad
UD
969#define CTYPE_EMPTY(name) \
970 case name: \
1ecbb381 971 add_locale_empty (&file); \
c6df09ad
UD
972 break
973
974 CTYPE_EMPTY(_NL_CTYPE_GAP1);
975 CTYPE_EMPTY(_NL_CTYPE_GAP2);
976 CTYPE_EMPTY(_NL_CTYPE_GAP3);
4c7d276e
UD
977 CTYPE_EMPTY(_NL_CTYPE_GAP4);
978 CTYPE_EMPTY(_NL_CTYPE_GAP5);
979 CTYPE_EMPTY(_NL_CTYPE_GAP6);
c6df09ad 980
1ecbb381 981#define CTYPE_RAW_DATA(name, base, size) \
19bc17a9 982 case _NL_ITEM_INDEX (name): \
1ecbb381 983 add_locale_raw_data (&file, base, size); \
19bc17a9
RM
984 break
985
1ecbb381
RS
986 CTYPE_RAW_DATA (_NL_CTYPE_CLASS,
987 ctype->ctype_b,
988 (256 + 128) * sizeof (char_class_t));
19bc17a9 989
1ecbb381
RS
990#define CTYPE_UINT32_ARRAY(name, base, n_elems) \
991 case _NL_ITEM_INDEX (name): \
992 add_locale_uint32_array (&file, base, n_elems); \
993 break
ef446144 994
1ecbb381
RS
995 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER, ctype->map_b[0], 256 + 128);
996 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER, ctype->map_b[1], 256 + 128);
997 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER32, ctype->map32_b[0], 256);
998 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER32, ctype->map32_b[1], 256);
999 CTYPE_RAW_DATA (_NL_CTYPE_CLASS32,
1000 ctype->ctype32_b,
1001 256 * sizeof (char_class32_t));
ef446144 1002
1ecbb381
RS
1003#define CTYPE_UINT32(name, value) \
1004 case _NL_ITEM_INDEX (name): \
1005 add_locale_uint32 (&file, value); \
1006 break
4a33c2f5 1007
1ecbb381
RS
1008 CTYPE_UINT32 (_NL_CTYPE_CLASS_OFFSET, ctype->class_offset);
1009 CTYPE_UINT32 (_NL_CTYPE_MAP_OFFSET, ctype->map_offset);
1010 CTYPE_UINT32 (_NL_CTYPE_TRANSLIT_TAB_SIZE, ctype->translit_idx_size);
4a33c2f5 1011
1ecbb381
RS
1012 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_IDX,
1013 ctype->translit_from_idx,
1014 ctype->translit_idx_size);
4b10dd6c 1015
1ecbb381
RS
1016 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_TBL,
1017 ctype->translit_from_tbl,
1018 ctype->translit_from_tbl_size
1019 / sizeof (uint32_t));
4b10dd6c 1020
1ecbb381
RS
1021 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_IDX,
1022 ctype->translit_to_idx,
1023 ctype->translit_idx_size);
4b10dd6c 1024
1ecbb381
RS
1025 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_TBL,
1026 ctype->translit_to_tbl,
1027 ctype->translit_to_tbl_size / sizeof (uint32_t));
4b10dd6c 1028
75cd5204
RM
1029 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
1030 /* The class name array. */
1ecbb381
RS
1031 start_locale_structure (&file);
1032 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1033 add_locale_string (&file, ctype->classnames[cnt]);
1034 add_locale_char (&file, 0);
7602d070 1035 align_locale_data (&file, LOCFILE_ALIGN);
1ecbb381 1036 end_locale_structure (&file);
75cd5204
RM
1037 break;
1038
1039 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
1040 /* The class name array. */
1ecbb381
RS
1041 start_locale_structure (&file);
1042 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1043 add_locale_string (&file, ctype->mapnames[cnt]);
1044 add_locale_char (&file, 0);
7602d070 1045 align_locale_data (&file, LOCFILE_ALIGN);
1ecbb381 1046 end_locale_structure (&file);
75cd5204 1047 break;
19bc17a9 1048
1ecbb381
RS
1049 case _NL_ITEM_INDEX (_NL_CTYPE_WIDTH):
1050 add_locale_wcwidth_table (&file, &ctype->width);
1051 break;
19bc17a9 1052
1ecbb381 1053 CTYPE_UINT32 (_NL_CTYPE_MB_CUR_MAX, ctype->mb_cur_max);
0200214b 1054
ce7a5ef4 1055 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
1ecbb381 1056 add_locale_string (&file, ctype->codeset_name);
4b10dd6c
UD
1057 break;
1058
1ecbb381 1059 CTYPE_UINT32 (_NL_CTYPE_MAP_TO_NONASCII, ctype->to_nonascii);
f0c7c524 1060
1ecbb381 1061 CTYPE_UINT32 (_NL_CTYPE_NONASCII_CASE, ctype->nonascii_case);
8a449450 1062
4a33c2f5 1063 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1ecbb381 1064 add_locale_uint32 (&file, ctype->mbdigits_act / 10);
4b10dd6c
UD
1065 break;
1066
4a33c2f5 1067 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1ecbb381 1068 add_locale_uint32 (&file, ctype->wcdigits_act / 10);
4b10dd6c
UD
1069 break;
1070
e43e0dd6 1071 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1ecbb381 1072 start_locale_structure (&file);
498b733e 1073 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
4b10dd6c
UD
1074 cnt < ctype->mbdigits_act; cnt += 10)
1075 {
1ecbb381
RS
1076 add_locale_raw_data (&file, ctype->mbdigits[cnt]->bytes,
1077 ctype->mbdigits[cnt]->nbytes);
1078 add_locale_char (&file, 0);
4b10dd6c 1079 }
1ecbb381 1080 end_locale_structure (&file);
4b10dd6c
UD
1081 break;
1082
1083 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1ecbb381 1084 start_locale_structure (&file);
498b733e 1085 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1ecbb381
RS
1086 add_locale_raw_data (&file, ctype->mboutdigits[cnt]->bytes,
1087 ctype->mboutdigits[cnt]->nbytes);
1088 add_locale_char (&file, 0);
1089 end_locale_structure (&file);
4b10dd6c
UD
1090 break;
1091
e43e0dd6 1092 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1ecbb381 1093 start_locale_structure (&file);
498b733e 1094 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
4b10dd6c 1095 cnt < ctype->wcdigits_act; cnt += 10)
1ecbb381
RS
1096 add_locale_uint32 (&file, ctype->wcdigits[cnt]);
1097 end_locale_structure (&file);
4b10dd6c
UD
1098 break;
1099
1ecbb381 1100 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
498b733e 1101 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1ecbb381 1102 add_locale_uint32 (&file, ctype->wcoutdigits[cnt]);
4b10dd6c
UD
1103 break;
1104
a8e4c924 1105 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1ecbb381 1106 add_locale_uint32 (&file, default_missing_len);
a8e4c924
UD
1107 break;
1108
1d96d74d 1109 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1ecbb381
RS
1110 add_locale_uint32_array (&file, ctype->default_missing,
1111 default_missing_len);
1d96d74d
UD
1112 break;
1113
a8e4c924 1114 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1ecbb381 1115 add_locale_uint32 (&file, ctype->ntranslit_ignore);
a8e4c924
UD
1116 break;
1117
1118 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1ecbb381 1119 start_locale_structure (&file);
a8e4c924 1120 {
a8e4c924 1121 struct translit_ignore_t *runp;
a8e4c924
UD
1122 for (runp = ctype->translit_ignore; runp != NULL;
1123 runp = runp->next)
1124 {
1ecbb381
RS
1125 add_locale_uint32 (&file, runp->from);
1126 add_locale_uint32 (&file, runp->to);
1127 add_locale_uint32 (&file, runp->step);
a8e4c924
UD
1128 }
1129 }
1ecbb381 1130 end_locale_structure (&file);
1d96d74d
UD
1131 break;
1132
19bc17a9
RM
1133 default:
1134 assert (! "unknown CTYPE element");
1135 }
1136 else
1137 {
1138 /* Handle extra maps. */
4c7d276e
UD
1139 size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1140 if (nr < ctype->nr_charclass)
ef446144 1141 {
1ecbb381
RS
1142 start_locale_prelude (&file);
1143 add_locale_uint32_array (&file, ctype->class_b[nr], 256 / 32);
1144 end_locale_prelude (&file);
1145 add_locale_wctype_table (&file, &ctype->class_3level[nr]);
ef446144
UD
1146 }
1147 else
1148 {
4c7d276e
UD
1149 nr -= ctype->nr_charclass;
1150 assert (nr < ctype->map_collection_nr);
1ecbb381 1151 add_locale_wctrans_table (&file, &ctype->map_3level[nr]);
ef446144 1152 }
75cd5204 1153 }
19bc17a9 1154 }
19bc17a9 1155
1ecbb381 1156 write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", &file);
19bc17a9
RM
1157}
1158
1159
4b10dd6c
UD
1160/* Local functions. */
1161static void
1162ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1163 const char *name)
19bc17a9 1164{
4b10dd6c 1165 size_t cnt;
19bc17a9 1166
4b10dd6c
UD
1167 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1168 if (strcmp (ctype->classnames[cnt], name) == 0)
1169 break;
19bc17a9 1170
4b10dd6c
UD
1171 if (cnt < ctype->nr_charclass)
1172 {
1173 lr_error (lr, _("character class `%s' already defined"), name);
1174 return;
1175 }
19bc17a9 1176
4b10dd6c
UD
1177 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1178 /* Exit code 2 is prescribed in P1003.2b. */
f2b98f97 1179 WITH_CUR_LOCALE (error (2, 0, _("\
5d431a3e 1180implementation limit: no more than %Zd character classes allowed"),
f2b98f97 1181 MAX_NR_CHARCLASS));
19bc17a9 1182
4b10dd6c 1183 ctype->classnames[ctype->nr_charclass++] = name;
19bc17a9
RM
1184}
1185
1186
4b10dd6c
UD
1187static void
1188ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
47e8b443 1189 const char *name, const struct charmap_t *charmap)
19bc17a9 1190{
4b10dd6c 1191 size_t max_chars = 0;
ba1ffaa1 1192 size_t cnt;
19bc17a9 1193
4b10dd6c 1194 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
19bc17a9 1195 {
4b10dd6c
UD
1196 if (strcmp (ctype->mapnames[cnt], name) == 0)
1197 break;
1198
1199 if (max_chars < ctype->map_collection_max[cnt])
1200 max_chars = ctype->map_collection_max[cnt];
19bc17a9
RM
1201 }
1202
4b10dd6c
UD
1203 if (cnt < ctype->map_collection_nr)
1204 {
1205 lr_error (lr, _("character map `%s' already defined"), name);
1206 return;
1207 }
19bc17a9 1208
4b10dd6c
UD
1209 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1210 /* Exit code 2 is prescribed in P1003.2b. */
f2b98f97 1211 WITH_CUR_LOCALE (error (2, 0, _("\
4b10dd6c 1212implementation limit: no more than %d character maps allowed"),
f2b98f97 1213 MAX_NR_CHARMAP));
19bc17a9 1214
4b10dd6c
UD
1215 ctype->mapnames[cnt] = name;
1216
1217 if (max_chars == 0)
1218 ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1219 else
1220 ctype->map_collection_max[cnt] = max_chars;
1221
1222 ctype->map_collection[cnt] = (uint32_t *)
5866b131 1223 xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
4b10dd6c 1224 ctype->map_collection_act[cnt] = 256;
19bc17a9 1225
4b10dd6c 1226 ++ctype->map_collection_nr;
19bc17a9
RM
1227}
1228
1229
4b10dd6c 1230/* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
42d7c593 1231 is possible if we only want to extend the name array. */
4b10dd6c
UD
1232static uint32_t *
1233find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1234 size_t *act, uint32_t idx)
19bc17a9 1235{
4b10dd6c 1236 size_t cnt;
19bc17a9 1237
4b10dd6c
UD
1238 if (idx < 256)
1239 return table == NULL ? NULL : &(*table)[idx];
19bc17a9 1240
601d2942
UD
1241 /* Use the charnames_idx lookup table instead of the slow search loop. */
1242#if 1
1243 cnt = idx_table_get (&ctype->charnames_idx, idx);
bd75759f 1244 if (cnt == EMPTY)
601d2942
UD
1245 /* Not found. */
1246 cnt = ctype->charnames_act;
1247#else
1248 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1249 if (ctype->charnames[cnt] == idx)
1250 break;
1251#endif
19bc17a9 1252
4b10dd6c
UD
1253 /* We have to distinguish two cases: the name is found or not. */
1254 if (cnt == ctype->charnames_act)
1255 {
1256 /* Extend the name array. */
1257 if (ctype->charnames_act == ctype->charnames_max)
1258 {
1259 ctype->charnames_max *= 2;
5866b131 1260 ctype->charnames = (uint32_t *)
4b10dd6c 1261 xrealloc (ctype->charnames,
5866b131 1262 sizeof (uint32_t) * ctype->charnames_max);
4b10dd6c
UD
1263 }
1264 ctype->charnames[ctype->charnames_act++] = idx;
601d2942 1265 idx_table_add (&ctype->charnames_idx, idx, cnt);
4b10dd6c 1266 }
19bc17a9 1267
4b10dd6c
UD
1268 if (table == NULL)
1269 /* We have done everything we are asked to do. */
1270 return NULL;
19bc17a9 1271
4a9dcff1
UD
1272 if (max == NULL)
1273 /* The caller does not want to extend the table. */
1274 return (cnt >= *act ? NULL : &(*table)[cnt]);
1275
4b10dd6c
UD
1276 if (cnt >= *act)
1277 {
1278 if (cnt >= *max)
1279 {
1280 size_t old_max = *max;
1281 do
1282 *max *= 2;
1283 while (*max <= cnt);
19bc17a9 1284
4b10dd6c 1285 *table =
5866b131 1286 (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
4b10dd6c
UD
1287 memset (&(*table)[old_max], '\0',
1288 (*max - old_max) * sizeof (uint32_t));
1289 }
19bc17a9 1290
76e680a8 1291 *act = cnt + 1;
4b10dd6c 1292 }
19bc17a9 1293
4b10dd6c 1294 return &(*table)[cnt];
19bc17a9
RM
1295}
1296
1297
4b10dd6c 1298static int
47e8b443 1299get_character (struct token *now, const struct charmap_t *charmap,
4b10dd6c
UD
1300 struct repertoire_t *repertoire,
1301 struct charseq **seqp, uint32_t *wchp)
19bc17a9 1302{
4b10dd6c
UD
1303 if (now->tok == tok_bsymbol)
1304 {
1305 /* This will hopefully be the normal case. */
1306 *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1307 now->val.str.lenmb);
1308 *seqp = charmap_find_value (charmap, now->val.str.startmb,
1309 now->val.str.lenmb);
1310 }
1311 else if (now->tok == tok_ucs4)
1312 {
f0a4b6b1
UD
1313 char utmp[10];
1314
1315 snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1316 *seqp = charmap_find_value (charmap, utmp, 9);
1317
1318 if (*seqp == NULL)
1319 *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
19bc17a9 1320
4b10dd6c
UD
1321 if (*seqp == NULL)
1322 {
1323 /* Compute the value in the charmap from the UCS value. */
1324 const char *symbol = repertoire_find_symbol (repertoire,
1325 now->val.ucs4);
19bc17a9 1326
4b10dd6c
UD
1327 if (symbol == NULL)
1328 *seqp = NULL;
1329 else
1330 *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
19bc17a9 1331
4b10dd6c
UD
1332 if (*seqp == NULL)
1333 {
723faa38
UD
1334 if (repertoire != NULL)
1335 {
1336 /* Insert a negative entry. */
1337 static const struct charseq negative
1338 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1339 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1340 sizeof (uint32_t));
1341 *newp = now->val.ucs4;
1342
1343 insert_entry (&repertoire->seq_table, newp,
1344 sizeof (uint32_t), (void *) &negative);
1345 }
4b10dd6c
UD
1346 }
1347 else
1348 (*seqp)->ucs4 = now->val.ucs4;
1349 }
1350 else if ((*seqp)->ucs4 != now->val.ucs4)
1351 *seqp = NULL;
19bc17a9 1352
4b10dd6c
UD
1353 *wchp = now->val.ucs4;
1354 }
1355 else if (now->tok == tok_charcode)
1356 {
1357 /* We must map from the byte code to UCS4. */
1358 *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1359 now->val.str.lenmb);
19bc17a9 1360
4b10dd6c
UD
1361 if (*seqp == NULL)
1362 *wchp = ILLEGAL_CHAR_VALUE;
1363 else
1364 {
1365 if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1366 (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1367 strlen ((*seqp)->name));
1368 *wchp = (*seqp)->ucs4;
1369 }
1370 }
1371 else
1372 return 1;
19bc17a9
RM
1373
1374 return 0;
1375}
1376
1377
a0dc5206
UD
1378/* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1379 the .(2). counterparts. */
4b10dd6c
UD
1380static void
1381charclass_symbolic_ellipsis (struct linereader *ldfile,
1382 struct locale_ctype_t *ctype,
47e8b443 1383 const struct charmap_t *charmap,
4b10dd6c
UD
1384 struct repertoire_t *repertoire,
1385 struct token *now,
1386 const char *last_str,
1387 unsigned long int class256_bit,
1388 unsigned long int class_bit, int base,
a0dc5206 1389 int ignore_content, int handle_digits, int step)
19bc17a9 1390{
4b10dd6c
UD
1391 const char *nowstr = now->val.str.startmb;
1392 char tmp[now->val.str.lenmb + 1];
1393 const char *cp;
1394 char *endp;
1395 unsigned long int from;
1396 unsigned long int to;
19bc17a9 1397
4b10dd6c
UD
1398 /* We have to compute the ellipsis values using the symbolic names. */
1399 assert (last_str != NULL);
1400
1401 if (strlen (last_str) != now->val.str.lenmb)
19bc17a9 1402 {
4b10dd6c
UD
1403 invalid_range:
1404 lr_error (ldfile,
c69136ae 1405 _("`%s' and `%.*s' are not valid names for symbolic range"),
f6ada7ad 1406 last_str, (int) now->val.str.lenmb, nowstr);
4b10dd6c 1407 return;
19bc17a9
RM
1408 }
1409
4b10dd6c
UD
1410 if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1411 /* Nothing to do, the names are the same. */
1412 return;
19bc17a9 1413
4b10dd6c
UD
1414 for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1415 ;
19bc17a9 1416
4b10dd6c
UD
1417 errno = 0;
1418 from = strtoul (cp, &endp, base);
1419 if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1420 goto invalid_range;
19bc17a9 1421
4b10dd6c 1422 to = strtoul (nowstr + (cp - last_str), &endp, base);
549b3c3a
UD
1423 if ((to == UINT_MAX && errno == ERANGE)
1424 || (endp - nowstr) != now->val.str.lenmb || from >= to)
4b10dd6c 1425 goto invalid_range;
19bc17a9 1426
4b10dd6c
UD
1427 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1428 if (!ignore_content)
1429 {
1430 now->val.str.startmb = tmp;
a0dc5206 1431 while ((from += step) <= to)
4b10dd6c
UD
1432 {
1433 struct charseq *seq;
1434 uint32_t wch;
19bc17a9 1435
9068de33 1436 sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
4e8d52c9
AJ
1437 (int) (cp - last_str), last_str,
1438 (int) (now->val.str.lenmb - (cp - last_str)),
9068de33 1439 from);
19bc17a9 1440
4b10dd6c
UD
1441 get_character (now, charmap, repertoire, &seq, &wch);
1442
1443 if (seq != NULL && seq->nbytes == 1)
1444 /* Yep, we can store information about this byte sequence. */
1445 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
19bc17a9 1446
4b10dd6c
UD
1447 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1448 /* We have the UCS4 position. */
1449 *find_idx (ctype, &ctype->class_collection,
1450 &ctype->class_collection_max,
1451 &ctype->class_collection_act, wch) |= class_bit;
19bc17a9 1452
4b10dd6c
UD
1453 if (handle_digits == 1)
1454 {
1455 /* We must store the digit values. */
1456 if (ctype->mbdigits_act == ctype->mbdigits_max)
1457 {
1458 ctype->mbdigits_max *= 2;
1459 ctype->mbdigits = xrealloc (ctype->mbdigits,
1460 (ctype->mbdigits_max
1461 * sizeof (char *)));
1462 ctype->wcdigits_max *= 2;
1463 ctype->wcdigits = xrealloc (ctype->wcdigits,
1464 (ctype->wcdigits_max
1465 * sizeof (uint32_t)));
1466 }
1467
1468 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1469 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1470 }
1471 else if (handle_digits == 2)
1472 {
1473 /* We must store the digit values. */
1474 if (ctype->outdigits_act >= 10)
1475 {
1476 lr_error (ldfile, _("\
1477%s: field `%s' does not contain exactly ten entries"),
1478 "LC_CTYPE", "outdigit");
1479 return;
1480 }
1481
1482 ctype->mboutdigits[ctype->outdigits_act] = seq;
1483 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1484 ++ctype->outdigits_act;
1485 }
1486 }
1487 }
19bc17a9
RM
1488}
1489
1490
a0dc5206 1491/* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
4b10dd6c
UD
1492static void
1493charclass_ucs4_ellipsis (struct linereader *ldfile,
1494 struct locale_ctype_t *ctype,
47e8b443 1495 const struct charmap_t *charmap,
4b10dd6c
UD
1496 struct repertoire_t *repertoire,
1497 struct token *now, uint32_t last_wch,
1498 unsigned long int class256_bit,
1499 unsigned long int class_bit, int ignore_content,
a0dc5206 1500 int handle_digits, int step)
19bc17a9 1501{
4b10dd6c 1502 if (last_wch > now->val.ucs4)
19bc17a9 1503 {
4b10dd6c
UD
1504 lr_error (ldfile, _("\
1505to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1506 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1507 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
19bc17a9
RM
1508 return;
1509 }
1510
4b10dd6c 1511 if (!ignore_content)
a0dc5206 1512 while ((last_wch += step) <= now->val.ucs4)
4b10dd6c
UD
1513 {
1514 /* We have to find out whether there is a byte sequence corresponding
1515 to this UCS4 value. */
f0a4b6b1
UD
1516 struct charseq *seq;
1517 char utmp[10];
1518
1519 snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1520 seq = charmap_find_value (charmap, utmp, 9);
a0dc5206
UD
1521 if (seq == NULL)
1522 {
1523 snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1524 seq = charmap_find_value (charmap, utmp, 5);
1525 }
f0a4b6b1
UD
1526
1527 if (seq == NULL)
1528 /* Try looking in the repertoire map. */
1529 seq = repertoire_find_seq (repertoire, last_wch);
19bc17a9 1530
4b10dd6c
UD
1531 /* If this is the first time we look for this sequence create a new
1532 entry. */
1533 if (seq == NULL)
1534 {
f0a4b6b1
UD
1535 static const struct charseq negative
1536 = { .ucs4 = ILLEGAL_CHAR_VALUE };
19bc17a9 1537
f0a4b6b1
UD
1538 /* Find the symbolic name for this UCS4 value. */
1539 if (repertoire != NULL)
4b10dd6c 1540 {
f0a4b6b1
UD
1541 const char *symbol = repertoire_find_symbol (repertoire,
1542 last_wch);
5866b131
UD
1543 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1544 sizeof (uint32_t));
f0a4b6b1
UD
1545 *newp = last_wch;
1546
1547 if (symbol != NULL)
1548 /* We have a name, now search the multibyte value. */
1549 seq = charmap_find_value (charmap, symbol, strlen (symbol));
1550
1551 if (seq == NULL)
1552 /* We have to create a fake entry. */
1553 seq = (struct charseq *) &negative;
1554 else
1555 seq->ucs4 = last_wch;
1556
5866b131
UD
1557 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1558 seq);
4b10dd6c
UD
1559 }
1560 else
f0a4b6b1
UD
1561 /* We have to create a fake entry. */
1562 seq = (struct charseq *) &negative;
4b10dd6c
UD
1563 }
1564
1565 /* We have a name, now search the multibyte value. */
1566 if (seq->ucs4 == last_wch && seq->nbytes == 1)
1567 /* Yep, we can store information about this byte sequence. */
1568 ctype->class256_collection[(size_t) seq->bytes[0]]
1569 |= class256_bit;
1570
1571 /* And of course we have the UCS4 position. */
5866b131 1572 if (class_bit != 0)
4b10dd6c
UD
1573 *find_idx (ctype, &ctype->class_collection,
1574 &ctype->class_collection_max,
1575 &ctype->class_collection_act, last_wch) |= class_bit;
1576
1577 if (handle_digits == 1)
1578 {
1579 /* We must store the digit values. */
1580 if (ctype->mbdigits_act == ctype->mbdigits_max)
1581 {
1582 ctype->mbdigits_max *= 2;
1583 ctype->mbdigits = xrealloc (ctype->mbdigits,
1584 (ctype->mbdigits_max
1585 * sizeof (char *)));
1586 ctype->wcdigits_max *= 2;
1587 ctype->wcdigits = xrealloc (ctype->wcdigits,
1588 (ctype->wcdigits_max
1589 * sizeof (uint32_t)));
1590 }
1591
1592 ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1593 ? seq : NULL);
1594 ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1595 }
1596 else if (handle_digits == 2)
1597 {
1598 /* We must store the digit values. */
1599 if (ctype->outdigits_act >= 10)
1600 {
1601 lr_error (ldfile, _("\
1602%s: field `%s' does not contain exactly ten entries"),
1603 "LC_CTYPE", "outdigit");
1604 return;
1605 }
19bc17a9 1606
4b10dd6c
UD
1607 ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1608 ? seq : NULL);
1609 ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1610 ++ctype->outdigits_act;
1611 }
1612 }
19bc17a9
RM
1613}
1614
1615
4b10dd6c 1616/* Ellipsis as in `/xea/x12.../xea/x34'. */
19bc17a9 1617static void
4b10dd6c
UD
1618charclass_charcode_ellipsis (struct linereader *ldfile,
1619 struct locale_ctype_t *ctype,
47e8b443 1620 const struct charmap_t *charmap,
4b10dd6c
UD
1621 struct repertoire_t *repertoire,
1622 struct token *now, char *last_charcode,
1623 uint32_t last_charcode_len,
1624 unsigned long int class256_bit,
1625 unsigned long int class_bit, int ignore_content,
1626 int handle_digits)
19bc17a9 1627{
4b10dd6c
UD
1628 /* First check whether the to-value is larger. */
1629 if (now->val.charcode.nbytes != last_charcode_len)
1630 {
1631 lr_error (ldfile, _("\
379ed351 1632start and end character sequence of range must have the same length"));
4b10dd6c
UD
1633 return;
1634 }
19bc17a9 1635
4b10dd6c 1636 if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
19bc17a9 1637 {
4b10dd6c
UD
1638 lr_error (ldfile, _("\
1639to-value character sequence is smaller than from-value sequence"));
19bc17a9
RM
1640 return;
1641 }
1642
4b10dd6c
UD
1643 if (!ignore_content)
1644 {
1645 do
1646 {
1647 /* Increment the byte sequence value. */
1648 struct charseq *seq;
1649 uint32_t wch;
1650 int i;
1651
1652 for (i = last_charcode_len - 1; i >= 0; --i)
1653 if (++last_charcode[i] != 0)
1654 break;
1655
1656 if (last_charcode_len == 1)
1657 /* Of course we have the charcode value. */
1658 ctype->class256_collection[(size_t) last_charcode[0]]
1659 |= class256_bit;
1660
1661 /* Find the symbolic name. */
1662 seq = charmap_find_symbol (charmap, last_charcode,
1663 last_charcode_len);
1664 if (seq != NULL)
1665 {
1666 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1667 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1668 strlen (seq->name));
f0a4b6b1 1669 wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
4b10dd6c
UD
1670
1671 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1672 *find_idx (ctype, &ctype->class_collection,
1673 &ctype->class_collection_max,
1674 &ctype->class_collection_act, wch) |= class_bit;
1675 }
1676 else
1677 wch = ILLEGAL_CHAR_VALUE;
19bc17a9 1678
4b10dd6c
UD
1679 if (handle_digits == 1)
1680 {
1681 /* We must store the digit values. */
1682 if (ctype->mbdigits_act == ctype->mbdigits_max)
1683 {
1684 ctype->mbdigits_max *= 2;
1685 ctype->mbdigits = xrealloc (ctype->mbdigits,
1686 (ctype->mbdigits_max
1687 * sizeof (char *)));
1688 ctype->wcdigits_max *= 2;
1689 ctype->wcdigits = xrealloc (ctype->wcdigits,
1690 (ctype->wcdigits_max
1691 * sizeof (uint32_t)));
1692 }
1693
1694 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1695 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1696 seq->nbytes = last_charcode_len;
1697
1698 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1699 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1700 }
1701 else if (handle_digits == 2)
1702 {
1703 struct charseq *seq;
1704 /* We must store the digit values. */
1705 if (ctype->outdigits_act >= 10)
1706 {
1707 lr_error (ldfile, _("\
1708%s: field `%s' does not contain exactly ten entries"),
1709 "LC_CTYPE", "outdigit");
1710 return;
1711 }
1712
1713 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1714 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1715 seq->nbytes = last_charcode_len;
1716
1717 ctype->mboutdigits[ctype->outdigits_act] = seq;
1718 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1719 ++ctype->outdigits_act;
1720 }
1721 }
1722 while (memcmp (last_charcode, now->val.charcode.bytes,
1723 last_charcode_len) != 0);
1724 }
19bc17a9
RM
1725}
1726
1727
47e8b443
UD
1728static uint32_t *
1729find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
1730 uint32_t wch)
1731{
1732 struct translit_t *trunp = ctype->translit;
1733 struct translit_ignore_t *tirunp = ctype->translit_ignore;
1734
1735 while (trunp != NULL)
1736 {
1737 /* XXX We simplify things here. The transliterations we look
1738 for are only allowed to have one character. */
1739 if (trunp->from[0] == wch && trunp->from[1] == 0)
1740 {
1741 /* Found it. Now look for a transliteration which can be
1742 represented with the character set. */
1743 struct translit_to_t *torunp = trunp->to;
1744
1745 while (torunp != NULL)
1746 {
1747 int i;
1748
1749 for (i = 0; torunp->str[i] != 0; ++i)
1750 {
1751 char utmp[10];
1752
1753 snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1754 if (charmap_find_value (charmap, utmp, 9) == NULL)
1755 /* This character cannot be represented. */
1756 break;
1757 }
1758
1759 if (torunp->str[i] == 0)
1760 return torunp->str;
1761
1762 torunp = torunp->next;
1763 }
1764
1765 break;
1766 }
1767
1768 trunp = trunp->next;
1769 }
1770
1771 /* Check for ignored chars. */
1772 while (tirunp != NULL)
1773 {
1774 if (tirunp->from <= wch && tirunp->to >= wch)
1775 {
1776 uint32_t wi;
1777
1778 for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1779 if (wi == wch)
1780 return (uint32_t []) { 0 };
1781 }
1782 }
1783
1784 /* Nothing found. */
1785 return NULL;
1786}
1787
1788
1789uint32_t *
1790find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
1791 uint32_t wch)
1792{
1793 struct locale_ctype_t *ctype;
1794 uint32_t *result = NULL;
1795
1796 assert (locale != NULL);
1797 ctype = locale->categories[LC_CTYPE].ctype;
1798
b037a293
UD
1799 if (ctype == NULL)
1800 return NULL;
1801
47e8b443
UD
1802 if (ctype->translit != NULL)
1803 result = find_translit2 (ctype, charmap, wch);
1804
1805 if (result == NULL)
1806 {
1807 struct translit_include_t *irunp = ctype->translit_include;
1808
1809 while (irunp != NULL && result == NULL)
1810 {
1811 result = find_translit (find_locale (CTYPE_LOCALE,
1812 irunp->copy_locale,
1813 irunp->copy_repertoire,
1814 charmap),
1815 charmap, wch);
1816 irunp = irunp->next;
1817 }
1818 }
1819
1820 return result;
1821}
1822
1823
4b10dd6c
UD
1824/* Read one transliteration entry. */
1825static uint32_t *
1826read_widestring (struct linereader *ldfile, struct token *now,
47e8b443
UD
1827 const struct charmap_t *charmap,
1828 struct repertoire_t *repertoire)
19bc17a9 1829{
4b10dd6c 1830 uint32_t *wstr;
19bc17a9 1831
4b10dd6c
UD
1832 if (now->tok == tok_default_missing)
1833 /* The special name "" will denote this case. */
5866b131 1834 wstr = ((uint32_t *) { 0 });
4b10dd6c 1835 else if (now->tok == tok_bsymbol)
19bc17a9 1836 {
4b10dd6c 1837 /* Get the value from the repertoire. */
a673fbcb 1838 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
4b10dd6c
UD
1839 wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1840 now->val.str.lenmb);
1841 if (wstr[0] == ILLEGAL_CHAR_VALUE)
f0a4b6b1
UD
1842 {
1843 /* We cannot proceed, we don't know the UCS4 value. */
1844 free (wstr);
1845 return NULL;
1846 }
4b10dd6c
UD
1847
1848 wstr[1] = 0;
19bc17a9 1849 }
4b10dd6c 1850 else if (now->tok == tok_ucs4)
19bc17a9 1851 {
a673fbcb 1852 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
4b10dd6c
UD
1853 wstr[0] = now->val.ucs4;
1854 wstr[1] = 0;
1855 }
1856 else if (now->tok == tok_charcode)
1857 {
1858 /* Argh, we have to convert to the symbol name first and then to the
1859 UCS4 value. */
1860 struct charseq *seq = charmap_find_symbol (charmap,
1861 now->val.str.startmb,
1862 now->val.str.lenmb);
1863 if (seq == NULL)
1864 /* Cannot find the UCS4 value. */
1865 return NULL;
1866
1867 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1868 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1869 strlen (seq->name));
1870 if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1871 /* We cannot proceed, we don't know the UCS4 value. */
1872 return NULL;
1873
a673fbcb 1874 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
4b10dd6c
UD
1875 wstr[0] = seq->ucs4;
1876 wstr[1] = 0;
1877 }
1878 else if (now->tok == tok_string)
1879 {
1880 wstr = now->val.str.startwc;
a673fbcb 1881 if (wstr == NULL || wstr[0] == 0)
4b10dd6c
UD
1882 return NULL;
1883 }
1884 else
1885 {
1886 if (now->tok != tok_eol && now->tok != tok_eof)
1887 lr_ignore_rest (ldfile, 0);
1888 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1889 return (uint32_t *) -1l;
19bc17a9
RM
1890 }
1891
4b10dd6c
UD
1892 return wstr;
1893}
19bc17a9 1894
19bc17a9 1895
4b10dd6c
UD
1896static void
1897read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
47e8b443 1898 struct token *now, const struct charmap_t *charmap,
4b10dd6c
UD
1899 struct repertoire_t *repertoire)
1900{
1901 uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1902 struct translit_t *result;
1903 struct translit_to_t **top;
a673fbcb 1904 struct obstack *ob = &ctype->mempool;
4b10dd6c
UD
1905 int first;
1906 int ignore;
1907
1908 if (from_wstr == NULL)
1909 /* There is no valid from string. */
1910 return;
19bc17a9 1911
4b10dd6c
UD
1912 result = (struct translit_t *) obstack_alloc (ob,
1913 sizeof (struct translit_t));
1914 result->from = from_wstr;
a673fbcb
UD
1915 result->fname = ldfile->fname;
1916 result->lineno = ldfile->lineno;
4b10dd6c
UD
1917 result->next = NULL;
1918 result->to = NULL;
1919 top = &result->to;
1920 first = 1;
1921 ignore = 0;
1922
1923 while (1)
1924 {
1925 uint32_t *to_wstr;
1926
1927 /* Next we have one or more transliterations. They are
1928 separated by semicolons. */
47e8b443 1929 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
4b10dd6c
UD
1930
1931 if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1932 {
1933 /* One string read. */
1934 const uint32_t zero = 0;
1935
1936 if (!ignore)
1937 {
1938 obstack_grow (ob, &zero, 4);
1939 to_wstr = obstack_finish (ob);
1940
1941 *top = obstack_alloc (ob, sizeof (struct translit_to_t));
1942 (*top)->str = to_wstr;
1943 (*top)->next = NULL;
1944 }
1945
1946 if (now->tok == tok_eol)
1947 {
1948 result->next = ctype->translit;
1949 ctype->translit = result;
1950 return;
1951 }
1952
1953 if (!ignore)
1954 top = &(*top)->next;
1955 ignore = 0;
1956 }
1957 else
1958 {
1959 to_wstr = read_widestring (ldfile, now, charmap, repertoire);
1960 if (to_wstr == (uint32_t *) -1l)
1961 {
1962 /* An error occurred. */
1963 obstack_free (ob, result);
1964 return;
1965 }
1966
1967 if (to_wstr == NULL)
1968 ignore = 1;
1969 else
1970 /* This value is usable. */
1971 obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
19bc17a9 1972
4b10dd6c
UD
1973 first = 0;
1974 }
1975 }
19bc17a9
RM
1976}
1977
1978
a673fbcb
UD
1979static void
1980read_translit_ignore_entry (struct linereader *ldfile,
1981 struct locale_ctype_t *ctype,
47e8b443 1982 const struct charmap_t *charmap,
a673fbcb
UD
1983 struct repertoire_t *repertoire)
1984{
1985 /* We expect a semicolon-separated list of characters we ignore. We are
1986 only interested in the wide character definitions. These must be
1987 single characters, possibly defining a range when an ellipsis is used. */
1988 while (1)
1989 {
47e8b443
UD
1990 struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
1991 verbose);
a673fbcb
UD
1992 struct translit_ignore_t *newp;
1993 uint32_t from;
1994
1995 if (now->tok == tok_eol || now->tok == tok_eof)
1996 {
1997 lr_error (ldfile,
1998 _("premature end of `translit_ignore' definition"));
1999 return;
2000 }
2001
2002 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2003 {
2004 lr_error (ldfile, _("syntax error"));
2005 lr_ignore_rest (ldfile, 0);
2006 return;
2007 }
2008
2009 if (now->tok == tok_ucs4)
2010 from = now->val.ucs4;
2011 else
f0a4b6b1
UD
2012 /* Try to get the value. */
2013 from = repertoire_find_value (repertoire, now->val.str.startmb,
2014 now->val.str.lenmb);
a673fbcb
UD
2015
2016 if (from == ILLEGAL_CHAR_VALUE)
2017 {
2018 lr_error (ldfile, "invalid character name");
2019 newp = NULL;
2020 }
2021 else
2022 {
2023 newp = (struct translit_ignore_t *)
2024 obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
2025 newp->from = from;
2026 newp->to = from;
a0dc5206 2027 newp->step = 1;
a673fbcb
UD
2028
2029 newp->next = ctype->translit_ignore;
2030 ctype->translit_ignore = newp;
2031 }
2032
2033 /* Now we expect either a semicolon, an ellipsis, or the end of the
2034 line. */
47e8b443 2035 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
a673fbcb 2036
a0dc5206 2037 if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
a673fbcb
UD
2038 {
2039 /* XXX Should we bother implementing `....'? `...' certainly
2040 will not be implemented. */
2041 uint32_t to;
a0dc5206 2042 int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
a673fbcb 2043
47e8b443 2044 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
a673fbcb
UD
2045
2046 if (now->tok == tok_eol || now->tok == tok_eof)
2047 {
2048 lr_error (ldfile,
2049 _("premature end of `translit_ignore' definition"));
2050 return;
2051 }
2052
2053 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2054 {
2055 lr_error (ldfile, _("syntax error"));
2056 lr_ignore_rest (ldfile, 0);
2057 return;
2058 }
2059
2060 if (now->tok == tok_ucs4)
2061 to = now->val.ucs4;
2062 else
f0a4b6b1
UD
2063 /* Try to get the value. */
2064 to = repertoire_find_value (repertoire, now->val.str.startmb,
2065 now->val.str.lenmb);
a673fbcb
UD
2066
2067 if (to == ILLEGAL_CHAR_VALUE)
2068 lr_error (ldfile, "invalid character name");
2069 else
2070 {
2071 /* Make sure the `to'-value is larger. */
2072 if (to >= from)
a0dc5206
UD
2073 {
2074 newp->to = to;
2075 newp->step = step;
2076 }
a673fbcb
UD
2077 else
2078 lr_error (ldfile, _("\
2079to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2080 (to | from) < 65536 ? 4 : 8, to,
2081 (to | from) < 65536 ? 4 : 8, from);
2082 }
2083
2084 /* And the next token. */
47e8b443 2085 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
a673fbcb
UD
2086 }
2087
2088 if (now->tok == tok_eol || now->tok == tok_eof)
2089 /* We are done. */
2090 return;
2091
2092 if (now->tok == tok_semicolon)
2093 /* Next round. */
2094 continue;
2095
2096 /* If we come here something is wrong. */
2097 lr_error (ldfile, _("syntax error"));
2098 lr_ignore_rest (ldfile, 0);
2099 return;
2100 }
2101}
2102
2103
4b10dd6c
UD
2104/* The parser for the LC_CTYPE section of the locale definition.

   LDFILE is the line reader the section is read from, RESULT is the
   locale whose LC_CTYPE category is set up via ctype_startup and then
   filled in, CHARMAP is the character map handed to the tokenizer and
   the lookup helpers, and REPERTOIRE_NAME (may be NULL) names the
   repertoire which is loaded with repertoire_read.  If IGNORE_CONTENT
   is nonzero the section is still parsed, but the lines defining class
   members, digits, mappings and transliterations are skipped instead
   of being recorded.

   The function returns once the `END' line has been handled or a `copy'
   statement could not be honored; if the input ends first a "premature
   end of file" error is reported.  */
2105void
2106ctype_read (struct linereader *ldfile, struct localedef_t *result,
47e8b443 2107 const struct charmap_t *charmap, const char *repertoire_name,
4b10dd6c 2108 int ignore_content)
19bc17a9 2109{
4b10dd6c
UD
2110 struct repertoire_t *repertoire = NULL;
2111 struct locale_ctype_t *ctype;
2112 struct token *now;
2113 enum token_t nowtok;
19bc17a9 2114 size_t cnt;
4b10dd6c
UD
2115 uint32_t last_wch = 0;
2116 enum token_t last_token;
2117 enum token_t ellipsis_token;
a0dc5206 2118 int step;
4b10dd6c
UD
2119 char last_charcode[16];
2120 size_t last_charcode_len = 0;
2121 const char *last_str = NULL;
2122 int mapidx;
a6bd56c7 2123 struct localedef_t *copy_locale = NULL;
19bc17a9 2124
4b10dd6c
UD
2125 /* Get the repertoire we have to use. */
2126 if (repertoire_name != NULL)
2127 repertoire = repertoire_read (repertoire_name);
19bc17a9 2128
4b10dd6c
UD
2129 /* The rest of the line containing `LC_CTYPE' must be free. */
2130 lr_ignore_rest (ldfile, 1);
19bc17a9 2131
4b10dd6c
UD
2132
/* Skip empty lines in front of the first statement. */
2133 do
19bc17a9 2134 {
47e8b443 2135 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c 2136 nowtok = now->tok;
19bc17a9 2137 }
4b10dd6c 2138 while (nowtok == tok_eol);
19bc17a9 2139
4b10dd6c
UD
2140 /* If we see `copy' now we are almost done. */
2141 if (nowtok == tok_copy)
2142 {
47e8b443 2143 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
a6bd56c7
UD
2144 if (now->tok != tok_string)
2145 {
2146 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2147
/* Error exit, also reached by goto when the locale to copy from cannot
   be read: consume tokens up to the `END' keyword or the end of the
   file, check what follows, and return. */
2148 skip_category:
2149 do
47e8b443 2150 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
a6bd56c7
UD
2151 while (now->tok != tok_eof && now->tok != tok_end);
2152
2153 if (now->tok != tok_eof
47e8b443 2154 || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
93693c4d 2155 now->tok == tok_eof))
a6bd56c7
UD
2156 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2157 else if (now->tok != tok_lc_ctype)
2158 {
2159 lr_error (ldfile, _("\
2160%1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2161 lr_ignore_rest (ldfile, 0);
2162 }
2163 else
2164 lr_ignore_rest (ldfile, 1);
2165
2166 return;
2167 }
2168
2169 if (! ignore_content)
2170 {
2171 /* Get the locale definition. */
2172 copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2173 repertoire_name, charmap, NULL);
2174 if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2175 {
2176 /* Not yet loaded. So do it now. */
2177 if (locfile_read (copy_locale, charmap) != 0)
2178 goto skip_category;
2179 }
71663747
UD
2180
/* The locale to copy from has no LC_CTYPE data: stop here. */
2181 if (copy_locale->categories[LC_CTYPE].ctype == NULL)
2182 return;
a6bd56c7
UD
2183 }
2184
2185 lr_ignore_rest (ldfile, 1);
2186
47e8b443 2187 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
a6bd56c7 2188 nowtok = now->tok;
4b10dd6c 2189 }
75cd5204 2190
4b10dd6c 2191 /* Prepare the data structures. */
a6bd56c7 2192 ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
4b10dd6c
UD
2193 ctype = result->categories[LC_CTYPE].ctype;
2194
2195 /* Remember the repertoire we use. */
2196 if (!ignore_content)
2197 ctype->repertoire = repertoire;
2198
/* Main loop: NOWTOK is the first token of the statement to handle. */
2199 while (1)
19bc17a9 2200 {
4b10dd6c
UD
2201 unsigned long int class_bit = 0;
2202 unsigned long int class256_bit = 0;
/* 0: ordinary class line; 1: `digit' line, entries are also stored in
   mbdigits/wcdigits; 2: `outdigit' line, entries are stored in
   mboutdigits/wcoutdigits. */
2203 int handle_digits = 0;
2204
2205 /* Of course we don't proceed beyond the end of file. */
2206 if (nowtok == tok_eof)
2207 break;
2208
2209 /* Ignore empty lines. */
2210 if (nowtok == tok_eol)
19bc17a9 2211 {
47e8b443 2212 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2213 nowtok = now->tok;
2214 continue;
2215 }
19bc17a9 2216
4b10dd6c
UD
2217 switch (nowtok)
2218 {
/* `charclass': a semicolon-separated list of names of new classes. */
5491da0d 2219 case tok_charclass:
47e8b443 2220 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
5491da0d
UD
2221 while (now->tok == tok_ident || now->tok == tok_string)
2222 {
2223 ctype_class_new (ldfile, ctype, now->val.str.startmb);
47e8b443 2224 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
5491da0d
UD
2225 if (now->tok != tok_semicolon)
2226 break;
47e8b443 2227 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
5491da0d
UD
2228 }
2229 if (now->tok != tok_eol)
2230 SYNTAX_ERROR (_("\
2231%s: syntax error in definition of new character class"), "LC_CTYPE");
2232 break;
2233
/* `charconv': a semicolon-separated list of names of new mappings. */
2234 case tok_charconv:
47e8b443 2235 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
5491da0d
UD
2236 while (now->tok == tok_ident || now->tok == tok_string)
2237 {
2238 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
47e8b443 2239 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
5491da0d
UD
2240 if (now->tok != tok_semicolon)
2241 break;
47e8b443 2242 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
5491da0d
UD
2243 }
2244 if (now->tok != tok_eol)
2245 SYNTAX_ERROR (_("\
2246%s: syntax error in definition of new character map"), "LC_CTYPE");
2247 break;
2248
4b10dd6c 2249 case tok_class:
b9eb05d6
UD
2250 /* Ignore the rest of the line if we don't need the input of
2251 this line. */
2252 if (ignore_content)
2253 {
2254 lr_ignore_rest (ldfile, 0);
2255 break;
2256 }
2257
4b10dd6c
UD
2258 /* We simply forget the `class' keyword and use the following
2259 operand to determine the bit. */
47e8b443 2260 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2261 if (now->tok == tok_ident || now->tok == tok_string)
2262 {
87372aa9 2263 /* Check whether it is one of the already known class names. */
4b10dd6c
UD
2264 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2265 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2266 break;
2267 if (cnt >= ctype->nr_charclass)
2268 {
011ebfab 2269#ifdef PREDEFINED_CLASSES
4b10dd6c
UD
2270 if (now->val.str.lenmb == 8
2271 && memcmp ("special1", now->val.str.startmb, 8) == 0)
2272 class_bit = _ISwspecial1;
2273 else if (now->val.str.lenmb == 8
2274 && memcmp ("special2", now->val.str.startmb, 8) == 0)
2275 class_bit = _ISwspecial2;
2276 else if (now->val.str.lenmb == 8
2277 && memcmp ("special3", now->val.str.startmb, 8) == 0)
2278 class_bit = _ISwspecial3;
2279 else
011ebfab 2280#endif
4b10dd6c 2281 {
87372aa9
UD
2282 /* OK, it's a new class. */
2283 ctype_class_new (ldfile, ctype, now->val.str.startmb);
4b10dd6c 2284
87372aa9 2285 class_bit = _ISwbit (ctype->nr_charclass - 1);
4b10dd6c
UD
2286 }
2287 }
2288 else
7f653277
UD
2289 {
2290 class_bit = _ISwbit (cnt);
4b10dd6c 2291
7f653277
UD
2292 free (now->val.str.startmb);
2293 }
4b10dd6c
UD
2294 }
2295 else if (now->tok == tok_digit)
2296 goto handle_tok_digit;
2297 else if (now->tok < tok_upper || now->tok > tok_blank)
2298 goto err_label;
2299 else
2300 {
2301 class_bit = BITw (now->tok);
2302 class256_bit = BIT (now->tok);
2303 }
2304
2305 /* The next character must be a semicolon. */
47e8b443 2306 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2307 if (now->tok != tok_semicolon)
2308 goto err_label;
2309 goto read_charclass;
2310
2311 case tok_upper:
2312 case tok_lower:
2313 case tok_alpha:
2314 case tok_alnum:
2315 case tok_space:
2316 case tok_cntrl:
2317 case tok_punct:
2318 case tok_graph:
2319 case tok_print:
2320 case tok_xdigit:
2321 case tok_blank:
b9eb05d6
UD
2322 /* Ignore the rest of the line if we don't need the input of
2323 this line. */
2324 if (ignore_content)
2325 {
2326 lr_ignore_rest (ldfile, 0);
2327 break;
2328 }
2329
4b10dd6c
UD
2330 class_bit = BITw (now->tok);
2331 class256_bit = BIT (now->tok);
2332 handle_digits = 0;
/* Common code for all class-like lines.  It is entered by goto from the
   `class', `digit', `outdigit' and identifier cases with CLASS_BIT,
   CLASS256_BIT and HANDLE_DIGITS already set. */
2333 read_charclass:
2334 ctype->class_done |= class_bit;
2335 last_token = tok_none;
2336 ellipsis_token = tok_none;
a0dc5206 2337 step = 1;
47e8b443 2338 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2339 while (now->tok != tok_eol && now->tok != tok_eof)
2340 {
2341 uint32_t wch;
2342 struct charseq *seq;
2343
2344 if (ellipsis_token == tok_none)
2345 {
2346 if (get_character (now, charmap, repertoire, &seq, &wch))
2347 goto err_label;
2348
2349 if (!ignore_content && seq != NULL && seq->nbytes == 1)
2350 /* Yep, we can store information about this byte
2351 sequence. */
2352 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2353
2354 if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2355 && class_bit != 0)
2356 /* We have the UCS4 position. */
2357 *find_idx (ctype, &ctype->class_collection,
2358 &ctype->class_collection_max,
2359 &ctype->class_collection_act, wch) |= class_bit;
2360
/* Remember this operand; it is the start of the range if an ellipsis
   follows. */
2361 last_token = now->tok;
549b3c3a 2362 /* Terminate the string. */
9e2b7438
UD
2363 if (last_token == tok_bsymbol)
2364 {
2365 now->val.str.startmb[now->val.str.lenmb] = '\0';
2366 last_str = now->val.str.startmb;
2367 }
2368 else
2369 last_str = NULL;
4b10dd6c
UD
2370 last_wch = wch;
2371 memcpy (last_charcode, now->val.charcode.bytes, 16);
2372 last_charcode_len = now->val.charcode.nbytes;
2373
2374 if (!ignore_content && handle_digits == 1)
2375 {
2376 /* We must store the digit values. */
2377 if (ctype->mbdigits_act == ctype->mbdigits_max)
2378 {
b9eb05d6 2379 ctype->mbdigits_max += 10;
4b10dd6c
UD
2380 ctype->mbdigits = xrealloc (ctype->mbdigits,
2381 (ctype->mbdigits_max
2382 * sizeof (char *)));
b9eb05d6 2383 ctype->wcdigits_max += 10;
4b10dd6c
UD
2384 ctype->wcdigits = xrealloc (ctype->wcdigits,
2385 (ctype->wcdigits_max
2386 * sizeof (uint32_t)));
2387 }
2388
2389 ctype->mbdigits[ctype->mbdigits_act++] = seq;
2390 ctype->wcdigits[ctype->wcdigits_act++] = wch;
2391 }
2392 else if (!ignore_content && handle_digits == 2)
2393 {
2394 /* We must store the digit values. */
2395 if (ctype->outdigits_act >= 10)
2396 {
2397 lr_error (ldfile, _("\
2398%s: field `%s' does not contain exactly ten entries"),
2399 "LC_CTYPE", "outdigit");
69c69fe1
UD
2400 lr_ignore_rest (ldfile, 0);
2401 break;
4b10dd6c
UD
2402 }
2403
2404 ctype->mboutdigits[ctype->outdigits_act] = seq;
2405 ctype->wcoutdigits[ctype->outdigits_act] = wch;
2406 ++ctype->outdigits_act;
2407 }
2408 }
2409 else
2410 {
2411 /* Now it gets complicated. We have to resolve the
2412 ellipsis problem. First we must distinguish between
2413 the different kind of ellipsis and this must match the
2414 tokens we have seen. */
2415 assert (last_token != tok_none);
2416
2417 if (last_token != now->tok)
2418 {
2419 lr_error (ldfile, _("\
2420ellipsis range must be marked by two operands of same type"));
2421 lr_ignore_rest (ldfile, 0);
2422 break;
2423 }
2424
2425 if (last_token == tok_bsymbol)
2426 {
2427 if (ellipsis_token == tok_ellipsis3)
2428 lr_error (ldfile, _("with symbolic name range values \
2429the absolute ellipsis `...' must not be used"));
2430
2431 charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2432 repertoire, now, last_str,
2433 class256_bit, class_bit,
2434 (ellipsis_token
2435 == tok_ellipsis4
2436 ? 10 : 16),
2437 ignore_content,
a0dc5206 2438 handle_digits, step);
4b10dd6c
UD
2439 }
2440 else if (last_token == tok_ucs4)
2441 {
2442 if (ellipsis_token != tok_ellipsis2)
2443 lr_error (ldfile, _("\
2444with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2445
2446 charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2447 repertoire, now, last_wch,
2448 class256_bit, class_bit,
a0dc5206
UD
2449 ignore_content, handle_digits,
2450 step);
4b10dd6c
UD
2451 }
2452 else
2453 {
2454 assert (last_token == tok_charcode);
2455
2456 if (ellipsis_token != tok_ellipsis3)
2457 lr_error (ldfile, _("\
2458with character code range values one must use the absolute ellipsis `...'"));
2459
2460 charclass_charcode_ellipsis (ldfile, ctype, charmap,
2461 repertoire, now,
2462 last_charcode,
2463 last_charcode_len,
2464 class256_bit, class_bit,
2465 ignore_content,
2466 handle_digits);
2467 }
2468
2469 /* Now we have used the last value. */
2470 last_token = tok_none;
2471 }
2472
2473 /* Next we expect a semicolon or the end of the line. */
47e8b443 2474 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2475 if (now->tok == tok_eol || now->tok == tok_eof)
2476 break;
2477
2478 if (last_token != tok_none
a0dc5206 2479 && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
4b10dd6c 2480 {
/* The `_2' ellipsis variants are folded into the plain ones with a
   step of 2. */
a0dc5206
UD
2481 if (now->tok == tok_ellipsis2_2)
2482 {
2483 now->tok = tok_ellipsis2;
2484 step = 2;
2485 }
2486 else if (now->tok == tok_ellipsis4_2)
2487 {
2488 now->tok = tok_ellipsis4;
2489 step = 2;
2490 }
2491
4b10dd6c 2492 ellipsis_token = now->tok;
a0dc5206 2493
47e8b443 2494 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2495 continue;
2496 }
2497
2498 if (now->tok != tok_semicolon)
2499 goto err_label;
2500
2501 /* And get the next character. */
47e8b443 2502 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2503
2504 ellipsis_token = tok_none;
a0dc5206 2505 step = 1;
4b10dd6c
UD
2506 }
2507 break;
2508
2509 case tok_digit:
b9eb05d6
UD
2510 /* Ignore the rest of the line if we don't need the input of
2511 this line. */
2512 if (ignore_content)
42d7c593
UD
2513 {
2514 lr_ignore_rest (ldfile, 0);
2515 break;
2516 }
b9eb05d6 2517
4b10dd6c
UD
2518 handle_tok_digit:
2519 class_bit = _ISwdigit;
2520 class256_bit = _ISdigit;
2521 handle_digits = 1;
2522 goto read_charclass;
2523
2524 case tok_outdigit:
b9eb05d6
UD
2525 /* Ignore the rest of the line if we don't need the input of
2526 this line. */
2527 if (ignore_content)
2528 {
2529 lr_ignore_rest (ldfile, 0);
2530 break;
2531 }
2532
4b10dd6c
UD
2533 if (ctype->outdigits_act != 0)
2534 lr_error (ldfile, _("\
2535%s: field `%s' declared more than once"),
2536 "LC_CTYPE", "outdigit");
2537 class_bit = 0;
2538 class256_bit = 0;
2539 handle_digits = 2;
2540 goto read_charclass;
2541
2542 case tok_toupper:
b9eb05d6
UD
2543 /* Ignore the rest of the line if we don't need the input of
2544 this line. */
2545 if (ignore_content)
2546 {
2547 lr_ignore_rest (ldfile, 0);
2548 break;
2549 }
2550
4b10dd6c
UD
2551 mapidx = 0;
2552 goto read_mapping;
2553
2554 case tok_tolower:
b9eb05d6
UD
2555 /* Ignore the rest of the line if we don't need the input of
2556 this line. */
2557 if (ignore_content)
2558 {
2559 lr_ignore_rest (ldfile, 0);
2560 break;
2561 }
2562
4b10dd6c
UD
2563 mapidx = 1;
2564 goto read_mapping;
2565
2566 case tok_map:
b9eb05d6
UD
2567 /* Ignore the rest of the line if we don't need the input of
2568 this line. */
2569 if (ignore_content)
2570 {
2571 lr_ignore_rest (ldfile, 0);
2572 break;
2573 }
2574
4b10dd6c
UD
2575 /* We simply forget the `map' keyword and use the following
2576 operand to determine the mapping. */
47e8b443 2577 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2578 if (now->tok == tok_ident || now->tok == tok_string)
2579 {
2580 size_t cnt;
2581
/* Map indexes 0 and 1 are the ones used for `toupper' and `tolower'
   above, so the search among the named maps starts at 2. */
2582 for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2583 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2584 break;
2585
7f653277
UD
2586 if (cnt < ctype->map_collection_nr)
2587 free (now->val.str.startmb);
2588 else
87372aa9
UD
2589 /* OK, it's a new map. */
2590 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2591
2592 mapidx = cnt;
4b10dd6c
UD
2593 }
2594 else if (now->tok < tok_toupper || now->tok > tok_tolower)
2595 goto err_label;
2596 else
2597 mapidx = now->tok - tok_toupper;
2598
47e8b443 2599 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2600 /* This better should be a semicolon. */
2601 if (now->tok != tok_semicolon)
2602 goto err_label;
2603
/* Common code for all mapping lines; MAPIDX selects the map.  The rest
   of the line is a list of `(from,to)' pairs. */
2604 read_mapping:
2605 /* Test whether this mapping was already defined. */
2606 if (ctype->tomap_done[mapidx])
2607 {
2608 lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2609 ctype->mapnames[mapidx]);
2610 lr_ignore_rest (ldfile, 0);
2611 break;
2612 }
2613 ctype->tomap_done[mapidx] = 1;
2614
47e8b443 2615 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2616 while (now->tok != tok_eol && now->tok != tok_eof)
2617 {
2618 struct charseq *from_seq;
2619 uint32_t from_wch;
2620 struct charseq *to_seq;
2621 uint32_t to_wch;
2622
2623 /* Every pair starts with an opening brace. */
2624 if (now->tok != tok_open_brace)
2625 goto err_label;
2626
2627 /* Next comes the from-value. */
47e8b443 2628 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2629 if (get_character (now, charmap, repertoire, &from_seq,
2630 &from_wch) != 0)
2631 goto err_label;
2632
2633 /* The next is a comma. */
47e8b443 2634 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2635 if (now->tok != tok_comma)
2636 goto err_label;
2637
2638 /* And the other value. */
47e8b443 2639 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2640 if (get_character (now, charmap, repertoire, &to_seq,
2641 &to_wch) != 0)
2642 goto err_label;
2643
2644 /* And the last thing is the closing brace. */
47e8b443 2645 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2646 if (now->tok != tok_close_brace)
2647 goto err_label;
2648
2649 if (!ignore_content)
2650 {
f0c7c524
UD
2651 /* Check whether the mapping converts from an ASCII value
2652 to a non-ASCII value. */
2653 if (from_seq != NULL && from_seq->nbytes == 1
2654 && isascii (from_seq->bytes[0])
2655 && to_seq != NULL && (to_seq->nbytes != 1
2656 || !isascii (to_seq->bytes[0])))
2657 ctype->to_nonascii = 1;
2658
4b10dd6c
UD
2659 if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2660 && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2661 /* We can use this value. */
2662 ctype->map256_collection[mapidx][from_seq->bytes[0]]
2663 = to_seq->bytes[0];
2664
2665 if (from_wch != ILLEGAL_CHAR_VALUE
2666 && to_wch != ILLEGAL_CHAR_VALUE)
2667 /* Both correct values. */
2668 *find_idx (ctype, &ctype->map_collection[mapidx],
2669 &ctype->map_collection_max[mapidx],
2670 &ctype->map_collection_act[mapidx],
2671 from_wch) = to_wch;
2672 }
2673
2674 /* Now comes a semicolon or the end of the line/file. */
47e8b443 2675 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c 2676 if (now->tok == tok_semicolon)
47e8b443 2677 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2678 }
2679 break;
2680
2681 case tok_translit_start:
02fb3d17
UD
2682 /* Ignore the entire translit section with its peculiar syntax
2683 if we don't need the input. */
b9eb05d6
UD
2684 if (ignore_content)
2685 {
02fb3d17
UD
2686 do
2687 {
2688 lr_ignore_rest (ldfile, 0);
47e8b443 2689 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02fb3d17
UD
2690 }
2691 while (now->tok != tok_translit_end && now->tok != tok_eof);
2692
2693 if (now->tok == tok_eof)
2694 lr_error (ldfile, _(\
2695"%s: `translit_start' section does not end with `translit_end'"),
2696 "LC_CTYPE");
2697
b9eb05d6
UD
2698 break;
2699 }
2700
4b10dd6c
UD
2701 /* The rest of the line better should be empty. */
2702 lr_ignore_rest (ldfile, 1);
2703
2704 /* We count here the number of allocated entries in the `translit'
2705 array. */
2706 cnt = 0;
2707
4b156cb2
UD
2708 ldfile->translate_strings = 1;
2709 ldfile->return_widestr = 1;
2710
4b10dd6c 2711 /* We proceed until we see the `translit_end' token. */
47e8b443 2712 while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
4b10dd6c
UD
2713 now->tok != tok_translit_end && now->tok != tok_eof)
2714 {
2715 if (now->tok == tok_eol)
2716 /* Ignore empty lines. */
2717 continue;
2718
4b10dd6c
UD
2719 if (now->tok == tok_include)
2720 {
2721 /* We have to include locale. */
2722 const char *locale_name;
2723 const char *repertoire_name;
02fb3d17 2724 struct translit_include_t *include_stmt, **include_ptr;
4b10dd6c 2725
47e8b443 2726 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2727 /* This should be a string or an identifier. In any
2728 case something to name a locale. */
2729 if (now->tok != tok_string && now->tok != tok_ident)
2730 {
/* Shared error exit for malformed lines of the translit section:
   report, drop the rest of the line and go on with the next line. */
2731 translit_syntax:
2732 lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2733 lr_ignore_rest (ldfile, 0);
2734 continue;
2735 }
2736 locale_name = now->val.str.startmb;
2737
2738 /* Next should be a semicolon. */
47e8b443 2739 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2740 if (now->tok != tok_semicolon)
2741 goto translit_syntax;
2742
2743 /* Now the repertoire name. */
47e8b443 2744 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2745 if ((now->tok != tok_string && now->tok != tok_ident)
2746 || now->val.str.startmb == NULL)
2747 goto translit_syntax;
2748 repertoire_name = now->val.str.startmb;
d9cab009
RM
2749 if (repertoire_name[0] == '\0')
2750 /* Ignore the empty string. */
2751 repertoire_name = NULL;
4b10dd6c 2752
02fb3d17
UD
2753 /* Save the include statement for later processing. */
2754 include_stmt = (struct translit_include_t *)
2755 xmalloc (sizeof (struct translit_include_t));
2756 include_stmt->copy_locale = locale_name;
2757 include_stmt->copy_repertoire = repertoire_name;
2758 include_stmt->next = NULL;
4b10dd6c 2759
/* Append at the end of the list so the include statements keep the
   order in which they appear in the file. */
02fb3d17
UD
2760 include_ptr = &ctype->translit_include;
2761 while (*include_ptr != NULL)
2762 include_ptr = &(*include_ptr)->next;
2763 *include_ptr = include_stmt;
4b10dd6c
UD
2764
2765 /* The rest of the line must be empty. */
2766 lr_ignore_rest (ldfile, 1);
a673fbcb
UD
2767
2768 /* Make sure the locale is read. */
02fb3d17
UD
2769 add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2770 1, NULL);
a673fbcb
UD
2771 continue;
2772 }
2773 else if (now->tok == tok_default_missing)
2774 {
2775 uint32_t *wstr;
2776
c9f79e08 2777 while (1)
a673fbcb 2778 {
c9f79e08
UD
2779 /* We expect a single character or string as the
2780 argument. */
47e8b443 2781 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
c9f79e08
UD
2782 wstr = read_widestring (ldfile, now, charmap,
2783 repertoire);
2784
2785 if (wstr != NULL)
a673fbcb 2786 {
c9f79e08
UD
2787 if (ctype->default_missing != NULL)
2788 {
2789 lr_error (ldfile, _("\
a673fbcb 2790%s: duplicate `default_missing' definition"), "LC_CTYPE");
f2b98f97
UD
2791 WITH_CUR_LOCALE (error_at_line (0, 0,
2792 ctype->default_missing_file,
2793 ctype->default_missing_lineno,
2794 _("\
2795previous definition was here")));
c9f79e08
UD
2796 }
2797 else
2798 {
2799 ctype->default_missing = wstr;
2800 ctype->default_missing_file = ldfile->fname;
2801 ctype->default_missing_lineno = ldfile->lineno;
2802 }
4b156cb2
UD
2803 /* We can have more entries, ignore them. */
2804 lr_ignore_rest (ldfile, 0);
c9f79e08 2805 break;
a673fbcb 2806 }
c9f79e08
UD
2807 else if (wstr == (uint32_t *) -1l)
2808 /* This was a syntax error. */
2809 break;
2810
2811 /* Maybe there is another replacement we can use. */
47e8b443 2812 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
c9f79e08 2813 if (now->tok == tok_eol || now->tok == tok_eof)
a673fbcb 2814 {
c9f79e08
UD
2815 /* Nothing found. We tell the user. */
2816 lr_error (ldfile, _("\
0232a3ae 2817%s: no representable `default_missing' definition found"), "LC_CTYPE");
c9f79e08 2818 break;
a673fbcb 2819 }
c9f79e08
UD
2820 if (now->tok != tok_semicolon)
2821 goto translit_syntax;
a673fbcb 2822 }
c9f79e08 2823
a673fbcb
UD
2824 continue;
2825 }
2826 else if (now->tok == tok_translit_ignore)
2827 {
2828 read_translit_ignore_entry (ldfile, ctype, charmap,
2829 repertoire);
4b10dd6c
UD
2830 continue;
2831 }
2832
/* Anything else is taken as an ordinary transliteration rule. */
2833 read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2834 }
4b156cb2 2835 ldfile->return_widestr = 0;
02fb3d17
UD
2836
2837 if (now->tok == tok_eof)
2838 lr_error (ldfile, _(\
2839"%s: `translit_start' section does not end with `translit_end'"),
2840 "LC_CTYPE");
2841
4b10dd6c
UD
2842 break;
2843
2844 case tok_ident:
b9eb05d6
UD
2845 /* Ignore the rest of the line if we don't need the input of
2846 this line. */
2847 if (ignore_content)
2848 {
2849 lr_ignore_rest (ldfile, 0);
2850 break;
2851 }
2852
4b10dd6c
UD
2853 /* This could mean one of several things. First test whether
2854 it's a character class name. */
2855 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2856 if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2857 break;
2858 if (cnt < ctype->nr_charclass)
2859 {
2860 class_bit = _ISwbit (cnt);
2861 class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2862 free (now->val.str.startmb);
2863 goto read_charclass;
2864 }
/* Second possibility: the name of a mapping. */
5491da0d
UD
2865 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2866 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2867 break;
2868 if (cnt < ctype->map_collection_nr)
2869 {
2870 mapidx = cnt;
2871 free (now->val.str.startmb);
2872 goto read_mapping;
2873 }
011ebfab 2874#ifdef PREDEFINED_CLASSES
4b10dd6c
UD
2875 if (strcmp (now->val.str.startmb, "special1") == 0)
2876 {
2877 class_bit = _ISwspecial1;
2878 free (now->val.str.startmb);
2879 goto read_charclass;
2880 }
2881 if (strcmp (now->val.str.startmb, "special2") == 0)
2882 {
2883 class_bit = _ISwspecial2;
2884 free (now->val.str.startmb);
2885 goto read_charclass;
2886 }
2887 if (strcmp (now->val.str.startmb, "special3") == 0)
2888 {
2889 class_bit = _ISwspecial3;
2890 free (now->val.str.startmb);
2891 goto read_charclass;
2892 }
2893 if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2894 {
2895 mapidx = 2;
2896 goto read_mapping;
2897 }
011ebfab 2898#endif
4b10dd6c
UD
2899 break;
2900
2901 case tok_end:
2902 /* Next we assume `LC_CTYPE'. */
47e8b443 2903 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2904 if (now->tok == tok_eof)
2905 break;
2906 if (now->tok == tok_eol)
2907 lr_error (ldfile, _("%s: incomplete `END' line"),
2908 "LC_CTYPE");
2909 else if (now->tok != tok_lc_ctype)
2910 lr_error (ldfile, _("\
2911%1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2912 lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2913 return;
2914
2915 default:
/* Also the target of the syntax-error gotos in the cases above. */
2916 err_label:
2917 if (now->tok != tok_eof)
2918 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
19bc17a9
RM
2919 }
2920
4b10dd6c 2921 /* Prepare for the next round. */
47e8b443 2922 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c 2923 nowtok = now->tok;
19bc17a9
RM
2924 }
2925
4b10dd6c
UD
2926 /* When we come here we reached the end of the file. */
2927 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
19bc17a9
RM
2928}
2929
2930
2931static void
47e8b443
UD
2932set_class_defaults (struct locale_ctype_t *ctype,
2933 const struct charmap_t *charmap,
4b10dd6c 2934 struct repertoire_t *repertoire)
19bc17a9 2935{
4b10dd6c
UD
2936 size_t cnt;
2937
19bc17a9
RM
2938 /* These function defines the default values for the classes and conversions
2939 according to POSIX.2 2.5.2.1.
2940 It may seem that the order of these if-blocks is arbitrary but it is NOT.
2941 Don't move them unless you know what you do! */
2942
938c669e
AJ
2943 auto void set_default (int bitpos, int from, int to);
2944
4b10dd6c 2945 void set_default (int bitpos, int from, int to)
19bc17a9
RM
2946 {
2947 char tmp[2];
2948 int ch;
4b10dd6c
UD
2949 int bit = _ISbit (bitpos);
2950 int bitw = _ISwbit (bitpos);
19bc17a9
RM
2951 /* Define string. */
2952 strcpy (tmp, "?");
2953
2954 for (ch = from; ch <= to; ++ch)
2955 {
4b10dd6c 2956 struct charseq *seq;
19bc17a9
RM
2957 tmp[0] = ch;
2958
4b10dd6c 2959 seq = charmap_find_value (charmap, tmp, 1);
69c69fe1
UD
2960 if (seq == NULL)
2961 {
2962 char buf[10];
2963 sprintf (buf, "U%08X", ch);
2964 seq = charmap_find_value (charmap, buf, 9);
2965 }
4b10dd6c
UD
2966 if (seq == NULL)
2967 {
2968 if (!be_quiet)
f2b98f97 2969 WITH_CUR_LOCALE (error (0, 0, _("\
11bf311e 2970%s: character `%s' not defined while needed as default value"),
f2b98f97 2971 "LC_CTYPE", tmp));
19bc17a9 2972 }
4b10dd6c 2973 else if (seq->nbytes != 1)
f2b98f97 2974 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 2975%s: character `%s' in charmap not representable with one byte"),
f2b98f97 2976 "LC_CTYPE", tmp));
19bc17a9 2977 else
4b10dd6c 2978 ctype->class256_collection[seq->bytes[0]] |= bit;
f0a4b6b1
UD
2979
2980 /* No need to search here, the ASCII value is also the Unicode
2981 value. */
2982 ELEM (ctype, class_collection, , ch) |= bitw;
19bc17a9
RM
2983 }
2984 }
2985
2986 /* Set default values if keyword was not present. */
4b10dd6c 2987 if ((ctype->class_done & BITw (tok_upper)) == 0)
19bc17a9
RM
2988 /* "If this keyword [lower] is not specified, the lowercase letters
2989 `A' through `Z', ..., shall automatically belong to this class,
2990 with implementation defined character values." [P1003.2, 2.5.2.1] */
4b10dd6c 2991 set_default (BITPOS (tok_upper), 'A', 'Z');
19bc17a9 2992
4b10dd6c 2993 if ((ctype->class_done & BITw (tok_lower)) == 0)
19bc17a9
RM
2994 /* "If this keyword [lower] is not specified, the lowercase letters
2995 `a' through `z', ..., shall automatically belong to this class,
2996 with implementation defined character values." [P1003.2, 2.5.2.1] */
4b10dd6c 2997 set_default (BITPOS (tok_lower), 'a', 'z');
19bc17a9 2998
4b10dd6c 2999 if ((ctype->class_done & BITw (tok_alpha)) == 0)
19bc17a9
RM
3000 {
3001 /* Table 2-6 in P1003.2 says that characters in class `upper' or
3002 class `lower' *must* be in class `alpha'. */
3003 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
96f0d1f5
UD
3004 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
3005
3006 for (cnt = 0; cnt < 256; ++cnt)
3007 if ((ctype->class256_collection[cnt] & mask) != 0)
3008 ctype->class256_collection[cnt] |= BIT (tok_alpha);
19bc17a9
RM
3009
3010 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
96f0d1f5
UD
3011 if ((ctype->class_collection[cnt] & maskw) != 0)
3012 ctype->class_collection[cnt] |= BITw (tok_alpha);
19bc17a9
RM
3013 }
3014
4b10dd6c 3015 if ((ctype->class_done & BITw (tok_digit)) == 0)
19bc17a9
RM
3016 /* "If this keyword [digit] is not specified, the digits `0' through
3017 `9', ..., shall automatically belong to this class, with
3018 implementation-defined character values." [P1003.2, 2.5.2.1] */
4b10dd6c 3019 set_default (BITPOS (tok_digit), '0', '9');
19bc17a9
RM
3020
3021 /* "Only characters specified for the `alpha' and `digit' keyword
3022 shall be specified. Characters specified for the keyword `alpha'
3023 and `digit' are automatically included in this class. */
3024 {
3025 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
96f0d1f5
UD
3026 unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
3027
3028 for (cnt = 0; cnt < 256; ++cnt)
3029 if ((ctype->class256_collection[cnt] & mask) != 0)
3030 ctype->class256_collection[cnt] |= BIT (tok_alnum);
19bc17a9
RM
3031
3032 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
96f0d1f5
UD
3033 if ((ctype->class_collection[cnt] & maskw) != 0)
3034 ctype->class_collection[cnt] |= BITw (tok_alnum);
19bc17a9
RM
3035 }
3036
4b10dd6c 3037 if ((ctype->class_done & BITw (tok_space)) == 0)
19bc17a9
RM
3038 /* "If this keyword [space] is not specified, the characters <space>,
3039 <form-feed>, <newline>, <carriage-return>, <tab>, and
3040 <vertical-tab>, ..., shall automatically belong to this class,
3041 with implementation-defined character values." [P1003.2, 2.5.2.1] */
3042 {
4b10dd6c 3043 struct charseq *seq;
19bc17a9 3044
4b10dd6c 3045 seq = charmap_find_value (charmap, "space", 5);
45c95239
UD
3046 if (seq == NULL)
3047 seq = charmap_find_value (charmap, "SP", 2);
f0a4b6b1
UD
3048 if (seq == NULL)
3049 seq = charmap_find_value (charmap, "U00000020", 9);
4b10dd6c 3050 if (seq == NULL)
880f421f
UD
3051 {
3052 if (!be_quiet)
f2b98f97 3053 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 3054%s: character `%s' not defined while needed as default value"),
f2b98f97 3055 "LC_CTYPE", "<space>"));
4b10dd6c
UD
3056 }
3057 else if (seq->nbytes != 1)
f2b98f97 3058 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 3059%s: character `%s' in charmap not representable with one byte"),
f2b98f97 3060 "LC_CTYPE", "<space>"));
4b10dd6c
UD
3061 else
3062 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3063
f0a4b6b1 3064 /* No need to search. */
ce177a84 3065 ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
19bc17a9 3066
4b10dd6c 3067 seq = charmap_find_value (charmap, "form-feed", 9);
f0a4b6b1
UD
3068 if (seq == NULL)
3069 seq = charmap_find_value (charmap, "U0000000C", 9);
4b10dd6c 3070 if (seq == NULL)
880f421f
UD
3071 {
3072 if (!be_quiet)
f2b98f97 3073 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 3074%s: character `%s' not defined while needed as default value"),
f2b98f97 3075 "LC_CTYPE", "<form-feed>"));
4b10dd6c
UD
3076 }
3077 else if (seq->nbytes != 1)
f2b98f97 3078 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 3079%s: character `%s' in charmap not representable with one byte"),
f2b98f97 3080 "LC_CTYPE", "<form-feed>"));
4b10dd6c
UD
3081 else
3082 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3083
f0a4b6b1 3084 /* No need to search. */
ce177a84 3085 ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
4b10dd6c 3086
19bc17a9 3087
4b10dd6c 3088 seq = charmap_find_value (charmap, "newline", 7);
f0a4b6b1
UD
3089 if (seq == NULL)
3090 seq = charmap_find_value (charmap, "U0000000A", 9);
4b10dd6c 3091 if (seq == NULL)
880f421f
UD
3092 {
3093 if (!be_quiet)
f2b98f97 3094 WITH_CUR_LOCALE (error (0, 0, _("\
11bf311e
UD
3095%s: character `%s' not defined while needed as default value"),
3096 "LC_CTYPE", "<newline>"));
4b10dd6c
UD
3097 }
3098 else if (seq->nbytes != 1)
f2b98f97 3099 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 3100%s: character `%s' in charmap not representable with one byte"),
f2b98f97 3101 "LC_CTYPE", "<newline>"));
4b10dd6c
UD
3102 else
3103 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3104
f0a4b6b1 3105 /* No need to search. */
ce177a84 3106 ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
4b10dd6c 3107
19bc17a9 3108
4b10dd6c 3109 seq = charmap_find_value (charmap, "carriage-return", 15);
f0a4b6b1
UD
3110 if (seq == NULL)
3111 seq = charmap_find_value (charmap, "U0000000D", 9);
4b10dd6c 3112 if (seq == NULL)
880f421f
UD
3113 {
3114 if (!be_quiet)
f2b98f97 3115 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 3116%s: character `%s' not defined while needed as default value"),
f2b98f97 3117 "LC_CTYPE", "<carriage-return>"));
4b10dd6c
UD
3118 }
3119 else if (seq->nbytes != 1)
f2b98f97 3120 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 3121%s: character `%s' in charmap not representable with one byte"),
f2b98f97 3122 "LC_CTYPE", "<carriage-return>"));
4b10dd6c
UD
3123 else
3124 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3125
f0a4b6b1 3126 /* No need to search. */
ce177a84 3127 ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
4b10dd6c 3128
19bc17a9 3129
4b10dd6c 3130 seq = charmap_find_value (charmap, "tab", 3);
f0a4b6b1
UD
3131 if (seq == NULL)
3132 seq = charmap_find_value (charmap, "U00000009", 9);
4b10dd6c 3133 if (seq == NULL)
880f421f
UD
3134 {
3135 if (!be_quiet)
f2b98f97 3136 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 3137%s: character `%s' not defined while needed as default value"),
f2b98f97 3138 "LC_CTYPE", "<tab>"));
4b10dd6c
UD
3139 }
3140 else if (seq->nbytes != 1)
f2b98f97 3141 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 3142%s: character `%s' in charmap not representable with one byte"),
f2b98f97 3143 "LC_CTYPE", "<tab>"));
4b10dd6c
UD
3144 else
3145 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3146
f0a4b6b1 3147 /* No need to search. */
ce177a84 3148 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
4b10dd6c 3149
4b10dd6c
UD
3150
3151 seq = charmap_find_value (charmap, "vertical-tab", 12);
f0a4b6b1
UD
3152 if (seq == NULL)
3153 seq = charmap_find_value (charmap, "U0000000B", 9);
4b10dd6c
UD
3154 if (seq == NULL)
3155 {
3156 if (!be_quiet)
f2b98f97 3157 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 3158%s: character `%s' not defined while needed as default value"),
f2b98f97 3159 "LC_CTYPE", "<vertical-tab>"));
4b10dd6c
UD
3160 }
3161 else if (seq->nbytes != 1)
f2b98f97 3162 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 3163%s: character `%s' in charmap not representable with one byte"),
f2b98f97 3164 "LC_CTYPE", "<vertical-tab>"));
4b10dd6c
UD
3165 else
3166 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
f0a4b6b1
UD
3167
3168 /* No need to search. */
ce177a84 3169 ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
19bc17a9
RM
3170 }
3171
4b10dd6c 3172 if ((ctype->class_done & BITw (tok_xdigit)) == 0)
19bc17a9
RM
3173 /* "If this keyword is not specified, the digits `0' to `9', the
3174 uppercase letters `A' through `F', and the lowercase letters `a'
3175 through `f', ..., shall automatically belong to this class, with
3176 implementation defined character values." [P1003.2, 2.5.2.1] */
3177 {
4b10dd6c
UD
3178 set_default (BITPOS (tok_xdigit), '0', '9');
3179 set_default (BITPOS (tok_xdigit), 'A', 'F');
3180 set_default (BITPOS (tok_xdigit), 'a', 'f');
19bc17a9
RM
3181 }
3182
4b10dd6c 3183 if ((ctype->class_done & BITw (tok_blank)) == 0)
19bc17a9
RM
3184 /* "If this keyword [blank] is unspecified, the characters <space> and
3185 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3186 {
4b10dd6c 3187 struct charseq *seq;
19bc17a9 3188
4b10dd6c 3189 seq = charmap_find_value (charmap, "space", 5);
45c95239
UD
3190 if (seq == NULL)
3191 seq = charmap_find_value (charmap, "SP", 2);
f0a4b6b1
UD
3192 if (seq == NULL)
3193 seq = charmap_find_value (charmap, "U00000020", 9);
4b10dd6c 3194 if (seq == NULL)
880f421f
UD
3195 {
3196 if (!be_quiet)
f2b98f97 3197 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 3198%s: character `%s' not defined while needed as default value"),
f2b98f97 3199 "LC_CTYPE", "<space>"));
4b10dd6c
UD
3200 }
3201 else if (seq->nbytes != 1)
f2b98f97 3202 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 3203%s: character `%s' in charmap not representable with one byte"),
f2b98f97 3204 "LC_CTYPE", "<space>"));
4b10dd6c
UD
3205 else
3206 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3207
f0a4b6b1 3208 /* No need to search. */
ce177a84 3209 ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
4b10dd6c 3210
4b10dd6c
UD
3211
3212 seq = charmap_find_value (charmap, "tab", 3);
f0a4b6b1
UD
3213 if (seq == NULL)
3214 seq = charmap_find_value (charmap, "U00000009", 9);
4b10dd6c
UD
3215 if (seq == NULL)
3216 {
3217 if (!be_quiet)
f2b98f97 3218 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 3219%s: character `%s' not defined while needed as default value"),
f2b98f97 3220 "LC_CTYPE", "<tab>"));
4b10dd6c
UD
3221 }
3222 else if (seq->nbytes != 1)
f2b98f97 3223 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 3224%s: character `%s' in charmap not representable with one byte"),
f2b98f97 3225 "LC_CTYPE", "<tab>"));
4b10dd6c
UD
3226 else
3227 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
f0a4b6b1
UD
3228
3229 /* No need to search. */
ce177a84 3230 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
19bc17a9
RM
3231 }
3232
4b10dd6c 3233 if ((ctype->class_done & BITw (tok_graph)) == 0)
19bc17a9
RM
3234 /* "If this keyword [graph] is not specified, characters specified for
3235 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3236 shall belong to this character class." [P1003.2, 2.5.2.1] */
3237 {
3238 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3239 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
ce177a84
UD
3240 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3241 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3242 BITw (tok_punct);
19bc17a9
RM
3243 size_t cnt;
3244
3245 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
ce177a84
UD
3246 if ((ctype->class_collection[cnt] & maskw) != 0)
3247 ctype->class_collection[cnt] |= BITw (tok_graph);
4b10dd6c
UD
3248
3249 for (cnt = 0; cnt < 256; ++cnt)
3250 if ((ctype->class256_collection[cnt] & mask) != 0)
3251 ctype->class256_collection[cnt] |= BIT (tok_graph);
19bc17a9
RM
3252 }
3253
4b10dd6c 3254 if ((ctype->class_done & BITw (tok_print)) == 0)
19bc17a9
RM
3255 /* "If this keyword [print] is not provided, characters specified for
3256 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3257 and the <space> character shall belong to this character class."
3258 [P1003.2, 2.5.2.1] */
3259 {
3260 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3261 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
ce177a84
UD
3262 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3263 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3264 BITw (tok_punct);
19bc17a9 3265 size_t cnt;
4b10dd6c 3266 struct charseq *seq;
19bc17a9
RM
3267
3268 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
ce177a84
UD
3269 if ((ctype->class_collection[cnt] & maskw) != 0)
3270 ctype->class_collection[cnt] |= BITw (tok_print);
19bc17a9 3271
4b10dd6c
UD
3272 for (cnt = 0; cnt < 256; ++cnt)
3273 if ((ctype->class256_collection[cnt] & mask) != 0)
3274 ctype->class256_collection[cnt] |= BIT (tok_print);
3275
3276
4b10dd6c 3277 seq = charmap_find_value (charmap, "space", 5);
45c95239
UD
3278 if (seq == NULL)
3279 seq = charmap_find_value (charmap, "SP", 2);
f0a4b6b1
UD
3280 if (seq == NULL)
3281 seq = charmap_find_value (charmap, "U00000020", 9);
4b10dd6c
UD
3282 if (seq == NULL)
3283 {
3284 if (!be_quiet)
f2b98f97 3285 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 3286%s: character `%s' not defined while needed as default value"),
f2b98f97 3287 "LC_CTYPE", "<space>"));
4b10dd6c
UD
3288 }
3289 else if (seq->nbytes != 1)
f2b98f97 3290 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 3291%s: character `%s' in charmap not representable with one byte"),
f2b98f97 3292 "LC_CTYPE", "<space>"));
4b10dd6c
UD
3293 else
3294 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
f0a4b6b1
UD
3295
3296 /* No need to search. */
ce177a84 3297 ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
19bc17a9
RM
3298 }
3299
4b10dd6c 3300 if (ctype->tomap_done[0] == 0)
6d52618b 3301 /* "If this keyword [toupper] is not specified, the lowercase letters
19bc17a9
RM
3302 `a' through `z', and their corresponding uppercase letters `A' to
3303 `Z', ..., shall automatically be included, with implementation-
3304 defined character values." [P1003.2, 2.5.2.1] */
3305 {
3306 char tmp[4];
3307 int ch;
3308
3309 strcpy (tmp, "<?>");
3310
3311 for (ch = 'a'; ch <= 'z'; ++ch)
3312 {
4b10dd6c 3313 struct charseq *seq_from, *seq_to;
19bc17a9
RM
3314
3315 tmp[1] = (char) ch;
3316
4b10dd6c 3317 seq_from = charmap_find_value (charmap, &tmp[1], 1);
69c69fe1
UD
3318 if (seq_from == NULL)
3319 {
3320 char buf[10];
3321 sprintf (buf, "U%08X", ch);
3322 seq_from = charmap_find_value (charmap, buf, 9);
3323 }
4b10dd6c 3324 if (seq_from == NULL)
19bc17a9 3325 {
880f421f 3326 if (!be_quiet)
f2b98f97 3327 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 3328%s: character `%s' not defined while needed as default value"),
f2b98f97 3329 "LC_CTYPE", tmp));
4b10dd6c
UD
3330 }
3331 else if (seq_from->nbytes != 1)
3332 {
3333 if (!be_quiet)
f2b98f97 3334 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 3335%s: character `%s' needed as default value not representable with one byte"),
f2b98f97 3336 "LC_CTYPE", tmp));
4b10dd6c
UD
3337 }
3338 else
3339 {
3340 /* This conversion is implementation defined. */
3341 tmp[1] = (char) (ch + ('A' - 'a'));
3342 seq_to = charmap_find_value (charmap, &tmp[1], 1);
69c69fe1
UD
3343 if (seq_to == NULL)
3344 {
3345 char buf[10];
3346 sprintf (buf, "U%08X", ch + ('A' - 'a'));
3347 seq_to = charmap_find_value (charmap, buf, 9);
3348 }
4b10dd6c
UD
3349 if (seq_to == NULL)
3350 {
3351 if (!be_quiet)
f2b98f97 3352 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 3353%s: character `%s' not defined while needed as default value"),
f2b98f97 3354 "LC_CTYPE", tmp));
4b10dd6c
UD
3355 }
3356 else if (seq_to->nbytes != 1)
3357 {
3358 if (!be_quiet)
f2b98f97 3359 WITH_CUR_LOCALE (error (0, 0, _("\
4b10dd6c 3360%s: character `%s' needed as default value not representable with one byte"),
f2b98f97 3361 "LC_CTYPE", tmp));
4b10dd6c
UD
3362 }
3363 else
3364 /* The index [0] is determined by the order of the
3365 `ctype_map_newP' calls in `ctype_startup'. */
3366 ctype->map256_collection[0][seq_from->bytes[0]]
3367 = seq_to->bytes[0];
19bc17a9 3368 }
f0a4b6b1
UD
3369
3370 /* No need to search. */
3371 ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
19bc17a9
RM
3372 }
3373 }
3374
4b10dd6c 3375 if (ctype->tomap_done[1] == 0)
19bc17a9
RM
3376 /* "If this keyword [tolower] is not specified, the mapping shall be
3377 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3378 {
19bc17a9
RM
3379 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3380 if (ctype->map_collection[0][cnt] != 0)
3381 ELEM (ctype, map_collection, [1],
3382 ctype->map_collection[0][cnt])
3383 = ctype->charnames[cnt];
4b10dd6c
UD
3384
3385 for (cnt = 0; cnt < 256; ++cnt)
3386 if (ctype->map256_collection[0][cnt] != 0)
85cb60ff 3387 ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
4b10dd6c
UD
3388 }
3389
69c69fe1 3390 if (ctype->outdigits_act != 10)
4b10dd6c 3391 {
69c69fe1 3392 if (ctype->outdigits_act != 0)
f2b98f97
UD
3393 WITH_CUR_LOCALE (error (0, 0, _("\
3394%s: field `%s' does not contain exactly ten entries"),
3395 "LC_CTYPE", "outdigit"));
69c69fe1
UD
3396
3397 for (cnt = ctype->outdigits_act; cnt < 10; ++cnt)
4b10dd6c
UD
3398 {
3399 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
701666b7
UD
3400 (char *) digits + cnt,
3401 1);
4b10dd6c
UD
3402
3403 if (ctype->mboutdigits[cnt] == NULL)
1b97149d
UD
3404 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3405 longnames[cnt],
3406 strlen (longnames[cnt]));
b9eb05d6 3407
1b97149d
UD
3408 if (ctype->mboutdigits[cnt] == NULL)
3409 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3410 uninames[cnt], 9);
b9eb05d6 3411
1b97149d 3412 if (ctype->mboutdigits[cnt] == NULL)
b9eb05d6 3413 {
1b97149d 3414 /* Provide a replacement. */
f2b98f97
UD
3415 WITH_CUR_LOCALE (error (0, 0, _("\
3416no output digits defined and none of the standard names in the charmap")));
b9eb05d6 3417
47e8b443 3418 ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
1b97149d
UD
3419 sizeof (struct charseq)
3420 + 1);
b9eb05d6 3421
1b97149d
UD
3422 /* This is better than nothing. */
3423 ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3424 ctype->mboutdigits[cnt]->nbytes = 1;
b9eb05d6 3425 }
1b97149d
UD
3426
3427 ctype->wcoutdigits[cnt] = L'0' + cnt;
4b10dd6c
UD
3428 }
3429
3430 ctype->outdigits_act = 10;
19bc17a9
RM
3431 }
3432}
3433
3434
ef446144
UD
3435/* Initialize. Assumes t->p and t->q have already been set. */
3436static inline void
3437wctype_table_init (struct wctype_table *t)
3438{
d6040f17 3439 t->level1 = NULL;
ef446144 3440 t->level1_alloc = t->level1_size = 0;
d6040f17 3441 t->level2 = NULL;
ef446144 3442 t->level2_alloc = t->level2_size = 0;
d6040f17 3443 t->level3 = NULL;
ef446144
UD
3444 t->level3_alloc = t->level3_size = 0;
3445}
3446
ec08818d
UD
3447/* Retrieve an entry. */
3448static inline int
3449wctype_table_get (struct wctype_table *t, uint32_t wc)
3450{
3451 uint32_t index1 = wc >> (t->q + t->p + 5);
3452 if (index1 < t->level1_size)
3453 {
3454 uint32_t lookup1 = t->level1[index1];
bd75759f 3455 if (lookup1 != EMPTY)
ec08818d
UD
3456 {
3457 uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3458 + (lookup1 << t->q);
3459 uint32_t lookup2 = t->level2[index2];
bd75759f 3460 if (lookup2 != EMPTY)
ec08818d
UD
3461 {
3462 uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3463 + (lookup2 << t->p);
3464 uint32_t lookup3 = t->level3[index3];
3465 uint32_t index4 = wc & 0x1f;
3466
3467 return (lookup3 >> index4) & 1;
3468 }
3469 }
3470 }
3471 return 0;
3472}
3473
ef446144
UD
3474/* Add one entry. */
3475static void
3476wctype_table_add (struct wctype_table *t, uint32_t wc)
3477{
3478 uint32_t index1 = wc >> (t->q + t->p + 5);
3479 uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3480 uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3481 uint32_t index4 = wc & 0x1f;
3482 size_t i, i1, i2;
3483
3484 if (index1 >= t->level1_size)
3485 {
3486 if (index1 >= t->level1_alloc)
3487 {
3488 size_t alloc = 2 * t->level1_alloc;
3489 if (alloc <= index1)
3490 alloc = index1 + 1;
d6040f17
UD
3491 t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
3492 alloc * sizeof (uint32_t));
ef446144
UD
3493 t->level1_alloc = alloc;
3494 }
3495 while (index1 >= t->level1_size)
bd75759f 3496 t->level1[t->level1_size++] = EMPTY;
ef446144
UD
3497 }
3498
bd75759f 3499 if (t->level1[index1] == EMPTY)
ef446144
UD
3500 {
3501 if (t->level2_size == t->level2_alloc)
3502 {
3503 size_t alloc = 2 * t->level2_alloc + 1;
d6040f17
UD
3504 t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
3505 (alloc << t->q) * sizeof (uint32_t));
ef446144
UD
3506 t->level2_alloc = alloc;
3507 }
3508 i1 = t->level2_size << t->q;
3509 i2 = (t->level2_size + 1) << t->q;
3510 for (i = i1; i < i2; i++)
bd75759f 3511 t->level2[i] = EMPTY;
ef446144
UD
3512 t->level1[index1] = t->level2_size++;
3513 }
3514
3515 index2 += t->level1[index1] << t->q;
3516
bd75759f 3517 if (t->level2[index2] == EMPTY)
ef446144
UD
3518 {
3519 if (t->level3_size == t->level3_alloc)
3520 {
3521 size_t alloc = 2 * t->level3_alloc + 1;
d6040f17
UD
3522 t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
3523 (alloc << t->p) * sizeof (uint32_t));
ef446144
UD
3524 t->level3_alloc = alloc;
3525 }
3526 i1 = t->level3_size << t->p;
3527 i2 = (t->level3_size + 1) << t->p;
3528 for (i = i1; i < i2; i++)
3529 t->level3[i] = 0;
3530 t->level2[index2] = t->level3_size++;
3531 }
3532
3533 index3 += t->level2[index2] << t->p;
3534
3535 t->level3[index3] |= (uint32_t)1 << index4;
3536}
3537
3538/* Finalize and shrink. */
3539static void
1ecbb381 3540add_locale_wctype_table (struct locale_file *file, struct wctype_table *t)
ef446144
UD
3541{
3542 size_t i, j, k;
3543 uint32_t reorder3[t->level3_size];
3544 uint32_t reorder2[t->level2_size];
1ecbb381 3545 uint32_t level2_offset, level3_offset;
ef446144
UD
3546
3547 /* Uniquify level3 blocks. */
3548 k = 0;
3549 for (j = 0; j < t->level3_size; j++)
3550 {
3551 for (i = 0; i < k; i++)
3552 if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3553 (1 << t->p) * sizeof (uint32_t)) == 0)
3554 break;
3555 /* Relocate block j to block i. */
3556 reorder3[j] = i;
3557 if (i == k)
3558 {
3559 if (i != j)
3560 memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3561 (1 << t->p) * sizeof (uint32_t));
3562 k++;
3563 }
3564 }
3565 t->level3_size = k;
3566
3567 for (i = 0; i < (t->level2_size << t->q); i++)
bd75759f 3568 if (t->level2[i] != EMPTY)
ef446144
UD
3569 t->level2[i] = reorder3[t->level2[i]];
3570
3571 /* Uniquify level2 blocks. */
3572 k = 0;
3573 for (j = 0; j < t->level2_size; j++)
3574 {
3575 for (i = 0; i < k; i++)
3576 if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3577 (1 << t->q) * sizeof (uint32_t)) == 0)
3578 break;
3579 /* Relocate block j to block i. */
3580 reorder2[j] = i;
3581 if (i == k)
3582 {
3583 if (i != j)
3584 memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3585 (1 << t->q) * sizeof (uint32_t));
3586 k++;
3587 }
3588 }
3589 t->level2_size = k;
3590
3591 for (i = 0; i < t->level1_size; i++)
bd75759f 3592 if (t->level1[i] != EMPTY)
ef446144
UD
3593 t->level1[i] = reorder2[t->level1[i]];
3594
ef446144
UD
3595 t->result_size =
3596 5 * sizeof (uint32_t)
3597 + t->level1_size * sizeof (uint32_t)
3598 + (t->level2_size << t->q) * sizeof (uint32_t)
3599 + (t->level3_size << t->p) * sizeof (uint32_t);
ef446144 3600
ef446144
UD
3601 level2_offset =
3602 5 * sizeof (uint32_t)
3603 + t->level1_size * sizeof (uint32_t);
3604 level3_offset =
3605 5 * sizeof (uint32_t)
3606 + t->level1_size * sizeof (uint32_t)
3607 + (t->level2_size << t->q) * sizeof (uint32_t);
3608
1ecbb381
RS
3609 start_locale_structure (file);
3610 add_locale_uint32 (file, t->q + t->p + 5);
3611 add_locale_uint32 (file, t->level1_size);
3612 add_locale_uint32 (file, t->p + 5);
3613 add_locale_uint32 (file, (1 << t->q) - 1);
3614 add_locale_uint32 (file, (1 << t->p) - 1);
ef446144
UD
3615
3616 for (i = 0; i < t->level1_size; i++)
1ecbb381
RS
3617 add_locale_uint32
3618 (file,
3619 t->level1[i] == EMPTY
ef446144
UD
3620 ? 0
3621 : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3622
3623 for (i = 0; i < (t->level2_size << t->q); i++)
1ecbb381
RS
3624 add_locale_uint32
3625 (file,
3626 t->level2[i] == EMPTY
ef446144
UD
3627 ? 0
3628 : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3629
1ecbb381
RS
3630 add_locale_uint32_array (file, t->level3, t->level3_size << t->p);
3631 end_locale_structure (file);
ef446144
UD
3632
3633 if (t->level1_alloc > 0)
3634 free (t->level1);
3635 if (t->level2_alloc > 0)
3636 free (t->level2);
3637 if (t->level3_alloc > 0)
3638 free (t->level3);
3639}
3640
02fb3d17
UD
3641/* Flattens the included transliterations into a translit list.
3642 Inserts them in the list at `cursor', and returns the new cursor. */
3643static struct translit_t **
47e8b443
UD
3644translit_flatten (struct locale_ctype_t *ctype,
3645 const struct charmap_t *charmap,
02fb3d17
UD
3646 struct translit_t **cursor)
3647{
3648 while (ctype->translit_include != NULL)
3649 {
3650 const char *copy_locale = ctype->translit_include->copy_locale;
3651 const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3652 struct localedef_t *other;
3653
3654 /* Unchain the include statement. During the depth-first traversal
3655 we don't want to visit any locale more than once. */
3656 ctype->translit_include = ctype->translit_include->next;
3657
3658 other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3659
6e310111 3660 if (other == NULL || other->categories[LC_CTYPE].ctype == NULL)
02fb3d17 3661 {
f2b98f97 3662 WITH_CUR_LOCALE (error (0, 0, _("\
02fb3d17 3663%s: transliteration data from locale `%s' not available"),
f2b98f97 3664 "LC_CTYPE", copy_locale));
02fb3d17
UD
3665 }
3666 else
3667 {
3668 struct locale_ctype_t *other_ctype =
3669 other->categories[LC_CTYPE].ctype;
3670
3671 cursor = translit_flatten (other_ctype, charmap, cursor);
3672 assert (other_ctype->translit_include == NULL);
3673
3674 if (other_ctype->translit != NULL)
3675 {
3676 /* Insert the other_ctype->translit list at *cursor. */
3677 struct translit_t *endp = other_ctype->translit;
3678 while (endp->next != NULL)
3679 endp = endp->next;
3680
3681 endp->next = *cursor;
3682 *cursor = other_ctype->translit;
3683
3684 /* Avoid any risk of circular lists. */
3685 other_ctype->translit = NULL;
3686
3687 cursor = &endp->next;
3688 }
3689
3690 if (ctype->default_missing == NULL)
3691 ctype->default_missing = other_ctype->default_missing;
3692 }
3693 }
3694
3695 return cursor;
3696}
3697
19bc17a9 3698static void
47e8b443 3699allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
4b10dd6c 3700 struct repertoire_t *repertoire)
19bc17a9 3701{
4c7d276e 3702 size_t idx, nr;
0e16ecfa
UD
3703 const void *key;
3704 size_t len;
3705 void *vdata;
3706 void *curs;
5d431a3e 3707
19bc17a9
RM
3708 /* You wonder about this amount of memory? This is only because some
3709 users do not manage to address the array with unsigned values or
3710 data types with range >= 256. '\200' would result in the array
3711 index -128. To help these poor people we duplicate the entries for
3712 128 up to 255 below the entry for \0. */
4c7d276e
UD
3713 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
3714 ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
3715 ctype->class_b = (uint32_t **)
3716 xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
1ecbb381
RS
3717 ctype->class_3level = (struct wctype_table *)
3718 xmalloc (ctype->nr_charclass * sizeof (struct wctype_table));
19bc17a9 3719
4a33c2f5 3720 /* This is the array accessed using the multibyte string elements. */
4b10dd6c 3721 for (idx = 0; idx < 256; ++idx)
4a33c2f5 3722 ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
19bc17a9 3723
75cd5204
RM
3724 /* Mirror first 127 entries. We must take care that entry -1 is not
3725 mirrored because EOF == -1. */
3726 for (idx = 0; idx < 127; ++idx)
19bc17a9
RM
3727 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3728
4c7d276e
UD
3729 /* The 32 bit array contains all characters < 0x100. */
3730 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3731 if (ctype->charnames[idx] < 0x100)
3732 ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
ef446144 3733
4c7d276e 3734 for (nr = 0; nr < ctype->nr_charclass; nr++)
ef446144 3735 {
4c7d276e 3736 ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
ef446144 3737
9a5c46e8
UD
3738 /* We only set CLASS_B for the bits in the ISO C classes, not
3739 the user defined classes. The number should not change but
3740 who knows. */
3741#define LAST_ISO_C_BIT 11
3742 if (nr <= LAST_ISO_C_BIT)
3743 for (idx = 0; idx < 256; ++idx)
3744 if (ctype->class256_collection[idx] & _ISbit (nr))
3745 ctype->class_b[nr][idx >> 5] |= (uint32_t) 1 << (idx & 0x1f);
4c7d276e 3746 }
8fb81470 3747
4c7d276e
UD
3748 for (nr = 0; nr < ctype->nr_charclass; nr++)
3749 {
1ecbb381 3750 struct wctype_table *t;
ef446144 3751
1ecbb381
RS
3752 t = &ctype->class_3level[nr];
3753 t->p = 4; /* or: 5 */
3754 t->q = 7; /* or: 6 */
3755 wctype_table_init (t);
ef446144 3756
4c7d276e
UD
3757 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3758 if (ctype->class_collection[idx] & _ISwbit (nr))
1ecbb381 3759 wctype_table_add (t, ctype->charnames[idx]);
ef446144 3760
4c7d276e 3761 if (verbose)
f2b98f97
UD
3762 WITH_CUR_LOCALE (fprintf (stderr, _("\
3763%s: table for class \"%s\": %lu bytes\n"),
3764 "LC_CTYPE", ctype->classnames[nr],
1ecbb381 3765 (unsigned long int) t->result_size));
ef446144 3766 }
19bc17a9
RM
3767
3768 /* Room for table of mappings. */
4c7d276e
UD
3769 ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3770 ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3771 * sizeof (uint32_t *));
1ecbb381
RS
3772 ctype->map_3level = (struct wctrans_table *)
3773 xmalloc (ctype->map_collection_nr * sizeof (struct wctrans_table));
19bc17a9
RM
3774
3775 /* Fill in all mappings. */
49f2be5b 3776 for (idx = 0; idx < 2; ++idx)
19bc17a9
RM
3777 {
3778 unsigned int idx2;
3779
3780 /* Allocate table. */
4c7d276e
UD
3781 ctype->map_b[idx] = (uint32_t *)
3782 xmalloc ((256 + 128) * sizeof (uint32_t));
19bc17a9
RM
3783
3784 /* Copy values from collection. */
4b10dd6c 3785 for (idx2 = 0; idx2 < 256; ++idx2)
4c7d276e 3786 ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
19bc17a9 3787
75cd5204
RM
3788 /* Mirror first 127 entries. We must take care not to map entry
3789 -1 because EOF == -1. */
3790 for (idx2 = 0; idx2 < 127; ++idx2)
4c7d276e 3791 ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
19bc17a9 3792
75cd5204 3793 /* EOF must map to EOF. */
4c7d276e 3794 ctype->map_b[idx][127] = EOF;
49f2be5b 3795 }
a9c27b3e 3796
49f2be5b
UD
3797 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3798 {
3799 unsigned int idx2;
3800
3801 /* Allocate table. */
4c7d276e 3802 ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
49f2be5b 3803
4c7d276e 3804 /* Copy values from collection. Default is identity mapping. */
49f2be5b 3805 for (idx2 = 0; idx2 < 256; ++idx2)
4c7d276e
UD
3806 ctype->map32_b[idx][idx2] =
3807 (ctype->map_collection[idx][idx2] != 0
3808 ? ctype->map_collection[idx][idx2]
3809 : idx2);
ef446144
UD
3810 }
3811
4c7d276e 3812 for (nr = 0; nr < ctype->map_collection_nr; nr++)
ef446144 3813 {
1ecbb381 3814 struct wctrans_table *t;
ef446144 3815
1ecbb381
RS
3816 t = &ctype->map_3level[nr];
3817 t->p = 7;
3818 t->q = 9;
3819 wctrans_table_init (t);
ef446144 3820
4c7d276e
UD
3821 for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
3822 if (ctype->map_collection[nr][idx] != 0)
1ecbb381 3823 wctrans_table_add (t, ctype->charnames[idx],
4c7d276e 3824 ctype->map_collection[nr][idx]);
ef446144 3825
4c7d276e 3826 if (verbose)
f2b98f97
UD
3827 WITH_CUR_LOCALE (fprintf (stderr, _("\
3828%s: table for map \"%s\": %lu bytes\n"),
3829 "LC_CTYPE", ctype->mapnames[nr],
1ecbb381 3830 (unsigned long int) t->result_size));
19bc17a9
RM
3831 }
3832
3833 /* Extra array for class and map names. */
4b10dd6c
UD
3834 ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3835 * sizeof (uint32_t));
3836 ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3837 * sizeof (uint32_t));
75cd5204 3838
4c7d276e
UD
3839 ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3840 ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
5866b131 3841
4a9dcff1
UD
3842 /* Array for width information. Because the expected widths are very
3843 small (never larger than 2) we use only one single byte. This
3844 saves space.
3845 We put only printable characters in the table. wcwidth is specified
3846 to return -1 for non-printable characters. Doing the check here
3847 saves a run-time check.
3848 But we put L'\0' in the table. This again saves a run-time check. */
4c7d276e 3849 {
1ecbb381 3850 struct wcwidth_table *t;
ef446144 3851
1ecbb381
RS
3852 t = &ctype->width;
3853 t->p = 7;
3854 t->q = 9;
3855 wcwidth_table_init (t);
ef446144 3856
4a9dcff1
UD
3857 /* First set all the printable characters of the character set to
3858 the default width. */
4c7d276e
UD
3859 curs = NULL;
3860 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
3861 {
3862 struct charseq *data = (struct charseq *) vdata;
0e16ecfa 3863
4c7d276e
UD
3864 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3865 data->ucs4 = repertoire_find_value (ctype->repertoire,
3866 data->name, len);
ef446144 3867
4c7d276e 3868 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
4a9dcff1
UD
3869 {
3870 uint32_t *class_bits =
3871 find_idx (ctype, &ctype->class_collection, NULL,
3872 &ctype->class_collection_act, data->ucs4);
3873
3874 if (class_bits != NULL && (*class_bits & BITw (tok_print)))
1ecbb381 3875 wcwidth_table_add (t, data->ucs4, charmap->width_default);
4a9dcff1 3876 }
4c7d276e 3877 }
ef446144 3878
4c7d276e
UD
3879 /* Now add the explicitly specified widths. */
3880 if (charmap->width_rules != NULL)
3881 {
3882 size_t cnt;
ef446144 3883
4c7d276e
UD
3884 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
3885 {
3886 unsigned char bytes[charmap->mb_cur_max];
3887 int nbytes = charmap->width_rules[cnt].from->nbytes;
3888
3889 /* We have the range of characters for which the width is
3890 specified described using byte sequences of the multibyte
3891 charset. We have to convert this to UCS4 now. And we
3892 cannot simply convert the beginning and the end of the
3893 sequence, we have to iterate over the byte sequence and
3894 convert it for every single character. */
3895 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
3896
3897 while (nbytes < charmap->width_rules[cnt].to->nbytes
3898 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
3899 nbytes) <= 0)
3900 {
3901 /* Find the UCS value for `bytes'. */
3902 int inner;
3903 uint32_t wch;
3904 struct charseq *seq =
701666b7 3905 charmap_find_symbol (charmap, (char *) bytes, nbytes);
0e16ecfa 3906
4c7d276e
UD
3907 if (seq == NULL)
3908 wch = ILLEGAL_CHAR_VALUE;
3909 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
3910 wch = seq->ucs4;
3911 else
3912 wch = repertoire_find_value (ctype->repertoire, seq->name,
3913 strlen (seq->name));
ef446144 3914
4c7d276e 3915 if (wch != ILLEGAL_CHAR_VALUE)
4a9dcff1
UD
3916 {
3917 /* Store the value. */
3918 uint32_t *class_bits =
3919 find_idx (ctype, &ctype->class_collection, NULL,
3920 &ctype->class_collection_act, wch);
3921
3922 if (class_bits != NULL && (*class_bits & BITw (tok_print)))
1ecbb381 3923 wcwidth_table_add (t, wch,
4a9dcff1
UD
3924 charmap->width_rules[cnt].width);
3925 }
ef446144 3926
4c7d276e
UD
3927 /* "Increment" the bytes sequence. */
3928 inner = nbytes - 1;
3929 while (inner >= 0 && bytes[inner] == 0xff)
3930 --inner;
0e16ecfa 3931
4c7d276e
UD
3932 if (inner < 0)
3933 {
3934 /* We have to extend the byte sequence. */
3935 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
3936 break;
ef446144 3937
4c7d276e
UD
3938 bytes[0] = 1;
3939 memset (&bytes[1], 0, nbytes);
3940 ++nbytes;
3941 }
3942 else
3943 {
3944 ++bytes[inner];
3945 while (++inner < nbytes)
3946 bytes[inner] = 0;
3947 }
3948 }
3949 }
3950 }
ef446144 3951
4a9dcff1 3952 /* Set the width of L'\0' to 0. */
1ecbb381 3953 wcwidth_table_add (t, 0, 0);
ef446144 3954
4c7d276e 3955 if (verbose)
f2b98f97 3956 WITH_CUR_LOCALE (fprintf (stderr, _("%s: table for width: %lu bytes\n"),
1ecbb381 3957 "LC_CTYPE", (unsigned long int) t->result_size));
4c7d276e 3958 }
0e16ecfa 3959
4b10dd6c
UD
3960 /* Set MB_CUR_MAX. */
3961 ctype->mb_cur_max = charmap->mb_cur_max;
6990326c 3962
4b10dd6c
UD
3963 /* Now determine the table for the transliteration information.
3964
3965 XXX It is not yet clear to me whether it is worth implementing a
3966 complicated algorithm which uses a hash table to locate the entries.
3967 For now I'll use a simple array which can be searched using binary
3968 search. */
02fb3d17
UD
3969 if (ctype->translit_include != NULL)
3970 /* Traverse the locales mentioned in the `include' statements in a
3971 depth-first way and fold in their transliteration information. */
3972 translit_flatten (ctype, charmap, &ctype->translit);
4b10dd6c
UD
3973
3974 if (ctype->translit != NULL)
3975 {
3976 /* First count how many entries we have. This is the upper limit
3977 since some entries from the included files might be overwritten. */
3978 size_t number = 0;
3979 size_t cnt;
3980 struct translit_t *runp = ctype->translit;
3981 struct translit_t **sorted;
3982 size_t from_len, to_len;
3983
3984 while (runp != NULL)
3985 {
3986 ++number;
3987 runp = runp->next;
3988 }
3989
3990 /* Next we allocate an array large enough and fill in the values. */
a9c27b3e
UD
3991 sorted = (struct translit_t **) alloca (number
3992 * sizeof (struct translit_t **));
4b10dd6c
UD
3993 runp = ctype->translit;
3994 number = 0;
3995 do
3996 {
3997 /* Search for the place where to insert this string.
3998 XXX Better use a real sorting algorithm later. */
3999 size_t idx = 0;
4000 int replace = 0;
4001
4002 while (idx < number)
4003 {
4004 int res = wcscmp ((const wchar_t *) sorted[idx]->from,
4005 (const wchar_t *) runp->from);
4006 if (res == 0)
4007 {
4008 replace = 1;
4009 break;
4010 }
4011 if (res > 0)
4012 break;
4013 ++idx;
4014 }
4015
4016 if (replace)
4017 sorted[idx] = runp;
4018 else
4019 {
4020 memmove (&sorted[idx + 1], &sorted[idx],
4021 (number - idx) * sizeof (struct translit_t *));
4022 sorted[idx] = runp;
4023 ++number;
4024 }
4025
4026 runp = runp->next;
4027 }
4028 while (runp != NULL);
4029
4030 /* The next step is putting all the possible transliteration
4031 strings in one memory block so that we can write it out.
4032 We need several different blocks:
9ca23765 4033 - index to the from-string array
4b10dd6c
UD
4034 - from-string array
4035 - index to the to-string array
4036 - to-string array.
4b10dd6c
UD
4037 */
4038 from_len = to_len = 0;
4039 for (cnt = 0; cnt < number; ++cnt)
4040 {
4041 struct translit_to_t *srunp;
4042 from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4043 srunp = sorted[cnt]->to;
4044 while (srunp != NULL)
4045 {
4046 to_len += wcslen ((const wchar_t *) srunp->str) + 1;
4047 srunp = srunp->next;
4048 }
4049 /* Plus one for the extra NUL character marking the end of
4050 the list for the current entry. */
4051 ++to_len;
4052 }
4053
4054 /* We can allocate the arrays for the results. */
4a33c2f5
UD
4055 ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
4056 ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
4057 ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
4058 ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
4b10dd6c
UD
4059
4060 from_len = 0;
4061 to_len = 0;
4062 for (cnt = 0; cnt < number; ++cnt)
4063 {
4064 size_t len;
4065 struct translit_to_t *srunp;
4066
4a33c2f5
UD
4067 ctype->translit_from_idx[cnt] = from_len;
4068 ctype->translit_to_idx[cnt] = to_len;
4b10dd6c
UD
4069
4070 len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4a33c2f5 4071 wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4b10dd6c
UD
4072 (const wchar_t *) sorted[cnt]->from, len);
4073 from_len += len;
4074
4a33c2f5 4075 ctype->translit_to_idx[cnt] = to_len;
4b10dd6c
UD
4076 srunp = sorted[cnt]->to;
4077 while (srunp != NULL)
4078 {
4079 len = wcslen ((const wchar_t *) srunp->str) + 1;
4a33c2f5 4080 wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4b10dd6c
UD
4081 (const wchar_t *) srunp->str, len);
4082 to_len += len;
4083 srunp = srunp->next;
4084 }
4a33c2f5 4085 ctype->translit_to_tbl[to_len++] = L'\0';
4b10dd6c 4086 }
4b10dd6c
UD
4087
4088 /* Store the information about the length. */
04fbc779 4089 ctype->translit_idx_size = number;
4b10dd6c
UD
4090 ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
4091 ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
4092 }
4093 else
4094 {
4095 /* Provide some dummy pointers since we have nothing to write out. */
4096 static uint32_t no_str = { 0 };
4097
4a33c2f5
UD
4098 ctype->translit_from_idx = &no_str;
4099 ctype->translit_from_tbl = &no_str;
4100 ctype->translit_to_tbl = &no_str;
4b10dd6c
UD
4101 ctype->translit_idx_size = 0;
4102 ctype->translit_from_tbl_size = 0;
4103 ctype->translit_to_tbl_size = 0;
4104 }
19bc17a9 4105}