]> git.ipfire.org Git - thirdparty/glibc.git/blame - locale/programs/ld-ctype.c
Prefer https to http for gnu.org and fsf.org URLs
[thirdparty/glibc.git] / locale / programs / ld-ctype.c
CommitLineData
04277e02 1/* Copyright (C) 1995-2019 Free Software Foundation, Inc.
c84142e8 2 This file is part of the GNU C Library.
4b10dd6c 3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
19bc17a9 4
43bc8ac6 5 This program is free software; you can redistribute it and/or modify
2e2efe65
RM
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
19bc17a9 9
43bc8ac6 10 This program is distributed in the hope that it will be useful,
c84142e8 11 but WITHOUT ANY WARRANTY; without even the implied warranty of
43bc8ac6
UD
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
19bc17a9 14
43bc8ac6 15 You should have received a copy of the GNU General Public License
5a82c748 16 along with this program; if not, see <https://www.gnu.org/licenses/>. */
19bc17a9
RM
17
18#ifdef HAVE_CONFIG_H
19# include <config.h>
20#endif
21
a68b0d31 22#include <alloca.h>
4b10dd6c 23#include <byteswap.h>
19bc17a9 24#include <endian.h>
4b10dd6c 25#include <errno.h>
19bc17a9 26#include <limits.h>
4b10dd6c
UD
27#include <obstack.h>
28#include <stdlib.h>
19bc17a9 29#include <string.h>
4b10dd6c
UD
30#include <wchar.h>
31#include <wctype.h>
e054f494 32#include <stdint.h>
4b10dd6c 33#include <sys/uio.h>
19bc17a9 34
f2b98f97 35#include "localedef.h"
4b10dd6c 36#include "charmap.h"
19bc17a9
RM
37#include "localeinfo.h"
38#include "langinfo.h"
4b10dd6c 39#include "linereader.h"
19bc17a9 40#include "locfile-token.h"
4b10dd6c 41#include "locfile.h"
19bc17a9 42
19bc17a9
RM
43#include <assert.h>
44
45
19bc17a9
RM
/* Position of a standard character class, counted from `tok_upper'.  */
#define BITPOS(class) ((class) - tok_upper)
/* Mask of a standard class built with _ISbit (used below with the
   256-entry class table) resp. _ISwbit (used with the wide character
   class collection).  */
#define BIT(class) (_ISbit (BITPOS (class)))
#define BITw(class) (_ISwbit (BITPOS (class)))

/* Lvalue of the entry for character VALUE in the growable array
   CTYPE->COLLECTION (optionally subscripted with IDX).  The lookup is
   delegated to find_idx together with the matching `_max' and `_act'
   bookkeeping members.  */
#define ELEM(ctype, collection, idx, value) \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \
             &ctype->collection##_act idx, value)


/* For compatibility with former implementations the number of bits
   for character classes is restricted to 16 for now.  Once that
   compatibility is no longer needed the number can grow to 32.  */
#define char_class_t uint16_t
#define char_class32_t uint32_t
4b10dd6c
UD
61
62
/* Types describing a transliteration action.  There is a possibly
   multiple character from-string and a set of multiple character
   to-strings.  All are 32bit values since this is what is used in
   the gconv functions.  */

/* One to-string of a transliteration action; the alternatives are
   chained through NEXT.  */
struct translit_to_t
{
  uint32_t *str;

  struct translit_to_t *next;
};
73
/* One transliteration action: the from-string, the place where it was
   defined, and the list of to-strings.  Actions are chained through
   NEXT.  */
struct translit_t
{
  uint32_t *from;

  /* File name and line number recorded for this entry.  */
  const char *fname;
  size_t lineno;

  struct translit_to_t *to;

  struct translit_t *next;
};
19bc17a9 85
a673fbcb
UD
/* One entry of the translit_ignore list: the values FROM, TO and
   STEP plus the place where the entry was defined.  Entries are
   chained through NEXT.  */
struct translit_ignore_t
{
  uint32_t from;
  uint32_t to;
  uint32_t step;

  /* File name and line number recorded for this entry.  */
  const char *fname;
  size_t lineno;

  struct translit_ignore_t *next;
};
97
19bc17a9 98
/* One transliteration include statement: the names of the locale and
   of the repertoire given in it.  Statements are chained through
   NEXT.  */
struct translit_include_t
{
  const char *copy_locale;
  const char *copy_repertoire;

  struct translit_include_t *next;
};
107
363a9899
OB
108/* Provide some dummy pointer for empty string. */
109static uint32_t no_str[] = { 0 };
110
02fb3d17 111
601d2942
UD
112/* Sparse table of uint32_t. */
113#define TABLE idx_table
114#define ELEMENT uint32_t
bd75759f 115#define DEFAULT ((uint32_t) ~0)
1ecbb381 116#define NO_ADD_LOCALE
601d2942
UD
117#include "3level.h"
118
1ecbb381
RS
119#define TABLE wcwidth_table
120#define ELEMENT uint8_t
121#define DEFAULT 0xff
122#include "3level.h"
123
124#define TABLE wctrans_table
125#define ELEMENT int32_t
126#define DEFAULT 0
127#define wctrans_table_add wctrans_table_add_internal
128#include "3level.h"
129#undef wctrans_table_add
130/* The wctrans_table must actually store the difference between the
131 desired result and the argument. */
132static inline void
133wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
134{
135 wctrans_table_add_internal (t, wc, mapped_wc - wc);
136}
137
/* Construction of sparse 3-level tables.
   See wchar-lookup.h for their structure and the meaning of p and q.  */

struct wctype_table
{
  /* Parameters.  */
  unsigned int p;
  unsigned int q;
  /* Working representation: for each of the three levels the
     allocated size, the used size and the data itself.  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;
  size_t level3_alloc;
  size_t level3_size;
  uint32_t *level3;
  size_t result_size;
};

/* Defined later in this file.  */
static void add_locale_wctype_table (struct locale_file *file,
                                     struct wctype_table *t);
601d2942 161
19bc17a9
RM
162/* The real definition of the struct for the LC_CTYPE locale. */
163struct locale_ctype_t
164{
4b10dd6c 165 uint32_t *charnames;
19bc17a9
RM
166 size_t charnames_max;
167 size_t charnames_act;
04ea3b0f 168 /* An index lookup table, to speedup find_idx. */
601d2942 169 struct idx_table charnames_idx;
19bc17a9 170
4b10dd6c
UD
171 struct repertoire_t *repertoire;
172
173 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
174#define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
ba1ffaa1 175 size_t nr_charclass;
19bc17a9 176 const char *classnames[MAX_NR_CHARCLASS];
4b10dd6c
UD
177 uint32_t last_class_char;
178 uint32_t class256_collection[256];
179 uint32_t *class_collection;
19bc17a9
RM
180 size_t class_collection_max;
181 size_t class_collection_act;
4b10dd6c 182 uint32_t class_done;
ef446144 183 uint32_t class_offset;
4b10dd6c
UD
184
185 struct charseq **mbdigits;
186 size_t mbdigits_act;
187 size_t mbdigits_max;
188 uint32_t *wcdigits;
189 size_t wcdigits_act;
190 size_t wcdigits_max;
191
192 struct charseq *mboutdigits[10];
193 uint32_t wcoutdigits[10];
194 size_t outdigits_act;
19bc17a9
RM
195
196 /* If the following number ever turns out to be too small simply
197 increase it. But I doubt it will. --drepper@gnu */
198#define MAX_NR_CHARMAP 16
199 const char *mapnames[MAX_NR_CHARMAP];
4b10dd6c
UD
200 uint32_t *map_collection[MAX_NR_CHARMAP];
201 uint32_t map256_collection[2][256];
9a0a462c
UD
202 size_t map_collection_max[MAX_NR_CHARMAP];
203 size_t map_collection_act[MAX_NR_CHARMAP];
19bc17a9
RM
204 size_t map_collection_nr;
205 size_t last_map_idx;
4b10dd6c 206 int tomap_done[MAX_NR_CHARMAP];
ef446144 207 uint32_t map_offset;
4b10dd6c
UD
208
209 /* Transliteration information. */
02fb3d17 210 struct translit_include_t *translit_include;
4b10dd6c 211 struct translit_t *translit;
a673fbcb 212 struct translit_ignore_t *translit_ignore;
a8e4c924 213 uint32_t ntranslit_ignore;
a673fbcb
UD
214
215 uint32_t *default_missing;
216 const char *default_missing_file;
217 size_t default_missing_lineno;
19bc17a9 218
f0c7c524 219 uint32_t to_nonascii;
8a449450 220 uint32_t nonascii_case;
f0c7c524 221
19bc17a9 222 /* The arrays for the binary representation. */
19bc17a9
RM
223 char_class_t *ctype_b;
224 char_class32_t *ctype32_b;
4c7d276e
UD
225 uint32_t **map_b;
226 uint32_t **map32_b;
8fb81470 227 uint32_t **class_b;
1ecbb381
RS
228 struct wctype_table *class_3level;
229 struct wctrans_table *map_3level;
4b10dd6c
UD
230 uint32_t *class_name_ptr;
231 uint32_t *map_name_ptr;
1ecbb381 232 struct wcwidth_table width;
4b10dd6c 233 uint32_t mb_cur_max;
6990326c 234 const char *codeset_name;
4a33c2f5
UD
235 uint32_t *translit_from_idx;
236 uint32_t *translit_from_tbl;
237 uint32_t *translit_to_idx;
238 uint32_t *translit_to_tbl;
04fbc779 239 uint32_t translit_idx_size;
4b10dd6c
UD
240 size_t translit_from_tbl_size;
241 size_t translit_to_tbl_size;
242
a673fbcb 243 struct obstack mempool;
19bc17a9
RM
244};
245
47e8b443 246
bd75759f
AJ
/* Marker for an empty slot.  The cast makes the value 0xFFFFFFFF no
   matter whether 'int' is 16 bit, 32 bit, or 64 bit wide.  */
#define EMPTY ((uint32_t) ~0)


/* Allocation hooks used by the obstack macros.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free


/* Prototypes for local functions.  */
static void ctype_startup (struct linereader *lr,
                           struct localedef_t *locale,
                           const struct charmap_t *charmap,
                           struct localedef_t *copy_locale,
                           int ignore_content);
static void ctype_class_new (struct linereader *lr,
                             struct locale_ctype_t *ctype,
                             const char *name);
static void ctype_map_new (struct linereader *lr,
                           struct locale_ctype_t *ctype,
                           const char *name,
                           const struct charmap_t *charmap);
static uint32_t *find_idx (struct locale_ctype_t *ctype,
                           uint32_t **table,
                           size_t *max, size_t *act, uint32_t idx);
static void set_class_defaults (struct locale_ctype_t *ctype,
                                const struct charmap_t *charmap,
                                struct repertoire_t *repertoire);
static void allocate_arrays (struct locale_ctype_t *ctype,
                             const struct charmap_t *charmap,
                             struct repertoire_t *repertoire);
19bc17a9
RM
274
275
4b10dd6c
UD
/* Names under which the ten decimal digits are looked up: spelled-out
   names, UCS names, and the plain ASCII characters.  All three tables
   are indexed by digit value.  The tables are never modified, so the
   pointer arrays themselves are const as well.  */
static const char *const longnames[] =
{
  "zero", "one", "two", "three", "four",
  "five", "six", "seven", "eight", "nine"
};
static const char *const uninames[] =
{
  "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
  "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
};
static const unsigned char digits[] = "0123456789";
287
288
289static void
19bc17a9 290ctype_startup (struct linereader *lr, struct localedef_t *locale,
47e8b443
UD
291 const struct charmap_t *charmap,
292 struct localedef_t *copy_locale, int ignore_content)
19bc17a9
RM
293{
294 unsigned int cnt;
295 struct locale_ctype_t *ctype;
296
a6bd56c7 297 if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
19bc17a9 298 {
a6bd56c7
UD
299 if (copy_locale == NULL)
300 {
301 /* Allocate the needed room. */
302 locale->categories[LC_CTYPE].ctype = ctype =
303 (struct locale_ctype_t *) xcalloc (1,
304 sizeof (struct locale_ctype_t));
305
306 /* We have seen no names yet. */
307 ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
08ffcf34
RS
308 ctype->charnames = (uint32_t *) xmalloc (ctype->charnames_max
309 * sizeof (uint32_t));
a6bd56c7
UD
310 for (cnt = 0; cnt < 256; ++cnt)
311 ctype->charnames[cnt] = cnt;
312 ctype->charnames_act = 256;
601d2942 313 idx_table_init (&ctype->charnames_idx);
a6bd56c7
UD
314
315 /* Fill character class information. */
316 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
317 /* The order of the following instructions determines the bit
318 positions! */
319 ctype_class_new (lr, ctype, "upper");
320 ctype_class_new (lr, ctype, "lower");
321 ctype_class_new (lr, ctype, "alpha");
322 ctype_class_new (lr, ctype, "digit");
323 ctype_class_new (lr, ctype, "xdigit");
324 ctype_class_new (lr, ctype, "space");
325 ctype_class_new (lr, ctype, "print");
326 ctype_class_new (lr, ctype, "graph");
327 ctype_class_new (lr, ctype, "blank");
328 ctype_class_new (lr, ctype, "cntrl");
329 ctype_class_new (lr, ctype, "punct");
330 ctype_class_new (lr, ctype, "alnum");
4b10dd6c 331
a6bd56c7
UD
332 ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
333 ctype->class_collection
334 = (uint32_t *) xcalloc (sizeof (unsigned long int),
335 ctype->class_collection_max);
336 ctype->class_collection_act = 256;
4b10dd6c 337
a6bd56c7
UD
338 /* Fill character map information. */
339 ctype->last_map_idx = MAX_NR_CHARMAP;
340 ctype_map_new (lr, ctype, "toupper", charmap);
341 ctype_map_new (lr, ctype, "tolower", charmap);
4b10dd6c 342
a6bd56c7
UD
343 /* Fill first 256 entries in `toXXX' arrays. */
344 for (cnt = 0; cnt < 256; ++cnt)
345 {
346 ctype->map_collection[0][cnt] = cnt;
347 ctype->map_collection[1][cnt] = cnt;
fc5771e4 348
a6bd56c7
UD
349 ctype->map256_collection[0][cnt] = cnt;
350 ctype->map256_collection[1][cnt] = cnt;
351 }
4b10dd6c 352
cb2eab1f
UD
353 if (enc_not_ascii_compatible)
354 ctype->to_nonascii = 1;
355
a6bd56c7
UD
356 obstack_init (&ctype->mempool);
357 }
358 else
359 ctype = locale->categories[LC_CTYPE].ctype =
360 copy_locale->categories[LC_CTYPE].ctype;
19bc17a9
RM
361 }
362}
363
364
365void
47e8b443 366ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
19bc17a9
RM
367{
368 /* See POSIX.2, table 2-6 for the meaning of the following table. */
369#define NCLASS 12
370 static const struct
371 {
372 const char *name;
373 const char allow[NCLASS];
374 }
375 valid_table[NCLASS] =
376 {
377 /* The order is important. See token.h for more information.
378 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
379 { "upper", "--MX-XDDXXX-" },
380 { "lower", "--MX-XDDXXX-" },
381 { "alpha", "---X-XDDXXX-" },
382 { "digit", "XXX--XDDXXX-" },
383 { "xdigit", "-----XDDXXX-" },
384 { "space", "XXXXX------X" },
385 { "print", "---------X--" },
386 { "graph", "---------X--" },
387 { "blank", "XXXXXM-----X" },
388 { "cntrl", "XXXXX-XX--XX" },
389 { "punct", "XXXXX-DD-X-X" },
390 { "alnum", "-----XDDXXX-" }
391 };
392 size_t cnt;
393 int cls1, cls2;
4b10dd6c
UD
394 uint32_t space_value;
395 struct charseq *space_seq;
19bc17a9 396 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
4b10dd6c 397 int warned;
0e16ecfa
UD
398 const void *key;
399 size_t len;
400 void *vdata;
401 void *curs;
19bc17a9 402
b9eb05d6
UD
403 /* Now resolve copying and also handle completely missing definitions. */
404 if (ctype == NULL)
405 {
70e51ab9
UD
406 const char *repertoire_name;
407
b9eb05d6
UD
408 /* First see whether we were supposed to copy. If yes, find the
409 actual definition. */
410 if (locale->copy_name[LC_CTYPE] != NULL)
411 {
412 /* Find the copying locale. This has to happen transitively since
413 the locale we are copying from might also copying another one. */
414 struct localedef_t *from = locale;
415
416 do
417 from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
418 from->repertoire_name, charmap);
419 while (from->categories[LC_CTYPE].ctype == NULL
420 && from->copy_name[LC_CTYPE] != NULL);
421
422 ctype = locale->categories[LC_CTYPE].ctype
423 = from->categories[LC_CTYPE].ctype;
424 }
425
426 /* If there is still no definition issue an warning and create an
427 empty one. */
428 if (ctype == NULL)
429 {
f16491eb
CD
430 record_warning (_("\
431No definition for %s category found"), "LC_CTYPE");
a6bd56c7 432 ctype_startup (NULL, locale, charmap, NULL, 0);
b9eb05d6
UD
433 ctype = locale->categories[LC_CTYPE].ctype;
434 }
70e51ab9
UD
435
436 /* Get the repertoire we have to use. */
437 repertoire_name = locale->repertoire_name ?: repertoire_global;
438 if (repertoire_name != NULL)
439 ctype->repertoire = repertoire_read (repertoire_name);
b9eb05d6
UD
440 }
441
db76d943
UD
442 /* We need the name of the currently used 8-bit character set to
443 make correct conversion between this 8-bit representation and the
444 ISO 10646 character set used internally for wide characters. */
445 ctype->codeset_name = charmap->code_set_name;
446 if (ctype->codeset_name == NULL)
447 {
f16491eb
CD
448 record_error (0, 0, _("\
449No character set name specified in charmap"));
db76d943
UD
450 ctype->codeset_name = "//UNKNOWN//";
451 }
452
19bc17a9 453 /* Set default value for classes not specified. */
4b10dd6c 454 set_class_defaults (ctype, charmap, ctype->repertoire);
19bc17a9
RM
455
456 /* Check according to table. */
42d7c593 457 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
19bc17a9 458 {
4b10dd6c 459 uint32_t tmp = ctype->class_collection[cnt];
19bc17a9 460
4b10dd6c
UD
461 if (tmp != 0)
462 {
463 for (cls1 = 0; cls1 < NCLASS; ++cls1)
464 if ((tmp & _ISwbit (cls1)) != 0)
465 for (cls2 = 0; cls2 < NCLASS; ++cls2)
466 if (valid_table[cls1].allow[cls2] != '-')
19bc17a9 467 {
4b10dd6c
UD
468 int eq = (tmp & _ISwbit (cls2)) != 0;
469 switch (valid_table[cls1].allow[cls2])
19bc17a9 470 {
4b10dd6c
UD
471 case 'M':
472 if (!eq)
473 {
474 uint32_t value = ctype->charnames[cnt];
475
f16491eb 476 record_error (0, 0, _("\
4b10dd6c 477character L'\\u%0*x' in class `%s' must be in class `%s'"),
f16491eb
CD
478 value > 0xffff ? 8 : 4,
479 value,
480 valid_table[cls1].name,
481 valid_table[cls2].name);
4b10dd6c
UD
482 }
483 break;
484
485 case 'X':
486 if (eq)
487 {
488 uint32_t value = ctype->charnames[cnt];
489
f16491eb 490 record_error (0, 0, _("\
4b10dd6c 491character L'\\u%0*x' in class `%s' must not be in class `%s'"),
f16491eb
CD
492 value > 0xffff ? 8 : 4,
493 value,
494 valid_table[cls1].name,
495 valid_table[cls2].name);
4b10dd6c
UD
496 }
497 break;
498
499 case 'D':
500 ctype->class_collection[cnt] |= _ISwbit (cls2);
501 break;
502
503 default:
f16491eb
CD
504 record_error (5, 0, _("\
505internal error in %s, line %u"), __FUNCTION__, __LINE__);
19bc17a9 506 }
4b10dd6c
UD
507 }
508 }
509 }
510
511 for (cnt = 0; cnt < 256; ++cnt)
512 {
513 uint32_t tmp = ctype->class256_collection[cnt];
19bc17a9 514
4b10dd6c
UD
515 if (tmp != 0)
516 {
517 for (cls1 = 0; cls1 < NCLASS; ++cls1)
518 if ((tmp & _ISbit (cls1)) != 0)
519 for (cls2 = 0; cls2 < NCLASS; ++cls2)
520 if (valid_table[cls1].allow[cls2] != '-')
521 {
522 int eq = (tmp & _ISbit (cls2)) != 0;
523 switch (valid_table[cls1].allow[cls2])
19bc17a9 524 {
4b10dd6c
UD
525 case 'M':
526 if (!eq)
527 {
528 char buf[17];
529
5d431a3e 530 snprintf (buf, sizeof buf, "\\%Zo", cnt);
4b10dd6c 531
f16491eb 532 record_error (0, 0, _("\
4b10dd6c 533character '%s' in class `%s' must be in class `%s'"),
f16491eb
CD
534 buf,
535 valid_table[cls1].name,
536 valid_table[cls2].name);
4b10dd6c
UD
537 }
538 break;
539
540 case 'X':
541 if (eq)
542 {
543 char buf[17];
544
5d431a3e 545 snprintf (buf, sizeof buf, "\\%Zo", cnt);
4b10dd6c 546
f16491eb 547 record_error (0, 0, _("\
4b10dd6c 548character '%s' in class `%s' must not be in class `%s'"),
f16491eb
CD
549 buf,
550 valid_table[cls1].name,
551 valid_table[cls2].name);
4b10dd6c
UD
552 }
553 break;
554
555 case 'D':
556 ctype->class256_collection[cnt] |= _ISbit (cls2);
557 break;
558
559 default:
f16491eb
CD
560 record_error (5, 0, _("\
561internal error in %s, line %u"), __FUNCTION__, __LINE__);
19bc17a9 562 }
4b10dd6c
UD
563 }
564 }
19bc17a9
RM
565 }
566
567 /* ... and now test <SP> as a special case. */
a0dc5206
UD
568 space_value = 32;
569 if (((cnt = BITPOS (tok_space),
570 (ELEM (ctype, class_collection, , space_value)
571 & BITw (tok_space)) == 0)
572 || (cnt = BITPOS (tok_blank),
573 (ELEM (ctype, class_collection, , space_value)
574 & BITw (tok_blank)) == 0)))
880f421f 575 {
f16491eb
CD
576 record_error (0, 0, _("<SP> character not in class `%s'"),
577 valid_table[cnt].name);
880f421f 578 }
c84142e8
UD
579 else if (((cnt = BITPOS (tok_punct),
580 (ELEM (ctype, class_collection, , space_value)
4b10dd6c 581 & BITw (tok_punct)) != 0)
c84142e8
UD
582 || (cnt = BITPOS (tok_graph),
583 (ELEM (ctype, class_collection, , space_value)
4b10dd6c 584 & BITw (tok_graph))
880f421f
UD
585 != 0)))
586 {
f16491eb 587 record_error (0, 0, _("\
f2b98f97 588<SP> character must not be in class `%s'"),
f16491eb 589 valid_table[cnt].name);
880f421f 590 }
19bc17a9 591 else
4b10dd6c
UD
592 ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
593
594 space_seq = charmap_find_value (charmap, "SP", 2);
ce177a84 595 if (space_seq == NULL)
45c95239
UD
596 space_seq = charmap_find_value (charmap, "space", 5);
597 if (space_seq == NULL)
1b97149d 598 space_seq = charmap_find_value (charmap, "U00000020", 9);
4b10dd6c
UD
599 if (space_seq == NULL || space_seq->nbytes != 1)
600 {
f16491eb
CD
601 record_error (0, 0, _("\
602character <SP> not defined in character map"));
4b10dd6c
UD
603 }
604 else if (((cnt = BITPOS (tok_space),
605 (ctype->class256_collection[space_seq->bytes[0]]
606 & BIT (tok_space)) == 0)
607 || (cnt = BITPOS (tok_blank),
608 (ctype->class256_collection[space_seq->bytes[0]]
609 & BIT (tok_blank)) == 0)))
610 {
f16491eb
CD
611 record_error (0, 0, _("<SP> character not in class `%s'"),
612 valid_table[cnt].name);
4b10dd6c
UD
613 }
614 else if (((cnt = BITPOS (tok_punct),
615 (ctype->class256_collection[space_seq->bytes[0]]
616 & BIT (tok_punct)) != 0)
617 || (cnt = BITPOS (tok_graph),
618 (ctype->class256_collection[space_seq->bytes[0]]
619 & BIT (tok_graph)) != 0)))
620 {
f16491eb 621 record_error (0, 0, _("\
f2b98f97 622<SP> character must not be in class `%s'"),
f16491eb 623 valid_table[cnt].name);
4b10dd6c
UD
624 }
625 else
626 ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
75cd5204 627
8a449450
UD
628 /* Check whether all single-byte characters make to their upper/lowercase
629 equivalent according to the ASCII rules. */
630 for (cnt = 'A'; cnt <= 'Z'; ++cnt)
631 {
632 uint32_t uppval = ctype->map256_collection[0][cnt];
633 uint32_t lowval = ctype->map256_collection[1][cnt];
634 uint32_t lowuppval = ctype->map256_collection[0][lowval];
635 uint32_t lowlowval = ctype->map256_collection[1][lowval];
636
637 if (uppval != cnt
638 || lowval != cnt + 0x20
639 || lowuppval != cnt
640 || lowlowval != cnt + 0x20)
641 ctype->nonascii_case = 1;
642 }
643 for (cnt = 0; cnt < 256; ++cnt)
644 if (cnt < 'A' || (cnt > 'Z' && cnt < 'a') || cnt > 'z')
645 if (ctype->map256_collection[0][cnt] != cnt
646 || ctype->map256_collection[1][cnt] != cnt)
647 ctype->nonascii_case = 1;
648
75cd5204
RM
649 /* Now that the tests are done make sure the name array contains all
650 characters which are handled in the WIDTH section of the
651 character set definition file. */
4b10dd6c
UD
652 if (charmap->width_rules != NULL)
653 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
75cd5204 654 {
827ff758
UD
655 unsigned char bytes[charmap->mb_cur_max];
656 int nbytes = charmap->width_rules[cnt].from->nbytes;
657
658 /* We have the range of character for which the width is
659 specified described using byte sequences of the multibyte
660 charset. We have to convert this to UCS4 now. And we
661 cannot simply convert the beginning and the end of the
662 sequence, we have to iterate over the byte sequence and
663 convert it for every single character. */
664 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
665
666 while (nbytes < charmap->width_rules[cnt].to->nbytes
667 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
668 nbytes) <= 0)
669 {
670 /* Find the UCS value for `bytes'. */
827ff758 671 int inner;
76e680a8 672 uint32_t wch;
701666b7
UD
673 struct charseq *seq
674 = charmap_find_symbol (charmap, (char *) bytes, nbytes);
76e680a8
UD
675
676 if (seq == NULL)
677 wch = ILLEGAL_CHAR_VALUE;
678 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
679 wch = seq->ucs4;
680 else
681 wch = repertoire_find_value (ctype->repertoire, seq->name,
682 strlen (seq->name));
827ff758
UD
683
684 if (wch != ILLEGAL_CHAR_VALUE)
685 /* We are only interested in the side-effects of the
686 `find_idx' call. It will add appropriate entries in
687 the name array if this is necessary. */
688 (void) find_idx (ctype, NULL, NULL, NULL, wch);
689
690 /* "Increment" the bytes sequence. */
691 inner = nbytes - 1;
692 while (inner >= 0 && bytes[inner] == 0xff)
693 --inner;
694
695 if (inner < 0)
696 {
697 /* We have to extend the byte sequence. */
698 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
699 break;
700
701 bytes[0] = 1;
702 memset (&bytes[1], 0, nbytes);
703 ++nbytes;
704 }
705 else
706 {
707 ++bytes[inner];
708 while (++inner < nbytes)
709 bytes[inner] = 0;
710 }
711 }
4b10dd6c
UD
712 }
713
0e16ecfa
UD
714 /* Now set all the other characters of the character set to the
715 default width. */
716 curs = NULL;
717 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
718 {
719 struct charseq *data = (struct charseq *) vdata;
720
721 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
722 data->ucs4 = repertoire_find_value (ctype->repertoire,
723 data->name, len);
724
725 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
726 (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
727 }
728
4b10dd6c
UD
729 /* There must be a multiple of 10 digits. */
730 if (ctype->mbdigits_act % 10 != 0)
731 {
732 assert (ctype->mbdigits_act == ctype->wcdigits_act);
733 ctype->wcdigits_act -= ctype->mbdigits_act % 10;
734 ctype->mbdigits_act -= ctype->mbdigits_act % 10;
f16491eb
CD
735 record_error (0, 0, _("\
736`digit' category has not entries in groups of ten"));
4b10dd6c
UD
737 }
738
739 /* Check the input digits. There must be a multiple of ten available.
42d7c593 740 In each group it could be that one or the other character is missing.
4b10dd6c
UD
741 In this case the whole group must be removed. */
742 cnt = 0;
743 while (cnt < ctype->mbdigits_act)
744 {
745 size_t inner;
746 for (inner = 0; inner < 10; ++inner)
747 if (ctype->mbdigits[cnt + inner] == NULL)
748 break;
749
750 if (inner == 10)
751 cnt += 10;
752 else
753 {
754 /* Remove the group. */
755 memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
756 ((ctype->wcdigits_act - cnt - 10)
757 * sizeof (ctype->mbdigits[0])));
758 ctype->mbdigits_act -= 10;
759 }
760 }
761
762 /* If no input digits are given use the default. */
763 if (ctype->mbdigits_act == 0)
764 {
765 if (ctype->mbdigits_max == 0)
766 {
47e8b443 767 ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
4b10dd6c
UD
768 10 * sizeof (struct charseq *));
769 ctype->mbdigits_max = 10;
770 }
771
772 for (cnt = 0; cnt < 10; ++cnt)
773 {
774 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
701666b7 775 (char *) digits + cnt, 1);
4b10dd6c
UD
776 if (ctype->mbdigits[cnt] == NULL)
777 {
778 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
779 longnames[cnt],
780 strlen (longnames[cnt]));
781 if (ctype->mbdigits[cnt] == NULL)
782 {
783 /* Hum, this ain't good. */
f16491eb
CD
784 record_error (0, 0, _("\
785no input digits defined and none of the standard names in the charmap"));
4b10dd6c 786
47e8b443 787 ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
4b10dd6c
UD
788 sizeof (struct charseq) + 1);
789
790 /* This is better than nothing. */
791 ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
792 ctype->mbdigits[cnt]->nbytes = 1;
793 }
794 }
795 }
796
797 ctype->mbdigits_act = 10;
798 }
799
800 /* Check the wide character input digits. There must be a multiple
42d7c593 801 of ten available. In each group it could be that one or the other
4b10dd6c
UD
802 character is missing. In this case the whole group must be
803 removed. */
804 cnt = 0;
805 while (cnt < ctype->wcdigits_act)
806 {
807 size_t inner;
808 for (inner = 0; inner < 10; ++inner)
809 if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
810 break;
811
812 if (inner == 10)
813 cnt += 10;
814 else
815 {
816 /* Remove the group. */
817 memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
818 ((ctype->wcdigits_act - cnt - 10)
819 * sizeof (ctype->wcdigits[0])));
820 ctype->wcdigits_act -= 10;
821 }
822 }
823
824 /* If no input digits are given use the default. */
825 if (ctype->wcdigits_act == 0)
826 {
827 if (ctype->wcdigits_max == 0)
828 {
47e8b443 829 ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
4b10dd6c
UD
830 10 * sizeof (uint32_t));
831 ctype->wcdigits_max = 10;
832 }
833
834 for (cnt = 0; cnt < 10; ++cnt)
835 ctype->wcdigits[cnt] = L'0' + cnt;
836
837 ctype->mbdigits_act = 10;
838 }
839
840 /* Check the outdigits. */
841 warned = 0;
842 for (cnt = 0; cnt < 10; ++cnt)
843 if (ctype->mboutdigits[cnt] == NULL)
844 {
4b10dd6c
UD
845 if (!warned)
846 {
f16491eb
CD
847 record_error (0, 0, _("\
848not all characters used in `outdigit' are available in the charmap"));
4b10dd6c
UD
849 warned = 1;
850 }
851
1471fa55
FW
852 static const struct charseq replace =
853 {
854 .nbytes = 1,
855 .bytes = "?",
856 };
857 ctype->mboutdigits[cnt] = (struct charseq *) &replace;
4b10dd6c
UD
858 }
859
860 warned = 0;
861 for (cnt = 0; cnt < 10; ++cnt)
862 if (ctype->wcoutdigits[cnt] == 0)
863 {
864 if (!warned)
865 {
f16491eb
CD
866 record_error (0, 0, _("\
867not all characters used in `outdigit' are available in the repertoire"));
4b10dd6c
UD
868 warned = 1;
869 }
870
871 ctype->wcoutdigits[cnt] = L'?';
75cd5204 872 }
a8e4c924
UD
873
874 /* Sort the entries in the translit_ignore list. */
875 if (ctype->translit_ignore != NULL)
876 {
877 struct translit_ignore_t *firstp = ctype->translit_ignore;
878 struct translit_ignore_t *runp;
879
880 ctype->ntranslit_ignore = 1;
881
882 for (runp = firstp->next; runp != NULL; runp = runp->next)
883 {
884 struct translit_ignore_t *lastp = NULL;
885 struct translit_ignore_t *cmpp;
886
887 ++ctype->ntranslit_ignore;
888
889 for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
890 if (runp->from < cmpp->from)
891 break;
892
893 runp->next = lastp;
894 if (lastp == NULL)
895 firstp = runp;
896 }
897
898 ctype->translit_ignore = firstp;
899 }
19bc17a9
RM
900}
901
902
903void
47e8b443 904ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
75cd5204 905 const char *output_path)
19bc17a9
RM
906{
907 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
a9706118 908 const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
4c7d276e 909 + ctype->nr_charclass + ctype->map_collection_nr);
1ecbb381 910 struct locale_file file;
1d96d74d 911 uint32_t default_missing_len;
1ecbb381 912 size_t elem, cnt;
19bc17a9
RM
913
914 /* Now prepare the output: Find the sizes of the table we can use. */
4b10dd6c 915 allocate_arrays (ctype, charmap, ctype->repertoire);
19bc17a9 916
1ecbb381
RS
917 default_missing_len = (ctype->default_missing
918 ? wcslen ((wchar_t *) ctype->default_missing)
919 : 0);
19bc17a9 920
1ecbb381 921 init_locale_data (&file, nelems);
19bc17a9
RM
922 for (elem = 0; elem < nelems; ++elem)
923 {
a9706118 924 if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
19bc17a9
RM
925 switch (elem)
926 {
c6df09ad
UD
927#define CTYPE_EMPTY(name) \
928 case name: \
1ecbb381 929 add_locale_empty (&file); \
c6df09ad
UD
930 break
931
932 CTYPE_EMPTY(_NL_CTYPE_GAP1);
933 CTYPE_EMPTY(_NL_CTYPE_GAP2);
934 CTYPE_EMPTY(_NL_CTYPE_GAP3);
4c7d276e
UD
935 CTYPE_EMPTY(_NL_CTYPE_GAP4);
936 CTYPE_EMPTY(_NL_CTYPE_GAP5);
937 CTYPE_EMPTY(_NL_CTYPE_GAP6);
c6df09ad 938
1ecbb381 939#define CTYPE_RAW_DATA(name, base, size) \
19bc17a9 940 case _NL_ITEM_INDEX (name): \
1ecbb381 941 add_locale_raw_data (&file, base, size); \
19bc17a9
RM
942 break
943
1ecbb381
RS
944 CTYPE_RAW_DATA (_NL_CTYPE_CLASS,
945 ctype->ctype_b,
946 (256 + 128) * sizeof (char_class_t));
19bc17a9 947
1ecbb381
RS
948#define CTYPE_UINT32_ARRAY(name, base, n_elems) \
949 case _NL_ITEM_INDEX (name): \
950 add_locale_uint32_array (&file, base, n_elems); \
951 break
ef446144 952
1ecbb381
RS
953 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER, ctype->map_b[0], 256 + 128);
954 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER, ctype->map_b[1], 256 + 128);
955 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER32, ctype->map32_b[0], 256);
956 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER32, ctype->map32_b[1], 256);
957 CTYPE_RAW_DATA (_NL_CTYPE_CLASS32,
958 ctype->ctype32_b,
959 256 * sizeof (char_class32_t));
ef446144 960
1ecbb381
RS
961#define CTYPE_UINT32(name, value) \
962 case _NL_ITEM_INDEX (name): \
963 add_locale_uint32 (&file, value); \
964 break
4a33c2f5 965
1ecbb381
RS
966 CTYPE_UINT32 (_NL_CTYPE_CLASS_OFFSET, ctype->class_offset);
967 CTYPE_UINT32 (_NL_CTYPE_MAP_OFFSET, ctype->map_offset);
968 CTYPE_UINT32 (_NL_CTYPE_TRANSLIT_TAB_SIZE, ctype->translit_idx_size);
4a33c2f5 969
1ecbb381
RS
970 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_IDX,
971 ctype->translit_from_idx,
972 ctype->translit_idx_size);
4b10dd6c 973
1ecbb381
RS
974 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_TBL,
975 ctype->translit_from_tbl,
976 ctype->translit_from_tbl_size
977 / sizeof (uint32_t));
4b10dd6c 978
1ecbb381
RS
979 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_IDX,
980 ctype->translit_to_idx,
981 ctype->translit_idx_size);
4b10dd6c 982
1ecbb381
RS
983 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_TBL,
984 ctype->translit_to_tbl,
985 ctype->translit_to_tbl_size / sizeof (uint32_t));
4b10dd6c 986
75cd5204
RM
987 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
988 /* The class name array. */
1ecbb381
RS
989 start_locale_structure (&file);
990 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
991 add_locale_string (&file, ctype->classnames[cnt]);
992 add_locale_char (&file, 0);
7602d070 993 align_locale_data (&file, LOCFILE_ALIGN);
1ecbb381 994 end_locale_structure (&file);
75cd5204
RM
995 break;
996
997 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
998 /* The class name array. */
1ecbb381
RS
999 start_locale_structure (&file);
1000 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1001 add_locale_string (&file, ctype->mapnames[cnt]);
1002 add_locale_char (&file, 0);
7602d070 1003 align_locale_data (&file, LOCFILE_ALIGN);
1ecbb381 1004 end_locale_structure (&file);
75cd5204 1005 break;
19bc17a9 1006
1ecbb381
RS
1007 case _NL_ITEM_INDEX (_NL_CTYPE_WIDTH):
1008 add_locale_wcwidth_table (&file, &ctype->width);
1009 break;
19bc17a9 1010
1ecbb381 1011 CTYPE_UINT32 (_NL_CTYPE_MB_CUR_MAX, ctype->mb_cur_max);
0200214b 1012
ce7a5ef4 1013 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
1ecbb381 1014 add_locale_string (&file, ctype->codeset_name);
4b10dd6c
UD
1015 break;
1016
1ecbb381 1017 CTYPE_UINT32 (_NL_CTYPE_MAP_TO_NONASCII, ctype->to_nonascii);
f0c7c524 1018
1ecbb381 1019 CTYPE_UINT32 (_NL_CTYPE_NONASCII_CASE, ctype->nonascii_case);
8a449450 1020
4a33c2f5 1021 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1ecbb381 1022 add_locale_uint32 (&file, ctype->mbdigits_act / 10);
4b10dd6c
UD
1023 break;
1024
4a33c2f5 1025 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1ecbb381 1026 add_locale_uint32 (&file, ctype->wcdigits_act / 10);
4b10dd6c
UD
1027 break;
1028
e43e0dd6 1029 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1ecbb381 1030 start_locale_structure (&file);
498b733e 1031 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
4b10dd6c
UD
1032 cnt < ctype->mbdigits_act; cnt += 10)
1033 {
1ecbb381
RS
1034 add_locale_raw_data (&file, ctype->mbdigits[cnt]->bytes,
1035 ctype->mbdigits[cnt]->nbytes);
1036 add_locale_char (&file, 0);
4b10dd6c 1037 }
1ecbb381 1038 end_locale_structure (&file);
4b10dd6c
UD
1039 break;
1040
1041 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1ecbb381 1042 start_locale_structure (&file);
498b733e 1043 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1ecbb381
RS
1044 add_locale_raw_data (&file, ctype->mboutdigits[cnt]->bytes,
1045 ctype->mboutdigits[cnt]->nbytes);
1046 add_locale_char (&file, 0);
1047 end_locale_structure (&file);
4b10dd6c
UD
1048 break;
1049
e43e0dd6 1050 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1ecbb381 1051 start_locale_structure (&file);
498b733e 1052 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
4b10dd6c 1053 cnt < ctype->wcdigits_act; cnt += 10)
1ecbb381
RS
1054 add_locale_uint32 (&file, ctype->wcdigits[cnt]);
1055 end_locale_structure (&file);
4b10dd6c
UD
1056 break;
1057
1ecbb381 1058 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
498b733e 1059 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1ecbb381 1060 add_locale_uint32 (&file, ctype->wcoutdigits[cnt]);
4b10dd6c
UD
1061 break;
1062
a8e4c924 1063 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1ecbb381 1064 add_locale_uint32 (&file, default_missing_len);
a8e4c924
UD
1065 break;
1066
1d96d74d 1067 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1ecbb381
RS
1068 add_locale_uint32_array (&file, ctype->default_missing,
1069 default_missing_len);
1d96d74d
UD
1070 break;
1071
a8e4c924 1072 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1ecbb381 1073 add_locale_uint32 (&file, ctype->ntranslit_ignore);
a8e4c924
UD
1074 break;
1075
1076 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1ecbb381 1077 start_locale_structure (&file);
a8e4c924 1078 {
a8e4c924 1079 struct translit_ignore_t *runp;
a8e4c924
UD
1080 for (runp = ctype->translit_ignore; runp != NULL;
1081 runp = runp->next)
1082 {
1ecbb381
RS
1083 add_locale_uint32 (&file, runp->from);
1084 add_locale_uint32 (&file, runp->to);
1085 add_locale_uint32 (&file, runp->step);
a8e4c924
UD
1086 }
1087 }
1ecbb381 1088 end_locale_structure (&file);
1d96d74d
UD
1089 break;
1090
19bc17a9
RM
1091 default:
1092 assert (! "unknown CTYPE element");
1093 }
1094 else
1095 {
1096 /* Handle extra maps. */
4c7d276e
UD
1097 size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1098 if (nr < ctype->nr_charclass)
ef446144 1099 {
1ecbb381
RS
1100 start_locale_prelude (&file);
1101 add_locale_uint32_array (&file, ctype->class_b[nr], 256 / 32);
1102 end_locale_prelude (&file);
1103 add_locale_wctype_table (&file, &ctype->class_3level[nr]);
ef446144
UD
1104 }
1105 else
1106 {
4c7d276e
UD
1107 nr -= ctype->nr_charclass;
1108 assert (nr < ctype->map_collection_nr);
1ecbb381 1109 add_locale_wctrans_table (&file, &ctype->map_3level[nr]);
ef446144 1110 }
75cd5204 1111 }
19bc17a9 1112 }
19bc17a9 1113
1ecbb381 1114 write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", &file);
19bc17a9
RM
1115}
1116
1117
4b10dd6c
UD
1118/* Local functions. */
1119static void
1120ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1121 const char *name)
19bc17a9 1122{
4b10dd6c 1123 size_t cnt;
19bc17a9 1124
4b10dd6c
UD
1125 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1126 if (strcmp (ctype->classnames[cnt], name) == 0)
1127 break;
19bc17a9 1128
4b10dd6c
UD
1129 if (cnt < ctype->nr_charclass)
1130 {
1131 lr_error (lr, _("character class `%s' already defined"), name);
1132 return;
1133 }
19bc17a9 1134
4b10dd6c
UD
1135 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1136 /* Exit code 2 is prescribed in P1003.2b. */
f16491eb 1137 record_error (2, 0, _("\
5d431a3e 1138implementation limit: no more than %Zd character classes allowed"),
f16491eb 1139 MAX_NR_CHARCLASS);
19bc17a9 1140
4b10dd6c 1141 ctype->classnames[ctype->nr_charclass++] = name;
19bc17a9
RM
1142}
1143
1144
4b10dd6c
UD
1145static void
1146ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
47e8b443 1147 const char *name, const struct charmap_t *charmap)
19bc17a9 1148{
4b10dd6c 1149 size_t max_chars = 0;
ba1ffaa1 1150 size_t cnt;
19bc17a9 1151
4b10dd6c 1152 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
19bc17a9 1153 {
4b10dd6c
UD
1154 if (strcmp (ctype->mapnames[cnt], name) == 0)
1155 break;
1156
1157 if (max_chars < ctype->map_collection_max[cnt])
1158 max_chars = ctype->map_collection_max[cnt];
19bc17a9
RM
1159 }
1160
4b10dd6c
UD
1161 if (cnt < ctype->map_collection_nr)
1162 {
1163 lr_error (lr, _("character map `%s' already defined"), name);
1164 return;
1165 }
19bc17a9 1166
4b10dd6c
UD
1167 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1168 /* Exit code 2 is prescribed in P1003.2b. */
f16491eb 1169 record_error (2, 0, _("\
4b10dd6c 1170implementation limit: no more than %d character maps allowed"),
f16491eb 1171 MAX_NR_CHARMAP);
19bc17a9 1172
4b10dd6c
UD
1173 ctype->mapnames[cnt] = name;
1174
1175 if (max_chars == 0)
1176 ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1177 else
1178 ctype->map_collection_max[cnt] = max_chars;
1179
1180 ctype->map_collection[cnt] = (uint32_t *)
5866b131 1181 xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
4b10dd6c 1182 ctype->map_collection_act[cnt] = 256;
19bc17a9 1183
4b10dd6c 1184 ++ctype->map_collection_nr;
19bc17a9
RM
1185}
1186
1187
4b10dd6c 1188/* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
42d7c593 1189 is possible if we only want to extend the name array. */
4b10dd6c
UD
1190static uint32_t *
1191find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1192 size_t *act, uint32_t idx)
19bc17a9 1193{
4b10dd6c 1194 size_t cnt;
19bc17a9 1195
4b10dd6c
UD
1196 if (idx < 256)
1197 return table == NULL ? NULL : &(*table)[idx];
19bc17a9 1198
601d2942
UD
1199 /* Use the charnames_idx lookup table instead of the slow search loop. */
1200#if 1
1201 cnt = idx_table_get (&ctype->charnames_idx, idx);
bd75759f 1202 if (cnt == EMPTY)
601d2942
UD
1203 /* Not found. */
1204 cnt = ctype->charnames_act;
1205#else
1206 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1207 if (ctype->charnames[cnt] == idx)
1208 break;
1209#endif
19bc17a9 1210
4b10dd6c
UD
1211 /* We have to distinguish two cases: the name is found or not. */
1212 if (cnt == ctype->charnames_act)
1213 {
1214 /* Extend the name array. */
1215 if (ctype->charnames_act == ctype->charnames_max)
1216 {
1217 ctype->charnames_max *= 2;
5866b131 1218 ctype->charnames = (uint32_t *)
4b10dd6c 1219 xrealloc (ctype->charnames,
5866b131 1220 sizeof (uint32_t) * ctype->charnames_max);
4b10dd6c
UD
1221 }
1222 ctype->charnames[ctype->charnames_act++] = idx;
601d2942 1223 idx_table_add (&ctype->charnames_idx, idx, cnt);
4b10dd6c 1224 }
19bc17a9 1225
4b10dd6c
UD
1226 if (table == NULL)
1227 /* We have done everything we are asked to do. */
1228 return NULL;
19bc17a9 1229
4a9dcff1
UD
1230 if (max == NULL)
1231 /* The caller does not want to extend the table. */
1232 return (cnt >= *act ? NULL : &(*table)[cnt]);
1233
4b10dd6c
UD
1234 if (cnt >= *act)
1235 {
1236 if (cnt >= *max)
1237 {
1238 size_t old_max = *max;
1239 do
1240 *max *= 2;
1241 while (*max <= cnt);
19bc17a9 1242
4b10dd6c 1243 *table =
5866b131 1244 (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
4b10dd6c
UD
1245 memset (&(*table)[old_max], '\0',
1246 (*max - old_max) * sizeof (uint32_t));
1247 }
19bc17a9 1248
76e680a8 1249 *act = cnt + 1;
4b10dd6c 1250 }
19bc17a9 1251
4b10dd6c 1252 return &(*table)[cnt];
19bc17a9
RM
1253}
1254
1255
4b10dd6c 1256static int
47e8b443 1257get_character (struct token *now, const struct charmap_t *charmap,
4b10dd6c
UD
1258 struct repertoire_t *repertoire,
1259 struct charseq **seqp, uint32_t *wchp)
19bc17a9 1260{
4b10dd6c
UD
1261 if (now->tok == tok_bsymbol)
1262 {
1263 /* This will hopefully be the normal case. */
1264 *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1265 now->val.str.lenmb);
1266 *seqp = charmap_find_value (charmap, now->val.str.startmb,
1267 now->val.str.lenmb);
1268 }
1269 else if (now->tok == tok_ucs4)
1270 {
f0a4b6b1
UD
1271 char utmp[10];
1272
1273 snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1274 *seqp = charmap_find_value (charmap, utmp, 9);
1275
1276 if (*seqp == NULL)
1277 *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
19bc17a9 1278
4b10dd6c
UD
1279 if (*seqp == NULL)
1280 {
1281 /* Compute the value in the charmap from the UCS value. */
1282 const char *symbol = repertoire_find_symbol (repertoire,
1283 now->val.ucs4);
19bc17a9 1284
4b10dd6c
UD
1285 if (symbol == NULL)
1286 *seqp = NULL;
1287 else
1288 *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
19bc17a9 1289
4b10dd6c
UD
1290 if (*seqp == NULL)
1291 {
723faa38
UD
1292 if (repertoire != NULL)
1293 {
1294 /* Insert a negative entry. */
1295 static const struct charseq negative
1296 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1297 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1298 sizeof (uint32_t));
1299 *newp = now->val.ucs4;
1300
1301 insert_entry (&repertoire->seq_table, newp,
1302 sizeof (uint32_t), (void *) &negative);
1303 }
4b10dd6c
UD
1304 }
1305 else
1306 (*seqp)->ucs4 = now->val.ucs4;
1307 }
1308 else if ((*seqp)->ucs4 != now->val.ucs4)
1309 *seqp = NULL;
19bc17a9 1310
4b10dd6c
UD
1311 *wchp = now->val.ucs4;
1312 }
1313 else if (now->tok == tok_charcode)
1314 {
1315 /* We must map from the byte code to UCS4. */
1316 *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1317 now->val.str.lenmb);
19bc17a9 1318
4b10dd6c
UD
1319 if (*seqp == NULL)
1320 *wchp = ILLEGAL_CHAR_VALUE;
1321 else
1322 {
1323 if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1324 (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1325 strlen ((*seqp)->name));
1326 *wchp = (*seqp)->ucs4;
1327 }
1328 }
1329 else
1330 return 1;
19bc17a9
RM
1331
1332 return 0;
1333}
1334
1335
a0dc5206
UD
1336/* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1337 the .(2). counterparts. */
4b10dd6c
UD
1338static void
1339charclass_symbolic_ellipsis (struct linereader *ldfile,
1340 struct locale_ctype_t *ctype,
47e8b443 1341 const struct charmap_t *charmap,
4b10dd6c
UD
1342 struct repertoire_t *repertoire,
1343 struct token *now,
1344 const char *last_str,
1345 unsigned long int class256_bit,
1346 unsigned long int class_bit, int base,
a0dc5206 1347 int ignore_content, int handle_digits, int step)
19bc17a9 1348{
4b10dd6c
UD
1349 const char *nowstr = now->val.str.startmb;
1350 char tmp[now->val.str.lenmb + 1];
1351 const char *cp;
1352 char *endp;
1353 unsigned long int from;
1354 unsigned long int to;
19bc17a9 1355
4b10dd6c
UD
1356 /* We have to compute the ellipsis values using the symbolic names. */
1357 assert (last_str != NULL);
1358
1359 if (strlen (last_str) != now->val.str.lenmb)
19bc17a9 1360 {
4b10dd6c
UD
1361 invalid_range:
1362 lr_error (ldfile,
c69136ae 1363 _("`%s' and `%.*s' are not valid names for symbolic range"),
f6ada7ad 1364 last_str, (int) now->val.str.lenmb, nowstr);
4b10dd6c 1365 return;
19bc17a9
RM
1366 }
1367
4b10dd6c
UD
1368 if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1369 /* Nothing to do, the names are the same. */
1370 return;
19bc17a9 1371
4b10dd6c
UD
1372 for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1373 ;
19bc17a9 1374
4b10dd6c
UD
1375 errno = 0;
1376 from = strtoul (cp, &endp, base);
1377 if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1378 goto invalid_range;
19bc17a9 1379
4b10dd6c 1380 to = strtoul (nowstr + (cp - last_str), &endp, base);
549b3c3a
UD
1381 if ((to == UINT_MAX && errno == ERANGE)
1382 || (endp - nowstr) != now->val.str.lenmb || from >= to)
4b10dd6c 1383 goto invalid_range;
19bc17a9 1384
4b10dd6c
UD
1385 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1386 if (!ignore_content)
1387 {
1388 now->val.str.startmb = tmp;
a0dc5206 1389 while ((from += step) <= to)
4b10dd6c
UD
1390 {
1391 struct charseq *seq;
1392 uint32_t wch;
19bc17a9 1393
9068de33 1394 sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
4e8d52c9
AJ
1395 (int) (cp - last_str), last_str,
1396 (int) (now->val.str.lenmb - (cp - last_str)),
9068de33 1397 from);
19bc17a9 1398
9e6a7d9c
SL
1399 if (get_character (now, charmap, repertoire, &seq, &wch))
1400 goto invalid_range;
4b10dd6c
UD
1401
1402 if (seq != NULL && seq->nbytes == 1)
1403 /* Yep, we can store information about this byte sequence. */
1404 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
19bc17a9 1405
4b10dd6c
UD
1406 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1407 /* We have the UCS4 position. */
1408 *find_idx (ctype, &ctype->class_collection,
1409 &ctype->class_collection_max,
1410 &ctype->class_collection_act, wch) |= class_bit;
19bc17a9 1411
4b10dd6c
UD
1412 if (handle_digits == 1)
1413 {
1414 /* We must store the digit values. */
1415 if (ctype->mbdigits_act == ctype->mbdigits_max)
1416 {
1417 ctype->mbdigits_max *= 2;
1418 ctype->mbdigits = xrealloc (ctype->mbdigits,
1419 (ctype->mbdigits_max
1420 * sizeof (char *)));
1421 ctype->wcdigits_max *= 2;
1422 ctype->wcdigits = xrealloc (ctype->wcdigits,
1423 (ctype->wcdigits_max
1424 * sizeof (uint32_t)));
1425 }
1426
1427 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1428 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1429 }
1430 else if (handle_digits == 2)
1431 {
1432 /* We must store the digit values. */
1433 if (ctype->outdigits_act >= 10)
1434 {
1435 lr_error (ldfile, _("\
1436%s: field `%s' does not contain exactly ten entries"),
1437 "LC_CTYPE", "outdigit");
1438 return;
1439 }
1440
1441 ctype->mboutdigits[ctype->outdigits_act] = seq;
1442 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1443 ++ctype->outdigits_act;
1444 }
1445 }
1446 }
19bc17a9
RM
1447}
1448
1449
a0dc5206 1450/* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
4b10dd6c
UD
1451static void
1452charclass_ucs4_ellipsis (struct linereader *ldfile,
1453 struct locale_ctype_t *ctype,
47e8b443 1454 const struct charmap_t *charmap,
4b10dd6c
UD
1455 struct repertoire_t *repertoire,
1456 struct token *now, uint32_t last_wch,
1457 unsigned long int class256_bit,
1458 unsigned long int class_bit, int ignore_content,
a0dc5206 1459 int handle_digits, int step)
19bc17a9 1460{
4b10dd6c 1461 if (last_wch > now->val.ucs4)
19bc17a9 1462 {
4b10dd6c
UD
1463 lr_error (ldfile, _("\
1464to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1465 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1466 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
19bc17a9
RM
1467 return;
1468 }
1469
4b10dd6c 1470 if (!ignore_content)
a0dc5206 1471 while ((last_wch += step) <= now->val.ucs4)
4b10dd6c
UD
1472 {
1473 /* We have to find out whether there is a byte sequence corresponding
1474 to this UCS4 value. */
f0a4b6b1
UD
1475 struct charseq *seq;
1476 char utmp[10];
1477
1478 snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1479 seq = charmap_find_value (charmap, utmp, 9);
a0dc5206
UD
1480 if (seq == NULL)
1481 {
1482 snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1483 seq = charmap_find_value (charmap, utmp, 5);
1484 }
f0a4b6b1
UD
1485
1486 if (seq == NULL)
1487 /* Try looking in the repertoire map. */
1488 seq = repertoire_find_seq (repertoire, last_wch);
19bc17a9 1489
4b10dd6c
UD
1490 /* If this is the first time we look for this sequence create a new
1491 entry. */
1492 if (seq == NULL)
1493 {
f0a4b6b1
UD
1494 static const struct charseq negative
1495 = { .ucs4 = ILLEGAL_CHAR_VALUE };
19bc17a9 1496
f0a4b6b1
UD
1497 /* Find the symbolic name for this UCS4 value. */
1498 if (repertoire != NULL)
4b10dd6c 1499 {
f0a4b6b1
UD
1500 const char *symbol = repertoire_find_symbol (repertoire,
1501 last_wch);
5866b131
UD
1502 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1503 sizeof (uint32_t));
f0a4b6b1
UD
1504 *newp = last_wch;
1505
1506 if (symbol != NULL)
1507 /* We have a name, now search the multibyte value. */
1508 seq = charmap_find_value (charmap, symbol, strlen (symbol));
1509
1510 if (seq == NULL)
1511 /* We have to create a fake entry. */
1512 seq = (struct charseq *) &negative;
1513 else
1514 seq->ucs4 = last_wch;
1515
5866b131
UD
1516 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1517 seq);
4b10dd6c
UD
1518 }
1519 else
f0a4b6b1
UD
1520 /* We have to create a fake entry. */
1521 seq = (struct charseq *) &negative;
4b10dd6c
UD
1522 }
1523
1524 /* We have a name, now search the multibyte value. */
1525 if (seq->ucs4 == last_wch && seq->nbytes == 1)
1526 /* Yep, we can store information about this byte sequence. */
1527 ctype->class256_collection[(size_t) seq->bytes[0]]
1528 |= class256_bit;
1529
1530 /* And of course we have the UCS4 position. */
5866b131 1531 if (class_bit != 0)
4b10dd6c
UD
1532 *find_idx (ctype, &ctype->class_collection,
1533 &ctype->class_collection_max,
1534 &ctype->class_collection_act, last_wch) |= class_bit;
1535
1536 if (handle_digits == 1)
1537 {
1538 /* We must store the digit values. */
1539 if (ctype->mbdigits_act == ctype->mbdigits_max)
1540 {
1541 ctype->mbdigits_max *= 2;
1542 ctype->mbdigits = xrealloc (ctype->mbdigits,
1543 (ctype->mbdigits_max
1544 * sizeof (char *)));
1545 ctype->wcdigits_max *= 2;
1546 ctype->wcdigits = xrealloc (ctype->wcdigits,
1547 (ctype->wcdigits_max
1548 * sizeof (uint32_t)));
1549 }
1550
1551 ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1552 ? seq : NULL);
1553 ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1554 }
1555 else if (handle_digits == 2)
1556 {
1557 /* We must store the digit values. */
1558 if (ctype->outdigits_act >= 10)
1559 {
1560 lr_error (ldfile, _("\
1561%s: field `%s' does not contain exactly ten entries"),
1562 "LC_CTYPE", "outdigit");
1563 return;
1564 }
19bc17a9 1565
4b10dd6c
UD
1566 ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1567 ? seq : NULL);
1568 ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1569 ++ctype->outdigits_act;
1570 }
1571 }
19bc17a9
RM
1572}
1573
1574
4b10dd6c 1575/* Ellipsis as in `/xea/x12.../xea/x34'. */
19bc17a9 1576static void
4b10dd6c
UD
1577charclass_charcode_ellipsis (struct linereader *ldfile,
1578 struct locale_ctype_t *ctype,
47e8b443 1579 const struct charmap_t *charmap,
4b10dd6c
UD
1580 struct repertoire_t *repertoire,
1581 struct token *now, char *last_charcode,
1582 uint32_t last_charcode_len,
1583 unsigned long int class256_bit,
1584 unsigned long int class_bit, int ignore_content,
1585 int handle_digits)
19bc17a9 1586{
4b10dd6c
UD
1587 /* First check whether the to-value is larger. */
1588 if (now->val.charcode.nbytes != last_charcode_len)
1589 {
1590 lr_error (ldfile, _("\
379ed351 1591start and end character sequence of range must have the same length"));
4b10dd6c
UD
1592 return;
1593 }
19bc17a9 1594
4b10dd6c 1595 if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
19bc17a9 1596 {
4b10dd6c
UD
1597 lr_error (ldfile, _("\
1598to-value character sequence is smaller than from-value sequence"));
19bc17a9
RM
1599 return;
1600 }
1601
4b10dd6c
UD
1602 if (!ignore_content)
1603 {
1604 do
1605 {
1606 /* Increment the byte sequence value. */
1607 struct charseq *seq;
1608 uint32_t wch;
1609 int i;
1610
1611 for (i = last_charcode_len - 1; i >= 0; --i)
1612 if (++last_charcode[i] != 0)
1613 break;
1614
1615 if (last_charcode_len == 1)
1616 /* Of course we have the charcode value. */
1617 ctype->class256_collection[(size_t) last_charcode[0]]
1618 |= class256_bit;
1619
1620 /* Find the symbolic name. */
1621 seq = charmap_find_symbol (charmap, last_charcode,
1622 last_charcode_len);
1623 if (seq != NULL)
1624 {
1625 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1626 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1627 strlen (seq->name));
f0a4b6b1 1628 wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
4b10dd6c
UD
1629
1630 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1631 *find_idx (ctype, &ctype->class_collection,
1632 &ctype->class_collection_max,
1633 &ctype->class_collection_act, wch) |= class_bit;
1634 }
1635 else
1636 wch = ILLEGAL_CHAR_VALUE;
19bc17a9 1637
4b10dd6c
UD
1638 if (handle_digits == 1)
1639 {
1640 /* We must store the digit values. */
1641 if (ctype->mbdigits_act == ctype->mbdigits_max)
1642 {
1643 ctype->mbdigits_max *= 2;
1644 ctype->mbdigits = xrealloc (ctype->mbdigits,
1645 (ctype->mbdigits_max
1646 * sizeof (char *)));
1647 ctype->wcdigits_max *= 2;
1648 ctype->wcdigits = xrealloc (ctype->wcdigits,
1649 (ctype->wcdigits_max
1650 * sizeof (uint32_t)));
1651 }
1652
1653 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1654 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1655 seq->nbytes = last_charcode_len;
1656
1657 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1658 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1659 }
1660 else if (handle_digits == 2)
1661 {
1662 struct charseq *seq;
1663 /* We must store the digit values. */
1664 if (ctype->outdigits_act >= 10)
1665 {
1666 lr_error (ldfile, _("\
1667%s: field `%s' does not contain exactly ten entries"),
1668 "LC_CTYPE", "outdigit");
1669 return;
1670 }
1671
1672 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1673 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1674 seq->nbytes = last_charcode_len;
1675
1676 ctype->mboutdigits[ctype->outdigits_act] = seq;
1677 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1678 ++ctype->outdigits_act;
1679 }
1680 }
1681 while (memcmp (last_charcode, now->val.charcode.bytes,
1682 last_charcode_len) != 0);
1683 }
19bc17a9
RM
1684}
1685
1686
47e8b443
UD
1687static uint32_t *
1688find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
1689 uint32_t wch)
1690{
1691 struct translit_t *trunp = ctype->translit;
1692 struct translit_ignore_t *tirunp = ctype->translit_ignore;
1693
1694 while (trunp != NULL)
1695 {
1696 /* XXX We simplify things here. The transliterations we look
1697 for are only allowed to have one character. */
1698 if (trunp->from[0] == wch && trunp->from[1] == 0)
1699 {
1700 /* Found it. Now look for a transliteration which can be
1701 represented with the character set. */
1702 struct translit_to_t *torunp = trunp->to;
1703
1704 while (torunp != NULL)
1705 {
1706 int i;
1707
1708 for (i = 0; torunp->str[i] != 0; ++i)
1709 {
1710 char utmp[10];
1711
1712 snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1713 if (charmap_find_value (charmap, utmp, 9) == NULL)
1714 /* This character cannot be represented. */
1715 break;
1716 }
1717
1718 if (torunp->str[i] == 0)
1719 return torunp->str;
1720
1721 torunp = torunp->next;
1722 }
1723
1724 break;
1725 }
1726
1727 trunp = trunp->next;
1728 }
1729
1730 /* Check for ignored chars. */
1731 while (tirunp != NULL)
1732 {
1733 if (tirunp->from <= wch && tirunp->to >= wch)
1734 {
1735 uint32_t wi;
1736
1737 for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1738 if (wi == wch)
363a9899 1739 return no_str;
47e8b443
UD
1740 }
1741 }
1742
1743 /* Nothing found. */
1744 return NULL;
1745}
1746
1747
1748uint32_t *
1749find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
1750 uint32_t wch)
1751{
1752 struct locale_ctype_t *ctype;
1753 uint32_t *result = NULL;
1754
1755 assert (locale != NULL);
1756 ctype = locale->categories[LC_CTYPE].ctype;
1757
b037a293
UD
1758 if (ctype == NULL)
1759 return NULL;
1760
47e8b443
UD
1761 if (ctype->translit != NULL)
1762 result = find_translit2 (ctype, charmap, wch);
1763
1764 if (result == NULL)
1765 {
1766 struct translit_include_t *irunp = ctype->translit_include;
1767
1768 while (irunp != NULL && result == NULL)
1769 {
1770 result = find_translit (find_locale (CTYPE_LOCALE,
1771 irunp->copy_locale,
1772 irunp->copy_repertoire,
1773 charmap),
1774 charmap, wch);
1775 irunp = irunp->next;
1776 }
1777 }
1778
1779 return result;
1780}
1781
1782
4b10dd6c
UD
1783/* Read one transliteration entry. */
1784static uint32_t *
1785read_widestring (struct linereader *ldfile, struct token *now,
47e8b443
UD
1786 const struct charmap_t *charmap,
1787 struct repertoire_t *repertoire)
19bc17a9 1788{
4b10dd6c 1789 uint32_t *wstr;
19bc17a9 1790
4b10dd6c
UD
1791 if (now->tok == tok_default_missing)
1792 /* The special name "" will denote this case. */
363a9899 1793 wstr = no_str;
4b10dd6c 1794 else if (now->tok == tok_bsymbol)
19bc17a9 1795 {
4b10dd6c 1796 /* Get the value from the repertoire. */
a673fbcb 1797 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
4b10dd6c
UD
1798 wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1799 now->val.str.lenmb);
1800 if (wstr[0] == ILLEGAL_CHAR_VALUE)
f0a4b6b1
UD
1801 {
1802 /* We cannot proceed, we don't know the UCS4 value. */
1803 free (wstr);
1804 return NULL;
1805 }
4b10dd6c
UD
1806
1807 wstr[1] = 0;
19bc17a9 1808 }
4b10dd6c 1809 else if (now->tok == tok_ucs4)
19bc17a9 1810 {
a673fbcb 1811 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
4b10dd6c
UD
1812 wstr[0] = now->val.ucs4;
1813 wstr[1] = 0;
1814 }
1815 else if (now->tok == tok_charcode)
1816 {
1817 /* Argh, we have to convert to the symbol name first and then to the
1818 UCS4 value. */
1819 struct charseq *seq = charmap_find_symbol (charmap,
1820 now->val.str.startmb,
1821 now->val.str.lenmb);
1822 if (seq == NULL)
1823 /* Cannot find the UCS4 value. */
1824 return NULL;
1825
1826 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1827 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1828 strlen (seq->name));
1829 if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1830 /* We cannot proceed, we don't know the UCS4 value. */
1831 return NULL;
1832
a673fbcb 1833 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
4b10dd6c
UD
1834 wstr[0] = seq->ucs4;
1835 wstr[1] = 0;
1836 }
1837 else if (now->tok == tok_string)
1838 {
1839 wstr = now->val.str.startwc;
a673fbcb 1840 if (wstr == NULL || wstr[0] == 0)
4b10dd6c
UD
1841 return NULL;
1842 }
1843 else
1844 {
1845 if (now->tok != tok_eol && now->tok != tok_eof)
1846 lr_ignore_rest (ldfile, 0);
1847 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1848 return (uint32_t *) -1l;
19bc17a9
RM
1849 }
1850
4b10dd6c
UD
1851 return wstr;
1852}
19bc17a9 1853
19bc17a9 1854
4b10dd6c
UD
1855static void
1856read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
47e8b443 1857 struct token *now, const struct charmap_t *charmap,
4b10dd6c
UD
1858 struct repertoire_t *repertoire)
1859{
1860 uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1861 struct translit_t *result;
1862 struct translit_to_t **top;
a673fbcb 1863 struct obstack *ob = &ctype->mempool;
4b10dd6c
UD
1864 int first;
1865 int ignore;
1866
1867 if (from_wstr == NULL)
1868 /* There is no valid from string. */
1869 return;
19bc17a9 1870
4b10dd6c
UD
1871 result = (struct translit_t *) obstack_alloc (ob,
1872 sizeof (struct translit_t));
1873 result->from = from_wstr;
a673fbcb
UD
1874 result->fname = ldfile->fname;
1875 result->lineno = ldfile->lineno;
4b10dd6c
UD
1876 result->next = NULL;
1877 result->to = NULL;
1878 top = &result->to;
1879 first = 1;
1880 ignore = 0;
1881
1882 while (1)
1883 {
1884 uint32_t *to_wstr;
1885
1886 /* Next we have one or more transliterations. They are
1887 separated by semicolons. */
47e8b443 1888 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
4b10dd6c
UD
1889
1890 if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1891 {
1892 /* One string read. */
1893 const uint32_t zero = 0;
1894
1895 if (!ignore)
1896 {
1897 obstack_grow (ob, &zero, 4);
1898 to_wstr = obstack_finish (ob);
1899
1900 *top = obstack_alloc (ob, sizeof (struct translit_to_t));
1901 (*top)->str = to_wstr;
1902 (*top)->next = NULL;
1903 }
1904
1905 if (now->tok == tok_eol)
1906 {
1907 result->next = ctype->translit;
1908 ctype->translit = result;
1909 return;
1910 }
1911
1912 if (!ignore)
1913 top = &(*top)->next;
1914 ignore = 0;
1915 }
1916 else
1917 {
1918 to_wstr = read_widestring (ldfile, now, charmap, repertoire);
1919 if (to_wstr == (uint32_t *) -1l)
1920 {
1921 /* An error occurred. */
1922 obstack_free (ob, result);
1923 return;
1924 }
1925
1926 if (to_wstr == NULL)
1927 ignore = 1;
1928 else
1929 /* This value is usable. */
1930 obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
19bc17a9 1931
4b10dd6c
UD
1932 first = 0;
1933 }
1934 }
19bc17a9
RM
1935}
1936
1937
a673fbcb
UD
1938static void
1939read_translit_ignore_entry (struct linereader *ldfile,
1940 struct locale_ctype_t *ctype,
47e8b443 1941 const struct charmap_t *charmap,
a673fbcb
UD
1942 struct repertoire_t *repertoire)
1943{
1944 /* We expect a semicolon-separated list of characters we ignore. We are
1945 only interested in the wide character definitions. These must be
1946 single characters, possibly defining a range when an ellipsis is used. */
1947 while (1)
1948 {
47e8b443
UD
1949 struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
1950 verbose);
a673fbcb
UD
1951 struct translit_ignore_t *newp;
1952 uint32_t from;
1953
1954 if (now->tok == tok_eol || now->tok == tok_eof)
1955 {
1956 lr_error (ldfile,
1957 _("premature end of `translit_ignore' definition"));
1958 return;
1959 }
1960
1961 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1962 {
1963 lr_error (ldfile, _("syntax error"));
1964 lr_ignore_rest (ldfile, 0);
1965 return;
1966 }
1967
1968 if (now->tok == tok_ucs4)
1969 from = now->val.ucs4;
1970 else
f0a4b6b1
UD
1971 /* Try to get the value. */
1972 from = repertoire_find_value (repertoire, now->val.str.startmb,
1973 now->val.str.lenmb);
a673fbcb
UD
1974
1975 if (from == ILLEGAL_CHAR_VALUE)
1976 {
1977 lr_error (ldfile, "invalid character name");
1978 newp = NULL;
1979 }
1980 else
1981 {
1982 newp = (struct translit_ignore_t *)
1983 obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
1984 newp->from = from;
1985 newp->to = from;
a0dc5206 1986 newp->step = 1;
a673fbcb
UD
1987
1988 newp->next = ctype->translit_ignore;
1989 ctype->translit_ignore = newp;
1990 }
1991
1992 /* Now we expect either a semicolon, an ellipsis, or the end of the
1993 line. */
47e8b443 1994 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
a673fbcb 1995
a0dc5206 1996 if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
a673fbcb
UD
1997 {
1998 /* XXX Should we bother implementing `....'? `...' certainly
1999 will not be implemented. */
2000 uint32_t to;
a0dc5206 2001 int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
a673fbcb 2002
47e8b443 2003 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
a673fbcb
UD
2004
2005 if (now->tok == tok_eol || now->tok == tok_eof)
2006 {
2007 lr_error (ldfile,
2008 _("premature end of `translit_ignore' definition"));
2009 return;
2010 }
2011
2012 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2013 {
2014 lr_error (ldfile, _("syntax error"));
2015 lr_ignore_rest (ldfile, 0);
2016 return;
2017 }
2018
2019 if (now->tok == tok_ucs4)
2020 to = now->val.ucs4;
2021 else
f0a4b6b1
UD
2022 /* Try to get the value. */
2023 to = repertoire_find_value (repertoire, now->val.str.startmb,
2024 now->val.str.lenmb);
a673fbcb
UD
2025
2026 if (to == ILLEGAL_CHAR_VALUE)
2027 lr_error (ldfile, "invalid character name");
2028 else
2029 {
2030 /* Make sure the `to'-value is larger. */
2031 if (to >= from)
a0dc5206
UD
2032 {
2033 newp->to = to;
2034 newp->step = step;
2035 }
a673fbcb
UD
2036 else
2037 lr_error (ldfile, _("\
2038to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2039 (to | from) < 65536 ? 4 : 8, to,
2040 (to | from) < 65536 ? 4 : 8, from);
2041 }
2042
2043 /* And the next token. */
47e8b443 2044 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
a673fbcb
UD
2045 }
2046
2047 if (now->tok == tok_eol || now->tok == tok_eof)
2048 /* We are done. */
2049 return;
2050
2051 if (now->tok == tok_semicolon)
2052 /* Next round. */
2053 continue;
2054
2055 /* If we come here something is wrong. */
2056 lr_error (ldfile, _("syntax error"));
2057 lr_ignore_rest (ldfile, 0);
2058 return;
2059 }
2060}
2061
2062
4b10dd6c
UD
2063/* The parser for the LC_CTYPE section of the locale definition. */
2064void
2065ctype_read (struct linereader *ldfile, struct localedef_t *result,
47e8b443 2066 const struct charmap_t *charmap, const char *repertoire_name,
4b10dd6c 2067 int ignore_content)
19bc17a9 2068{
4b10dd6c
UD
2069 struct repertoire_t *repertoire = NULL;
2070 struct locale_ctype_t *ctype;
2071 struct token *now;
2072 enum token_t nowtok;
19bc17a9 2073 size_t cnt;
4b10dd6c
UD
2074 uint32_t last_wch = 0;
2075 enum token_t last_token;
2076 enum token_t ellipsis_token;
a0dc5206 2077 int step;
4b10dd6c
UD
2078 char last_charcode[16];
2079 size_t last_charcode_len = 0;
2080 const char *last_str = NULL;
2081 int mapidx;
a6bd56c7 2082 struct localedef_t *copy_locale = NULL;
19bc17a9 2083
4b10dd6c
UD
2084 /* Get the repertoire we have to use. */
2085 if (repertoire_name != NULL)
2086 repertoire = repertoire_read (repertoire_name);
19bc17a9 2087
4b10dd6c
UD
2088 /* The rest of the line containing `LC_CTYPE' must be free. */
2089 lr_ignore_rest (ldfile, 1);
19bc17a9 2090
4b10dd6c
UD
2091
2092 do
19bc17a9 2093 {
47e8b443 2094 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c 2095 nowtok = now->tok;
19bc17a9 2096 }
4b10dd6c 2097 while (nowtok == tok_eol);
19bc17a9 2098
4b10dd6c
UD
2099 /* If we see `copy' now we are almost done. */
2100 if (nowtok == tok_copy)
2101 {
47e8b443 2102 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
a6bd56c7
UD
2103 if (now->tok != tok_string)
2104 {
2105 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2106
2107 skip_category:
2108 do
47e8b443 2109 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
a6bd56c7
UD
2110 while (now->tok != tok_eof && now->tok != tok_end);
2111
2112 if (now->tok != tok_eof
47e8b443 2113 || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
93693c4d 2114 now->tok == tok_eof))
a6bd56c7
UD
2115 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2116 else if (now->tok != tok_lc_ctype)
2117 {
2118 lr_error (ldfile, _("\
2119%1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2120 lr_ignore_rest (ldfile, 0);
2121 }
2122 else
2123 lr_ignore_rest (ldfile, 1);
2124
2125 return;
2126 }
2127
2128 if (! ignore_content)
2129 {
2130 /* Get the locale definition. */
2131 copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2132 repertoire_name, charmap, NULL);
2133 if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2134 {
2135 /* Not yet loaded. So do it now. */
2136 if (locfile_read (copy_locale, charmap) != 0)
2137 goto skip_category;
2138 }
71663747
UD
2139
2140 if (copy_locale->categories[LC_CTYPE].ctype == NULL)
2141 return;
a6bd56c7
UD
2142 }
2143
2144 lr_ignore_rest (ldfile, 1);
2145
47e8b443 2146 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
a6bd56c7 2147 nowtok = now->tok;
4b10dd6c 2148 }
75cd5204 2149
4b10dd6c 2150 /* Prepare the data structures. */
a6bd56c7 2151 ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
4b10dd6c
UD
2152 ctype = result->categories[LC_CTYPE].ctype;
2153
2154 /* Remember the repertoire we use. */
2155 if (!ignore_content)
2156 ctype->repertoire = repertoire;
2157
2158 while (1)
19bc17a9 2159 {
4b10dd6c
UD
2160 unsigned long int class_bit = 0;
2161 unsigned long int class256_bit = 0;
2162 int handle_digits = 0;
2163
2164 /* Of course we don't proceed beyond the end of file. */
2165 if (nowtok == tok_eof)
2166 break;
2167
2168 /* Ingore empty lines. */
2169 if (nowtok == tok_eol)
19bc17a9 2170 {
47e8b443 2171 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2172 nowtok = now->tok;
2173 continue;
2174 }
19bc17a9 2175
4b10dd6c
UD
2176 switch (nowtok)
2177 {
5491da0d 2178 case tok_charclass:
47e8b443 2179 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
5491da0d
UD
2180 while (now->tok == tok_ident || now->tok == tok_string)
2181 {
2182 ctype_class_new (ldfile, ctype, now->val.str.startmb);
47e8b443 2183 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
5491da0d
UD
2184 if (now->tok != tok_semicolon)
2185 break;
47e8b443 2186 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
5491da0d
UD
2187 }
2188 if (now->tok != tok_eol)
2189 SYNTAX_ERROR (_("\
2190%s: syntax error in definition of new character class"), "LC_CTYPE");
2191 break;
2192
2193 case tok_charconv:
47e8b443 2194 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
5491da0d
UD
2195 while (now->tok == tok_ident || now->tok == tok_string)
2196 {
2197 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
47e8b443 2198 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
5491da0d
UD
2199 if (now->tok != tok_semicolon)
2200 break;
47e8b443 2201 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
5491da0d
UD
2202 }
2203 if (now->tok != tok_eol)
2204 SYNTAX_ERROR (_("\
2205%s: syntax error in definition of new character map"), "LC_CTYPE");
2206 break;
2207
4b10dd6c 2208 case tok_class:
b9eb05d6
UD
2209 /* Ignore the rest of the line if we don't need the input of
2210 this line. */
2211 if (ignore_content)
2212 {
2213 lr_ignore_rest (ldfile, 0);
2214 break;
2215 }
2216
4b10dd6c
UD
2217 /* We simply forget the `class' keyword and use the following
2218 operand to determine the bit. */
47e8b443 2219 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2220 if (now->tok == tok_ident || now->tok == tok_string)
2221 {
87372aa9 2222 /* Must can be one of the predefined class names. */
4b10dd6c
UD
2223 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2224 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2225 break;
2226 if (cnt >= ctype->nr_charclass)
2227 {
fc5771e4
MM
2228 /* OK, it's a new class. */
2229 ctype_class_new (ldfile, ctype, now->val.str.startmb);
4b10dd6c 2230
fc5771e4 2231 class_bit = _ISwbit (ctype->nr_charclass - 1);
4b10dd6c
UD
2232 }
2233 else
7f653277
UD
2234 {
2235 class_bit = _ISwbit (cnt);
4b10dd6c 2236
7f653277
UD
2237 free (now->val.str.startmb);
2238 }
4b10dd6c
UD
2239 }
2240 else if (now->tok == tok_digit)
2241 goto handle_tok_digit;
2242 else if (now->tok < tok_upper || now->tok > tok_blank)
2243 goto err_label;
2244 else
2245 {
2246 class_bit = BITw (now->tok);
2247 class256_bit = BIT (now->tok);
2248 }
2249
2250 /* The next character must be a semicolon. */
47e8b443 2251 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2252 if (now->tok != tok_semicolon)
2253 goto err_label;
2254 goto read_charclass;
2255
2256 case tok_upper:
2257 case tok_lower:
2258 case tok_alpha:
2259 case tok_alnum:
2260 case tok_space:
2261 case tok_cntrl:
2262 case tok_punct:
2263 case tok_graph:
2264 case tok_print:
2265 case tok_xdigit:
2266 case tok_blank:
b9eb05d6
UD
2267 /* Ignore the rest of the line if we don't need the input of
2268 this line. */
2269 if (ignore_content)
2270 {
2271 lr_ignore_rest (ldfile, 0);
2272 break;
2273 }
2274
4b10dd6c
UD
2275 class_bit = BITw (now->tok);
2276 class256_bit = BIT (now->tok);
2277 handle_digits = 0;
2278 read_charclass:
2279 ctype->class_done |= class_bit;
2280 last_token = tok_none;
2281 ellipsis_token = tok_none;
a0dc5206 2282 step = 1;
47e8b443 2283 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2284 while (now->tok != tok_eol && now->tok != tok_eof)
2285 {
2286 uint32_t wch;
2287 struct charseq *seq;
2288
2289 if (ellipsis_token == tok_none)
2290 {
2291 if (get_character (now, charmap, repertoire, &seq, &wch))
2292 goto err_label;
2293
2294 if (!ignore_content && seq != NULL && seq->nbytes == 1)
2295 /* Yep, we can store information about this byte
2296 sequence. */
2297 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2298
2299 if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2300 && class_bit != 0)
2301 /* We have the UCS4 position. */
2302 *find_idx (ctype, &ctype->class_collection,
2303 &ctype->class_collection_max,
2304 &ctype->class_collection_act, wch) |= class_bit;
2305
2306 last_token = now->tok;
549b3c3a 2307 /* Terminate the string. */
9e2b7438
UD
2308 if (last_token == tok_bsymbol)
2309 {
2310 now->val.str.startmb[now->val.str.lenmb] = '\0';
2311 last_str = now->val.str.startmb;
2312 }
2313 else
2314 last_str = NULL;
4b10dd6c
UD
2315 last_wch = wch;
2316 memcpy (last_charcode, now->val.charcode.bytes, 16);
2317 last_charcode_len = now->val.charcode.nbytes;
2318
2319 if (!ignore_content && handle_digits == 1)
2320 {
2321 /* We must store the digit values. */
2322 if (ctype->mbdigits_act == ctype->mbdigits_max)
2323 {
b9eb05d6 2324 ctype->mbdigits_max += 10;
4b10dd6c
UD
2325 ctype->mbdigits = xrealloc (ctype->mbdigits,
2326 (ctype->mbdigits_max
2327 * sizeof (char *)));
b9eb05d6 2328 ctype->wcdigits_max += 10;
4b10dd6c
UD
2329 ctype->wcdigits = xrealloc (ctype->wcdigits,
2330 (ctype->wcdigits_max
2331 * sizeof (uint32_t)));
2332 }
2333
2334 ctype->mbdigits[ctype->mbdigits_act++] = seq;
2335 ctype->wcdigits[ctype->wcdigits_act++] = wch;
2336 }
2337 else if (!ignore_content && handle_digits == 2)
2338 {
2339 /* We must store the digit values. */
2340 if (ctype->outdigits_act >= 10)
2341 {
2342 lr_error (ldfile, _("\
2343%s: field `%s' does not contain exactly ten entries"),
2344 "LC_CTYPE", "outdigit");
69c69fe1
UD
2345 lr_ignore_rest (ldfile, 0);
2346 break;
4b10dd6c
UD
2347 }
2348
2349 ctype->mboutdigits[ctype->outdigits_act] = seq;
2350 ctype->wcoutdigits[ctype->outdigits_act] = wch;
2351 ++ctype->outdigits_act;
2352 }
2353 }
2354 else
2355 {
2356 /* Now it gets complicated. We have to resolve the
2357 ellipsis problem. First we must distinguish between
2358 the different kind of ellipsis and this must match the
2359 tokens we have seen. */
2360 assert (last_token != tok_none);
2361
2362 if (last_token != now->tok)
2363 {
2364 lr_error (ldfile, _("\
2365ellipsis range must be marked by two operands of same type"));
2366 lr_ignore_rest (ldfile, 0);
2367 break;
2368 }
2369
2370 if (last_token == tok_bsymbol)
2371 {
2372 if (ellipsis_token == tok_ellipsis3)
2373 lr_error (ldfile, _("with symbolic name range values \
2374the absolute ellipsis `...' must not be used"));
2375
2376 charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2377 repertoire, now, last_str,
2378 class256_bit, class_bit,
2379 (ellipsis_token
2380 == tok_ellipsis4
2381 ? 10 : 16),
2382 ignore_content,
a0dc5206 2383 handle_digits, step);
4b10dd6c
UD
2384 }
2385 else if (last_token == tok_ucs4)
2386 {
2387 if (ellipsis_token != tok_ellipsis2)
2388 lr_error (ldfile, _("\
2389with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2390
2391 charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2392 repertoire, now, last_wch,
2393 class256_bit, class_bit,
a0dc5206
UD
2394 ignore_content, handle_digits,
2395 step);
4b10dd6c
UD
2396 }
2397 else
2398 {
2399 assert (last_token == tok_charcode);
2400
2401 if (ellipsis_token != tok_ellipsis3)
2402 lr_error (ldfile, _("\
2403with character code range values one must use the absolute ellipsis `...'"));
2404
2405 charclass_charcode_ellipsis (ldfile, ctype, charmap,
2406 repertoire, now,
2407 last_charcode,
2408 last_charcode_len,
2409 class256_bit, class_bit,
2410 ignore_content,
2411 handle_digits);
2412 }
2413
2414 /* Now we have used the last value. */
2415 last_token = tok_none;
2416 }
2417
2418 /* Next we expect a semicolon or the end of the line. */
47e8b443 2419 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2420 if (now->tok == tok_eol || now->tok == tok_eof)
2421 break;
2422
2423 if (last_token != tok_none
a0dc5206 2424 && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
4b10dd6c 2425 {
a0dc5206
UD
2426 if (now->tok == tok_ellipsis2_2)
2427 {
2428 now->tok = tok_ellipsis2;
2429 step = 2;
2430 }
2431 else if (now->tok == tok_ellipsis4_2)
2432 {
2433 now->tok = tok_ellipsis4;
2434 step = 2;
2435 }
2436
4b10dd6c 2437 ellipsis_token = now->tok;
a0dc5206 2438
47e8b443 2439 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2440 continue;
2441 }
2442
2443 if (now->tok != tok_semicolon)
2444 goto err_label;
2445
2446 /* And get the next character. */
47e8b443 2447 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2448
2449 ellipsis_token = tok_none;
a0dc5206 2450 step = 1;
4b10dd6c
UD
2451 }
2452 break;
2453
2454 case tok_digit:
b9eb05d6
UD
2455 /* Ignore the rest of the line if we don't need the input of
2456 this line. */
2457 if (ignore_content)
42d7c593
UD
2458 {
2459 lr_ignore_rest (ldfile, 0);
2460 break;
2461 }
b9eb05d6 2462
4b10dd6c
UD
2463 handle_tok_digit:
2464 class_bit = _ISwdigit;
2465 class256_bit = _ISdigit;
2466 handle_digits = 1;
2467 goto read_charclass;
2468
2469 case tok_outdigit:
b9eb05d6
UD
2470 /* Ignore the rest of the line if we don't need the input of
2471 this line. */
2472 if (ignore_content)
2473 {
2474 lr_ignore_rest (ldfile, 0);
2475 break;
2476 }
2477
4b10dd6c
UD
2478 if (ctype->outdigits_act != 0)
2479 lr_error (ldfile, _("\
2480%s: field `%s' declared more than once"),
2481 "LC_CTYPE", "outdigit");
2482 class_bit = 0;
2483 class256_bit = 0;
2484 handle_digits = 2;
2485 goto read_charclass;
2486
2487 case tok_toupper:
b9eb05d6
UD
2488 /* Ignore the rest of the line if we don't need the input of
2489 this line. */
2490 if (ignore_content)
2491 {
2492 lr_ignore_rest (ldfile, 0);
2493 break;
2494 }
2495
4b10dd6c
UD
2496 mapidx = 0;
2497 goto read_mapping;
2498
2499 case tok_tolower:
b9eb05d6
UD
2500 /* Ignore the rest of the line if we don't need the input of
2501 this line. */
2502 if (ignore_content)
2503 {
2504 lr_ignore_rest (ldfile, 0);
2505 break;
2506 }
2507
4b10dd6c
UD
2508 mapidx = 1;
2509 goto read_mapping;
2510
2511 case tok_map:
b9eb05d6
UD
2512 /* Ignore the rest of the line if we don't need the input of
2513 this line. */
2514 if (ignore_content)
2515 {
2516 lr_ignore_rest (ldfile, 0);
2517 break;
2518 }
2519
4b10dd6c
UD
2520 /* We simply forget the `map' keyword and use the following
2521 operand to determine the mapping. */
47e8b443 2522 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2523 if (now->tok == tok_ident || now->tok == tok_string)
2524 {
2525 size_t cnt;
2526
2527 for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2528 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2529 break;
2530
7f653277
UD
2531 if (cnt < ctype->map_collection_nr)
2532 free (now->val.str.startmb);
2533 else
87372aa9
UD
2534 /* OK, it's a new map. */
2535 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2536
2537 mapidx = cnt;
4b10dd6c
UD
2538 }
2539 else if (now->tok < tok_toupper || now->tok > tok_tolower)
2540 goto err_label;
2541 else
2542 mapidx = now->tok - tok_toupper;
2543
47e8b443 2544 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2545 /* This better should be a semicolon. */
2546 if (now->tok != tok_semicolon)
2547 goto err_label;
2548
2549 read_mapping:
2550 /* Test whether this mapping was already defined. */
2551 if (ctype->tomap_done[mapidx])
2552 {
2553 lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2554 ctype->mapnames[mapidx]);
2555 lr_ignore_rest (ldfile, 0);
2556 break;
2557 }
2558 ctype->tomap_done[mapidx] = 1;
2559
47e8b443 2560 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2561 while (now->tok != tok_eol && now->tok != tok_eof)
2562 {
2563 struct charseq *from_seq;
2564 uint32_t from_wch;
2565 struct charseq *to_seq;
2566 uint32_t to_wch;
2567
2568 /* Every pair starts with an opening brace. */
2569 if (now->tok != tok_open_brace)
2570 goto err_label;
2571
2572 /* Next comes the from-value. */
47e8b443 2573 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2574 if (get_character (now, charmap, repertoire, &from_seq,
2575 &from_wch) != 0)
2576 goto err_label;
2577
2578 /* The next is a comma. */
47e8b443 2579 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2580 if (now->tok != tok_comma)
2581 goto err_label;
2582
2583 /* And the other value. */
47e8b443 2584 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2585 if (get_character (now, charmap, repertoire, &to_seq,
2586 &to_wch) != 0)
2587 goto err_label;
2588
2589 /* And the last thing is the closing brace. */
47e8b443 2590 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2591 if (now->tok != tok_close_brace)
2592 goto err_label;
2593
2594 if (!ignore_content)
2595 {
f0c7c524
UD
2596 /* Check whether the mapping converts from an ASCII value
2597 to a non-ASCII value. */
2598 if (from_seq != NULL && from_seq->nbytes == 1
2599 && isascii (from_seq->bytes[0])
2600 && to_seq != NULL && (to_seq->nbytes != 1
2601 || !isascii (to_seq->bytes[0])))
2602 ctype->to_nonascii = 1;
2603
4b10dd6c
UD
2604 if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2605 && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2606 /* We can use this value. */
2607 ctype->map256_collection[mapidx][from_seq->bytes[0]]
2608 = to_seq->bytes[0];
2609
2610 if (from_wch != ILLEGAL_CHAR_VALUE
2611 && to_wch != ILLEGAL_CHAR_VALUE)
2612 /* Both correct values. */
2613 *find_idx (ctype, &ctype->map_collection[mapidx],
2614 &ctype->map_collection_max[mapidx],
2615 &ctype->map_collection_act[mapidx],
2616 from_wch) = to_wch;
2617 }
2618
2619 /* Now comes a semicolon or the end of the line/file. */
47e8b443 2620 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c 2621 if (now->tok == tok_semicolon)
47e8b443 2622 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2623 }
2624 break;
2625
2626 case tok_translit_start:
02fb3d17
UD
2627 /* Ignore the entire translit section with its peculiar syntax
2628 if we don't need the input. */
b9eb05d6
UD
2629 if (ignore_content)
2630 {
02fb3d17
UD
2631 do
2632 {
2633 lr_ignore_rest (ldfile, 0);
47e8b443 2634 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
02fb3d17
UD
2635 }
2636 while (now->tok != tok_translit_end && now->tok != tok_eof);
2637
2638 if (now->tok == tok_eof)
2639 lr_error (ldfile, _(\
2640"%s: `translit_start' section does not end with `translit_end'"),
2641 "LC_CTYPE");
2642
b9eb05d6
UD
2643 break;
2644 }
2645
4b10dd6c
UD
2646 /* The rest of the line better should be empty. */
2647 lr_ignore_rest (ldfile, 1);
2648
2649 /* We count here the number of allocated entries in the `translit'
2650 array. */
2651 cnt = 0;
2652
4b156cb2
UD
2653 ldfile->translate_strings = 1;
2654 ldfile->return_widestr = 1;
2655
4b10dd6c 2656 /* We proceed until we see the `translit_end' token. */
47e8b443 2657 while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
4b10dd6c
UD
2658 now->tok != tok_translit_end && now->tok != tok_eof)
2659 {
2660 if (now->tok == tok_eol)
2661 /* Ignore empty lines. */
2662 continue;
2663
4b10dd6c
UD
2664 if (now->tok == tok_include)
2665 {
2666 /* We have to include locale. */
2667 const char *locale_name;
2668 const char *repertoire_name;
02fb3d17 2669 struct translit_include_t *include_stmt, **include_ptr;
4b10dd6c 2670
47e8b443 2671 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2672 /* This should be a string or an identifier. In any
2673 case something to name a locale. */
2674 if (now->tok != tok_string && now->tok != tok_ident)
2675 {
2676 translit_syntax:
2677 lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2678 lr_ignore_rest (ldfile, 0);
2679 continue;
2680 }
2681 locale_name = now->val.str.startmb;
2682
2683 /* Next should be a semicolon. */
47e8b443 2684 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2685 if (now->tok != tok_semicolon)
2686 goto translit_syntax;
2687
2688 /* Now the repertoire name. */
47e8b443 2689 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2690 if ((now->tok != tok_string && now->tok != tok_ident)
2691 || now->val.str.startmb == NULL)
2692 goto translit_syntax;
2693 repertoire_name = now->val.str.startmb;
d9cab009
RM
2694 if (repertoire_name[0] == '\0')
2695 /* Ignore the empty string. */
2696 repertoire_name = NULL;
4b10dd6c 2697
02fb3d17
UD
2698 /* Save the include statement for later processing. */
2699 include_stmt = (struct translit_include_t *)
2700 xmalloc (sizeof (struct translit_include_t));
2701 include_stmt->copy_locale = locale_name;
2702 include_stmt->copy_repertoire = repertoire_name;
2703 include_stmt->next = NULL;
4b10dd6c 2704
02fb3d17
UD
2705 include_ptr = &ctype->translit_include;
2706 while (*include_ptr != NULL)
2707 include_ptr = &(*include_ptr)->next;
2708 *include_ptr = include_stmt;
4b10dd6c
UD
2709
2710 /* The rest of the line must be empty. */
2711 lr_ignore_rest (ldfile, 1);
a673fbcb
UD
2712
2713 /* Make sure the locale is read. */
02fb3d17
UD
2714 add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2715 1, NULL);
a673fbcb
UD
2716 continue;
2717 }
2718 else if (now->tok == tok_default_missing)
2719 {
2720 uint32_t *wstr;
2721
c9f79e08 2722 while (1)
a673fbcb 2723 {
c9f79e08
UD
2724 /* We expect a single character or string as the
2725 argument. */
47e8b443 2726 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
c9f79e08
UD
2727 wstr = read_widestring (ldfile, now, charmap,
2728 repertoire);
2729
2730 if (wstr != NULL)
a673fbcb 2731 {
c9f79e08
UD
2732 if (ctype->default_missing != NULL)
2733 {
2734 lr_error (ldfile, _("\
a673fbcb 2735%s: duplicate `default_missing' definition"), "LC_CTYPE");
f16491eb
CD
2736 record_error_at_line (0, 0,
2737 ctype->default_missing_file,
2738 ctype->default_missing_lineno,
2739 _("\
2740previous definition was here"));
c9f79e08
UD
2741 }
2742 else
2743 {
2744 ctype->default_missing = wstr;
2745 ctype->default_missing_file = ldfile->fname;
2746 ctype->default_missing_lineno = ldfile->lineno;
2747 }
4b156cb2
UD
2748 /* We can have more entries, ignore them. */
2749 lr_ignore_rest (ldfile, 0);
c9f79e08 2750 break;
a673fbcb 2751 }
c9f79e08
UD
2752 else if (wstr == (uint32_t *) -1l)
2753 /* This was an syntax error. */
2754 break;
2755
2756 /* Maybe there is another replacement we can use. */
47e8b443 2757 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
c9f79e08 2758 if (now->tok == tok_eol || now->tok == tok_eof)
a673fbcb 2759 {
c9f79e08
UD
2760 /* Nothing found. We tell the user. */
2761 lr_error (ldfile, _("\
0232a3ae 2762%s: no representable `default_missing' definition found"), "LC_CTYPE");
c9f79e08 2763 break;
a673fbcb 2764 }
c9f79e08
UD
2765 if (now->tok != tok_semicolon)
2766 goto translit_syntax;
a673fbcb 2767 }
c9f79e08 2768
a673fbcb
UD
2769 continue;
2770 }
2771 else if (now->tok == tok_translit_ignore)
2772 {
2773 read_translit_ignore_entry (ldfile, ctype, charmap,
2774 repertoire);
4b10dd6c
UD
2775 continue;
2776 }
2777
2778 read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2779 }
4b156cb2 2780 ldfile->return_widestr = 0;
02fb3d17
UD
2781
2782 if (now->tok == tok_eof)
2783 lr_error (ldfile, _(\
2784"%s: `translit_start' section does not end with `translit_end'"),
2785 "LC_CTYPE");
2786
4b10dd6c
UD
2787 break;
2788
2789 case tok_ident:
b9eb05d6
UD
2790 /* Ignore the rest of the line if we don't need the input of
2791 this line. */
2792 if (ignore_content)
2793 {
2794 lr_ignore_rest (ldfile, 0);
2795 break;
2796 }
2797
4b10dd6c
UD
2798 /* This could mean one of several things. First test whether
2799 it's a character class name. */
2800 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2801 if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2802 break;
2803 if (cnt < ctype->nr_charclass)
2804 {
2805 class_bit = _ISwbit (cnt);
2806 class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2807 free (now->val.str.startmb);
2808 goto read_charclass;
2809 }
5491da0d
UD
2810 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2811 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2812 break;
2813 if (cnt < ctype->map_collection_nr)
2814 {
2815 mapidx = cnt;
2816 free (now->val.str.startmb);
2817 goto read_mapping;
2818 }
4b10dd6c
UD
2819 break;
2820
2821 case tok_end:
2822 /* Next we assume `LC_CTYPE'. */
47e8b443 2823 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c
UD
2824 if (now->tok == tok_eof)
2825 break;
2826 if (now->tok == tok_eol)
2827 lr_error (ldfile, _("%s: incomplete `END' line"),
2828 "LC_CTYPE");
2829 else if (now->tok != tok_lc_ctype)
2830 lr_error (ldfile, _("\
2831%1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2832 lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2833 return;
2834
2835 default:
2836 err_label:
2837 if (now->tok != tok_eof)
2838 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
19bc17a9
RM
2839 }
2840
4b10dd6c 2841 /* Prepare for the next round. */
47e8b443 2842 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
4b10dd6c 2843 nowtok = now->tok;
19bc17a9
RM
2844 }
2845
4b10dd6c
UD
2846 /* When we come here we reached the end of the file. */
2847 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
19bc17a9
RM
2848}
2849
2850
d7e49b19
RM
2851/* Subroutine of set_class_defaults, below. */
2852static void
2853set_one_default (struct locale_ctype_t *ctype,
2854 const struct charmap_t *charmap,
2855 int bitpos, int from, int to)
2856{
2857 char tmp[2];
2858 int ch;
2859 int bit = _ISbit (bitpos);
2860 int bitw = _ISwbit (bitpos);
2861 /* Define string. */
2862 strcpy (tmp, "?");
2863
2864 for (ch = from; ch <= to; ++ch)
2865 {
2866 struct charseq *seq;
2867 tmp[0] = ch;
2868
2869 seq = charmap_find_value (charmap, tmp, 1);
2870 if (seq == NULL)
2871 {
2872 char buf[10];
2873 sprintf (buf, "U%08X", ch);
2874 seq = charmap_find_value (charmap, buf, 9);
2875 }
2876 if (seq == NULL)
2877 {
f16491eb 2878 record_error (0, 0, _("\
d7e49b19 2879%s: character `%s' not defined while needed as default value"),
f16491eb 2880 "LC_CTYPE", tmp);
d7e49b19
RM
2881 }
2882 else if (seq->nbytes != 1)
f16491eb 2883 record_error (0, 0, _("\
d7e49b19 2884%s: character `%s' in charmap not representable with one byte"),
f16491eb 2885 "LC_CTYPE", tmp);
d7e49b19
RM
2886 else
2887 ctype->class256_collection[seq->bytes[0]] |= bit;
2888
2889 /* No need to search here, the ASCII value is also the Unicode
2890 value. */
2891 ELEM (ctype, class_collection, , ch) |= bitw;
2892 }
2893}
2894
19bc17a9 2895static void
47e8b443
UD
2896set_class_defaults (struct locale_ctype_t *ctype,
2897 const struct charmap_t *charmap,
4b10dd6c 2898 struct repertoire_t *repertoire)
19bc17a9 2899{
d7e49b19
RM
2900#define set_default(bitpos, from, to) \
2901 set_one_default (ctype, charmap, bitpos, from, to)
4b10dd6c 2902
19bc17a9
RM
2903 /* These function defines the default values for the classes and conversions
2904 according to POSIX.2 2.5.2.1.
2905 It may seem that the order of these if-blocks is arbitrary but it is NOT.
2906 Don't move them unless you know what you do! */
2907
19bc17a9 2908 /* Set default values if keyword was not present. */
4b10dd6c 2909 if ((ctype->class_done & BITw (tok_upper)) == 0)
19bc17a9
RM
2910 /* "If this keyword [lower] is not specified, the lowercase letters
2911 `A' through `Z', ..., shall automatically belong to this class,
2912 with implementation defined character values." [P1003.2, 2.5.2.1] */
4b10dd6c 2913 set_default (BITPOS (tok_upper), 'A', 'Z');
19bc17a9 2914
4b10dd6c 2915 if ((ctype->class_done & BITw (tok_lower)) == 0)
19bc17a9
RM
2916 /* "If this keyword [lower] is not specified, the lowercase letters
2917 `a' through `z', ..., shall automatically belong to this class,
2918 with implementation defined character values." [P1003.2, 2.5.2.1] */
4b10dd6c 2919 set_default (BITPOS (tok_lower), 'a', 'z');
19bc17a9 2920
4b10dd6c 2921 if ((ctype->class_done & BITw (tok_alpha)) == 0)
19bc17a9
RM
2922 {
2923 /* Table 2-6 in P1003.2 says that characters in class `upper' or
2924 class `lower' *must* be in class `alpha'. */
2925 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
96f0d1f5
UD
2926 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
2927
d7e49b19 2928 for (size_t cnt = 0; cnt < 256; ++cnt)
96f0d1f5
UD
2929 if ((ctype->class256_collection[cnt] & mask) != 0)
2930 ctype->class256_collection[cnt] |= BIT (tok_alpha);
19bc17a9 2931
d7e49b19 2932 for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
96f0d1f5
UD
2933 if ((ctype->class_collection[cnt] & maskw) != 0)
2934 ctype->class_collection[cnt] |= BITw (tok_alpha);
19bc17a9
RM
2935 }
2936
4b10dd6c 2937 if ((ctype->class_done & BITw (tok_digit)) == 0)
19bc17a9
RM
2938 /* "If this keyword [digit] is not specified, the digits `0' through
2939 `9', ..., shall automatically belong to this class, with
2940 implementation-defined character values." [P1003.2, 2.5.2.1] */
4b10dd6c 2941 set_default (BITPOS (tok_digit), '0', '9');
19bc17a9
RM
2942
2943 /* "Only characters specified for the `alpha' and `digit' keyword
2944 shall be specified. Characters specified for the keyword `alpha'
2945 and `digit' are automatically included in this class. */
2946 {
2947 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
96f0d1f5
UD
2948 unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
2949
d7e49b19 2950 for (size_t cnt = 0; cnt < 256; ++cnt)
96f0d1f5
UD
2951 if ((ctype->class256_collection[cnt] & mask) != 0)
2952 ctype->class256_collection[cnt] |= BIT (tok_alnum);
19bc17a9 2953
d7e49b19 2954 for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
96f0d1f5
UD
2955 if ((ctype->class_collection[cnt] & maskw) != 0)
2956 ctype->class_collection[cnt] |= BITw (tok_alnum);
19bc17a9
RM
2957 }
2958
4b10dd6c 2959 if ((ctype->class_done & BITw (tok_space)) == 0)
19bc17a9
RM
2960 /* "If this keyword [space] is not specified, the characters <space>,
2961 <form-feed>, <newline>, <carriage-return>, <tab>, and
2962 <vertical-tab>, ..., shall automatically belong to this class,
2963 with implementation-defined character values." [P1003.2, 2.5.2.1] */
2964 {
4b10dd6c 2965 struct charseq *seq;
19bc17a9 2966
4b10dd6c 2967 seq = charmap_find_value (charmap, "space", 5);
45c95239
UD
2968 if (seq == NULL)
2969 seq = charmap_find_value (charmap, "SP", 2);
f0a4b6b1
UD
2970 if (seq == NULL)
2971 seq = charmap_find_value (charmap, "U00000020", 9);
4b10dd6c 2972 if (seq == NULL)
880f421f 2973 {
f16491eb 2974 record_error (0, 0, _("\
4b10dd6c 2975%s: character `%s' not defined while needed as default value"),
f16491eb 2976 "LC_CTYPE", "<space>");
4b10dd6c
UD
2977 }
2978 else if (seq->nbytes != 1)
f16491eb 2979 record_error (0, 0, _("\
4b10dd6c 2980%s: character `%s' in charmap not representable with one byte"),
f16491eb 2981 "LC_CTYPE", "<space>");
4b10dd6c
UD
2982 else
2983 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2984
f0a4b6b1 2985 /* No need to search. */
ce177a84 2986 ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
19bc17a9 2987
4b10dd6c 2988 seq = charmap_find_value (charmap, "form-feed", 9);
f0a4b6b1
UD
2989 if (seq == NULL)
2990 seq = charmap_find_value (charmap, "U0000000C", 9);
4b10dd6c 2991 if (seq == NULL)
880f421f 2992 {
f16491eb 2993 record_error (0, 0, _("\
4b10dd6c 2994%s: character `%s' not defined while needed as default value"),
f16491eb 2995 "LC_CTYPE", "<form-feed>");
4b10dd6c
UD
2996 }
2997 else if (seq->nbytes != 1)
f16491eb 2998 record_error (0, 0, _("\
4b10dd6c 2999%s: character `%s' in charmap not representable with one byte"),
f16491eb 3000 "LC_CTYPE", "<form-feed>");
4b10dd6c
UD
3001 else
3002 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3003
f0a4b6b1 3004 /* No need to search. */
ce177a84 3005 ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
4b10dd6c 3006
19bc17a9 3007
4b10dd6c 3008 seq = charmap_find_value (charmap, "newline", 7);
f0a4b6b1
UD
3009 if (seq == NULL)
3010 seq = charmap_find_value (charmap, "U0000000A", 9);
4b10dd6c 3011 if (seq == NULL)
880f421f 3012 {
f16491eb 3013 record_error (0, 0, _("\
11bf311e 3014%s: character `%s' not defined while needed as default value"),
f16491eb 3015 "LC_CTYPE", "<newline>");
4b10dd6c
UD
3016 }
3017 else if (seq->nbytes != 1)
f16491eb 3018 record_error (0, 0, _("\
4b10dd6c 3019%s: character `%s' in charmap not representable with one byte"),
f16491eb 3020 "LC_CTYPE", "<newline>");
4b10dd6c
UD
3021 else
3022 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3023
f0a4b6b1 3024 /* No need to search. */
ce177a84 3025 ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
4b10dd6c 3026
19bc17a9 3027
4b10dd6c 3028 seq = charmap_find_value (charmap, "carriage-return", 15);
f0a4b6b1
UD
3029 if (seq == NULL)
3030 seq = charmap_find_value (charmap, "U0000000D", 9);
4b10dd6c 3031 if (seq == NULL)
880f421f 3032 {
f16491eb 3033 record_error (0, 0, _("\
4b10dd6c 3034%s: character `%s' not defined while needed as default value"),
f16491eb 3035 "LC_CTYPE", "<carriage-return>");
4b10dd6c
UD
3036 }
3037 else if (seq->nbytes != 1)
f16491eb 3038 record_error (0, 0, _("\
4b10dd6c 3039%s: character `%s' in charmap not representable with one byte"),
f16491eb 3040 "LC_CTYPE", "<carriage-return>");
4b10dd6c
UD
3041 else
3042 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3043
f0a4b6b1 3044 /* No need to search. */
ce177a84 3045 ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
4b10dd6c 3046
19bc17a9 3047
4b10dd6c 3048 seq = charmap_find_value (charmap, "tab", 3);
f0a4b6b1
UD
3049 if (seq == NULL)
3050 seq = charmap_find_value (charmap, "U00000009", 9);
4b10dd6c 3051 if (seq == NULL)
880f421f 3052 {
f16491eb 3053 record_error (0, 0, _("\
4b10dd6c 3054%s: character `%s' not defined while needed as default value"),
f16491eb 3055 "LC_CTYPE", "<tab>");
4b10dd6c
UD
3056 }
3057 else if (seq->nbytes != 1)
f16491eb 3058 record_error (0, 0, _("\
4b10dd6c 3059%s: character `%s' in charmap not representable with one byte"),
f16491eb 3060 "LC_CTYPE", "<tab>");
4b10dd6c
UD
3061 else
3062 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3063
f0a4b6b1 3064 /* No need to search. */
ce177a84 3065 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
4b10dd6c 3066
4b10dd6c
UD
3067
3068 seq = charmap_find_value (charmap, "vertical-tab", 12);
f0a4b6b1
UD
3069 if (seq == NULL)
3070 seq = charmap_find_value (charmap, "U0000000B", 9);
4b10dd6c
UD
3071 if (seq == NULL)
3072 {
f16491eb 3073 record_error (0, 0, _("\
4b10dd6c 3074%s: character `%s' not defined while needed as default value"),
f16491eb 3075 "LC_CTYPE", "<vertical-tab>");
4b10dd6c
UD
3076 }
3077 else if (seq->nbytes != 1)
f16491eb 3078 record_error (0, 0, _("\
4b10dd6c 3079%s: character `%s' in charmap not representable with one byte"),
f16491eb 3080 "LC_CTYPE", "<vertical-tab>");
4b10dd6c
UD
3081 else
3082 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
f0a4b6b1
UD
3083
3084 /* No need to search. */
ce177a84 3085 ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
19bc17a9
RM
3086 }
3087
4b10dd6c 3088 if ((ctype->class_done & BITw (tok_xdigit)) == 0)
19bc17a9
RM
3089 /* "If this keyword is not specified, the digits `0' to `9', the
3090 uppercase letters `A' through `F', and the lowercase letters `a'
3091 through `f', ..., shall automatically belong to this class, with
3092 implementation defined character values." [P1003.2, 2.5.2.1] */
3093 {
4b10dd6c
UD
3094 set_default (BITPOS (tok_xdigit), '0', '9');
3095 set_default (BITPOS (tok_xdigit), 'A', 'F');
3096 set_default (BITPOS (tok_xdigit), 'a', 'f');
19bc17a9
RM
3097 }
3098
4b10dd6c 3099 if ((ctype->class_done & BITw (tok_blank)) == 0)
19bc17a9
RM
3100 /* "If this keyword [blank] is unspecified, the characters <space> and
3101 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3102 {
4b10dd6c 3103 struct charseq *seq;
19bc17a9 3104
4b10dd6c 3105 seq = charmap_find_value (charmap, "space", 5);
45c95239
UD
3106 if (seq == NULL)
3107 seq = charmap_find_value (charmap, "SP", 2);
f0a4b6b1
UD
3108 if (seq == NULL)
3109 seq = charmap_find_value (charmap, "U00000020", 9);
4b10dd6c 3110 if (seq == NULL)
880f421f 3111 {
f16491eb 3112 record_error (0, 0, _("\
4b10dd6c 3113%s: character `%s' not defined while needed as default value"),
f16491eb 3114 "LC_CTYPE", "<space>");
4b10dd6c
UD
3115 }
3116 else if (seq->nbytes != 1)
f16491eb 3117 record_error (0, 0, _("\
4b10dd6c 3118%s: character `%s' in charmap not representable with one byte"),
f16491eb 3119 "LC_CTYPE", "<space>");
4b10dd6c
UD
3120 else
3121 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3122
f0a4b6b1 3123 /* No need to search. */
ce177a84 3124 ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
4b10dd6c 3125
4b10dd6c
UD
3126
3127 seq = charmap_find_value (charmap, "tab", 3);
f0a4b6b1
UD
3128 if (seq == NULL)
3129 seq = charmap_find_value (charmap, "U00000009", 9);
4b10dd6c
UD
3130 if (seq == NULL)
3131 {
f16491eb 3132 record_error (0, 0, _("\
4b10dd6c 3133%s: character `%s' not defined while needed as default value"),
f16491eb 3134 "LC_CTYPE", "<tab>");
4b10dd6c
UD
3135 }
3136 else if (seq->nbytes != 1)
f16491eb 3137 record_error (0, 0, _("\
4b10dd6c 3138%s: character `%s' in charmap not representable with one byte"),
f16491eb 3139 "LC_CTYPE", "<tab>");
4b10dd6c
UD
3140 else
3141 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
f0a4b6b1
UD
3142
3143 /* No need to search. */
ce177a84 3144 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
19bc17a9
RM
3145 }
3146
4b10dd6c 3147 if ((ctype->class_done & BITw (tok_graph)) == 0)
19bc17a9
RM
3148 /* "If this keyword [graph] is not specified, characters specified for
3149 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3150 shall belong to this character class." [P1003.2, 2.5.2.1] */
3151 {
34a5a146
JM
3152 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower)
3153 | BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit)
3154 | BIT (tok_punct);
3155 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower)
3156 | BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit)
3157 | BITw (tok_punct);
19bc17a9 3158
d7e49b19 3159 for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
ce177a84
UD
3160 if ((ctype->class_collection[cnt] & maskw) != 0)
3161 ctype->class_collection[cnt] |= BITw (tok_graph);
4b10dd6c 3162
d7e49b19 3163 for (size_t cnt = 0; cnt < 256; ++cnt)
4b10dd6c
UD
3164 if ((ctype->class256_collection[cnt] & mask) != 0)
3165 ctype->class256_collection[cnt] |= BIT (tok_graph);
19bc17a9
RM
3166 }
3167
4b10dd6c 3168 if ((ctype->class_done & BITw (tok_print)) == 0)
19bc17a9
RM
3169 /* "If this keyword [print] is not provided, characters specified for
3170 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3171 and the <space> character shall belong to this character class."
3172 [P1003.2, 2.5.2.1] */
3173 {
34a5a146
JM
3174 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower)
3175 | BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit)
3176 | BIT (tok_punct);
3177 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower)
3178 | BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit)
3179 | BITw (tok_punct);
4b10dd6c 3180 struct charseq *seq;
19bc17a9 3181
d7e49b19 3182 for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
ce177a84
UD
3183 if ((ctype->class_collection[cnt] & maskw) != 0)
3184 ctype->class_collection[cnt] |= BITw (tok_print);
19bc17a9 3185
d7e49b19 3186 for (size_t cnt = 0; cnt < 256; ++cnt)
4b10dd6c
UD
3187 if ((ctype->class256_collection[cnt] & mask) != 0)
3188 ctype->class256_collection[cnt] |= BIT (tok_print);
3189
3190
4b10dd6c 3191 seq = charmap_find_value (charmap, "space", 5);
45c95239
UD
3192 if (seq == NULL)
3193 seq = charmap_find_value (charmap, "SP", 2);
f0a4b6b1
UD
3194 if (seq == NULL)
3195 seq = charmap_find_value (charmap, "U00000020", 9);
4b10dd6c
UD
3196 if (seq == NULL)
3197 {
f16491eb 3198 record_error (0, 0, _("\
4b10dd6c 3199%s: character `%s' not defined while needed as default value"),
f16491eb 3200 "LC_CTYPE", "<space>");
4b10dd6c
UD
3201 }
3202 else if (seq->nbytes != 1)
f16491eb 3203 record_error (0, 0, _("\
4b10dd6c 3204%s: character `%s' in charmap not representable with one byte"),
f16491eb 3205 "LC_CTYPE", "<space>");
4b10dd6c
UD
3206 else
3207 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
f0a4b6b1
UD
3208
3209 /* No need to search. */
ce177a84 3210 ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
19bc17a9
RM
3211 }
3212
4b10dd6c 3213 if (ctype->tomap_done[0] == 0)
6d52618b 3214 /* "If this keyword [toupper] is not specified, the lowercase letters
19bc17a9
RM
3215 `a' through `z', and their corresponding uppercase letters `A' to
3216 `Z', ..., shall automatically be included, with implementation-
3217 defined character values." [P1003.2, 2.5.2.1] */
3218 {
3219 char tmp[4];
3220 int ch;
3221
3222 strcpy (tmp, "<?>");
3223
3224 for (ch = 'a'; ch <= 'z'; ++ch)
3225 {
4b10dd6c 3226 struct charseq *seq_from, *seq_to;
19bc17a9
RM
3227
3228 tmp[1] = (char) ch;
3229
4b10dd6c 3230 seq_from = charmap_find_value (charmap, &tmp[1], 1);
69c69fe1
UD
3231 if (seq_from == NULL)
3232 {
3233 char buf[10];
3234 sprintf (buf, "U%08X", ch);
3235 seq_from = charmap_find_value (charmap, buf, 9);
3236 }
4b10dd6c 3237 if (seq_from == NULL)
19bc17a9 3238 {
f16491eb 3239 record_error (0, 0, _("\
4b10dd6c 3240%s: character `%s' not defined while needed as default value"),
f16491eb 3241 "LC_CTYPE", tmp);
4b10dd6c
UD
3242 }
3243 else if (seq_from->nbytes != 1)
3244 {
f16491eb 3245 record_error (0, 0, _("\
4b10dd6c 3246%s: character `%s' needed as default value not representable with one byte"),
f16491eb 3247 "LC_CTYPE", tmp);
4b10dd6c
UD
3248 }
3249 else
3250 {
3251 /* This conversion is implementation defined. */
3252 tmp[1] = (char) (ch + ('A' - 'a'));
3253 seq_to = charmap_find_value (charmap, &tmp[1], 1);
69c69fe1
UD
3254 if (seq_to == NULL)
3255 {
3256 char buf[10];
3257 sprintf (buf, "U%08X", ch + ('A' - 'a'));
3258 seq_to = charmap_find_value (charmap, buf, 9);
3259 }
4b10dd6c
UD
3260 if (seq_to == NULL)
3261 {
f16491eb 3262 record_error (0, 0, _("\
4b10dd6c 3263%s: character `%s' not defined while needed as default value"),
f16491eb 3264 "LC_CTYPE", tmp);
4b10dd6c
UD
3265 }
3266 else if (seq_to->nbytes != 1)
3267 {
f16491eb 3268 record_error (0, 0, _("\
4b10dd6c 3269%s: character `%s' needed as default value not representable with one byte"),
f16491eb 3270 "LC_CTYPE", tmp);
4b10dd6c
UD
3271 }
3272 else
3273 /* The index [0] is determined by the order of the
3274 `ctype_map_newP' calls in `ctype_startup'. */
3275 ctype->map256_collection[0][seq_from->bytes[0]]
3276 = seq_to->bytes[0];
19bc17a9 3277 }
f0a4b6b1
UD
3278
3279 /* No need to search. */
3280 ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
19bc17a9
RM
3281 }
3282 }
3283
4b10dd6c 3284 if (ctype->tomap_done[1] == 0)
19bc17a9
RM
3285 /* "If this keyword [tolower] is not specified, the mapping shall be
3286 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3287 {
d7e49b19 3288 for (size_t cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
19bc17a9
RM
3289 if (ctype->map_collection[0][cnt] != 0)
3290 ELEM (ctype, map_collection, [1],
3291 ctype->map_collection[0][cnt])
3292 = ctype->charnames[cnt];
4b10dd6c 3293
d7e49b19 3294 for (size_t cnt = 0; cnt < 256; ++cnt)
4b10dd6c 3295 if (ctype->map256_collection[0][cnt] != 0)
85cb60ff 3296 ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
4b10dd6c
UD
3297 }
3298
69c69fe1 3299 if (ctype->outdigits_act != 10)
4b10dd6c 3300 {
69c69fe1 3301 if (ctype->outdigits_act != 0)
f16491eb 3302 record_error (0, 0, _("\
f2b98f97 3303%s: field `%s' does not contain exactly ten entries"),
f16491eb 3304 "LC_CTYPE", "outdigit");
69c69fe1 3305
d7e49b19 3306 for (size_t cnt = ctype->outdigits_act; cnt < 10; ++cnt)
4b10dd6c
UD
3307 {
3308 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
701666b7
UD
3309 (char *) digits + cnt,
3310 1);
4b10dd6c
UD
3311
3312 if (ctype->mboutdigits[cnt] == NULL)
1b97149d
UD
3313 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3314 longnames[cnt],
3315 strlen (longnames[cnt]));
b9eb05d6 3316
1b97149d
UD
3317 if (ctype->mboutdigits[cnt] == NULL)
3318 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3319 uninames[cnt], 9);
b9eb05d6 3320
1b97149d 3321 if (ctype->mboutdigits[cnt] == NULL)
b9eb05d6 3322 {
1b97149d 3323 /* Provide a replacement. */
f16491eb
CD
3324 record_error (0, 0, _("\
3325no output digits defined and none of the standard names in the charmap"));
b9eb05d6 3326
47e8b443 3327 ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
1b97149d
UD
3328 sizeof (struct charseq)
3329 + 1);
b9eb05d6 3330
1b97149d
UD
3331 /* This is better than nothing. */
3332 ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3333 ctype->mboutdigits[cnt]->nbytes = 1;
b9eb05d6 3334 }
1b97149d
UD
3335
3336 ctype->wcoutdigits[cnt] = L'0' + cnt;
4b10dd6c
UD
3337 }
3338
3339 ctype->outdigits_act = 10;
19bc17a9 3340 }
d7e49b19
RM
3341
3342#undef set_default
19bc17a9
RM
3343}
3344
3345
ef446144
UD
3346/* Initialize. Assumes t->p and t->q have already been set. */
3347static inline void
3348wctype_table_init (struct wctype_table *t)
3349{
d6040f17 3350 t->level1 = NULL;
ef446144 3351 t->level1_alloc = t->level1_size = 0;
d6040f17 3352 t->level2 = NULL;
ef446144 3353 t->level2_alloc = t->level2_size = 0;
d6040f17 3354 t->level3 = NULL;
ef446144
UD
3355 t->level3_alloc = t->level3_size = 0;
3356}
3357
ec08818d
UD
3358/* Retrieve an entry. */
3359static inline int
3360wctype_table_get (struct wctype_table *t, uint32_t wc)
3361{
3362 uint32_t index1 = wc >> (t->q + t->p + 5);
3363 if (index1 < t->level1_size)
3364 {
3365 uint32_t lookup1 = t->level1[index1];
bd75759f 3366 if (lookup1 != EMPTY)
ec08818d
UD
3367 {
3368 uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3369 + (lookup1 << t->q);
3370 uint32_t lookup2 = t->level2[index2];
bd75759f 3371 if (lookup2 != EMPTY)
ec08818d
UD
3372 {
3373 uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3374 + (lookup2 << t->p);
3375 uint32_t lookup3 = t->level3[index3];
3376 uint32_t index4 = wc & 0x1f;
3377
3378 return (lookup3 >> index4) & 1;
3379 }
3380 }
3381 }
3382 return 0;
3383}
3384
ef446144
UD
3385/* Add one entry. */
3386static void
3387wctype_table_add (struct wctype_table *t, uint32_t wc)
3388{
3389 uint32_t index1 = wc >> (t->q + t->p + 5);
3390 uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3391 uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3392 uint32_t index4 = wc & 0x1f;
3393 size_t i, i1, i2;
3394
3395 if (index1 >= t->level1_size)
3396 {
3397 if (index1 >= t->level1_alloc)
3398 {
3399 size_t alloc = 2 * t->level1_alloc;
3400 if (alloc <= index1)
3401 alloc = index1 + 1;
d6040f17
UD
3402 t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
3403 alloc * sizeof (uint32_t));
ef446144
UD
3404 t->level1_alloc = alloc;
3405 }
3406 while (index1 >= t->level1_size)
bd75759f 3407 t->level1[t->level1_size++] = EMPTY;
ef446144
UD
3408 }
3409
bd75759f 3410 if (t->level1[index1] == EMPTY)
ef446144
UD
3411 {
3412 if (t->level2_size == t->level2_alloc)
3413 {
3414 size_t alloc = 2 * t->level2_alloc + 1;
d6040f17
UD
3415 t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
3416 (alloc << t->q) * sizeof (uint32_t));
ef446144
UD
3417 t->level2_alloc = alloc;
3418 }
3419 i1 = t->level2_size << t->q;
3420 i2 = (t->level2_size + 1) << t->q;
3421 for (i = i1; i < i2; i++)
bd75759f 3422 t->level2[i] = EMPTY;
ef446144
UD
3423 t->level1[index1] = t->level2_size++;
3424 }
3425
3426 index2 += t->level1[index1] << t->q;
3427
bd75759f 3428 if (t->level2[index2] == EMPTY)
ef446144
UD
3429 {
3430 if (t->level3_size == t->level3_alloc)
3431 {
3432 size_t alloc = 2 * t->level3_alloc + 1;
d6040f17
UD
3433 t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
3434 (alloc << t->p) * sizeof (uint32_t));
ef446144
UD
3435 t->level3_alloc = alloc;
3436 }
3437 i1 = t->level3_size << t->p;
3438 i2 = (t->level3_size + 1) << t->p;
3439 for (i = i1; i < i2; i++)
3440 t->level3[i] = 0;
3441 t->level2[index2] = t->level3_size++;
3442 }
3443
3444 index3 += t->level2[index2] << t->p;
3445
3446 t->level3[index3] |= (uint32_t)1 << index4;
3447}
3448
3449/* Finalize and shrink. */
3450static void
1ecbb381 3451add_locale_wctype_table (struct locale_file *file, struct wctype_table *t)
ef446144
UD
3452{
3453 size_t i, j, k;
3454 uint32_t reorder3[t->level3_size];
3455 uint32_t reorder2[t->level2_size];
1ecbb381 3456 uint32_t level2_offset, level3_offset;
ef446144
UD
3457
3458 /* Uniquify level3 blocks. */
3459 k = 0;
3460 for (j = 0; j < t->level3_size; j++)
3461 {
3462 for (i = 0; i < k; i++)
3463 if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3464 (1 << t->p) * sizeof (uint32_t)) == 0)
3465 break;
3466 /* Relocate block j to block i. */
3467 reorder3[j] = i;
3468 if (i == k)
3469 {
3470 if (i != j)
3471 memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3472 (1 << t->p) * sizeof (uint32_t));
3473 k++;
3474 }
3475 }
3476 t->level3_size = k;
3477
3478 for (i = 0; i < (t->level2_size << t->q); i++)
bd75759f 3479 if (t->level2[i] != EMPTY)
ef446144
UD
3480 t->level2[i] = reorder3[t->level2[i]];
3481
3482 /* Uniquify level2 blocks. */
3483 k = 0;
3484 for (j = 0; j < t->level2_size; j++)
3485 {
3486 for (i = 0; i < k; i++)
3487 if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3488 (1 << t->q) * sizeof (uint32_t)) == 0)
3489 break;
3490 /* Relocate block j to block i. */
3491 reorder2[j] = i;
3492 if (i == k)
3493 {
3494 if (i != j)
3495 memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3496 (1 << t->q) * sizeof (uint32_t));
3497 k++;
3498 }
3499 }
3500 t->level2_size = k;
3501
3502 for (i = 0; i < t->level1_size; i++)
bd75759f 3503 if (t->level1[i] != EMPTY)
ef446144
UD
3504 t->level1[i] = reorder2[t->level1[i]];
3505
ef446144
UD
3506 t->result_size =
3507 5 * sizeof (uint32_t)
3508 + t->level1_size * sizeof (uint32_t)
3509 + (t->level2_size << t->q) * sizeof (uint32_t)
3510 + (t->level3_size << t->p) * sizeof (uint32_t);
ef446144 3511
ef446144
UD
3512 level2_offset =
3513 5 * sizeof (uint32_t)
3514 + t->level1_size * sizeof (uint32_t);
3515 level3_offset =
3516 5 * sizeof (uint32_t)
3517 + t->level1_size * sizeof (uint32_t)
3518 + (t->level2_size << t->q) * sizeof (uint32_t);
3519
1ecbb381
RS
3520 start_locale_structure (file);
3521 add_locale_uint32 (file, t->q + t->p + 5);
3522 add_locale_uint32 (file, t->level1_size);
3523 add_locale_uint32 (file, t->p + 5);
3524 add_locale_uint32 (file, (1 << t->q) - 1);
3525 add_locale_uint32 (file, (1 << t->p) - 1);
ef446144
UD
3526
3527 for (i = 0; i < t->level1_size; i++)
1ecbb381
RS
3528 add_locale_uint32
3529 (file,
3530 t->level1[i] == EMPTY
ef446144
UD
3531 ? 0
3532 : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3533
3534 for (i = 0; i < (t->level2_size << t->q); i++)
1ecbb381
RS
3535 add_locale_uint32
3536 (file,
3537 t->level2[i] == EMPTY
ef446144
UD
3538 ? 0
3539 : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3540
1ecbb381
RS
3541 add_locale_uint32_array (file, t->level3, t->level3_size << t->p);
3542 end_locale_structure (file);
ef446144
UD
3543
3544 if (t->level1_alloc > 0)
3545 free (t->level1);
3546 if (t->level2_alloc > 0)
3547 free (t->level2);
3548 if (t->level3_alloc > 0)
3549 free (t->level3);
3550}
3551
02fb3d17
UD
3552/* Flattens the included transliterations into a translit list.
3553 Inserts them in the list at `cursor', and returns the new cursor. */
3554static struct translit_t **
47e8b443
UD
3555translit_flatten (struct locale_ctype_t *ctype,
3556 const struct charmap_t *charmap,
02fb3d17
UD
3557 struct translit_t **cursor)
3558{
3559 while (ctype->translit_include != NULL)
3560 {
3561 const char *copy_locale = ctype->translit_include->copy_locale;
3562 const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3563 struct localedef_t *other;
3564
3565 /* Unchain the include statement. During the depth-first traversal
3566 we don't want to visit any locale more than once. */
3567 ctype->translit_include = ctype->translit_include->next;
3568
3569 other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3570
6e310111 3571 if (other == NULL || other->categories[LC_CTYPE].ctype == NULL)
02fb3d17 3572 {
f16491eb 3573 record_error (0, 0, _("\
02fb3d17 3574%s: transliteration data from locale `%s' not available"),
f16491eb 3575 "LC_CTYPE", copy_locale);
02fb3d17
UD
3576 }
3577 else
3578 {
3579 struct locale_ctype_t *other_ctype =
3580 other->categories[LC_CTYPE].ctype;
3581
3582 cursor = translit_flatten (other_ctype, charmap, cursor);
3583 assert (other_ctype->translit_include == NULL);
3584
3585 if (other_ctype->translit != NULL)
3586 {
3587 /* Insert the other_ctype->translit list at *cursor. */
3588 struct translit_t *endp = other_ctype->translit;
3589 while (endp->next != NULL)
3590 endp = endp->next;
3591
3592 endp->next = *cursor;
3593 *cursor = other_ctype->translit;
3594
3595 /* Avoid any risk of circular lists. */
3596 other_ctype->translit = NULL;
3597
3598 cursor = &endp->next;
3599 }
3600
3601 if (ctype->default_missing == NULL)
3602 ctype->default_missing = other_ctype->default_missing;
3603 }
3604 }
3605
3606 return cursor;
3607}
3608
19bc17a9 3609static void
47e8b443 3610allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
4b10dd6c 3611 struct repertoire_t *repertoire)
19bc17a9 3612{
4c7d276e 3613 size_t idx, nr;
0e16ecfa
UD
3614 const void *key;
3615 size_t len;
3616 void *vdata;
3617 void *curs;
5d431a3e 3618
19bc17a9
RM
3619 /* You wonder about this amount of memory? This is only because some
3620 users do not manage to address the array with unsigned values or
3621 data types with range >= 256. '\200' would result in the array
3622 index -128. To help these poor people we duplicate the entries for
3623 128 up to 255 below the entry for \0. */
4c7d276e
UD
3624 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
3625 ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
3626 ctype->class_b = (uint32_t **)
3627 xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
1ecbb381
RS
3628 ctype->class_3level = (struct wctype_table *)
3629 xmalloc (ctype->nr_charclass * sizeof (struct wctype_table));
19bc17a9 3630
4a33c2f5 3631 /* This is the array accessed using the multibyte string elements. */
4b10dd6c 3632 for (idx = 0; idx < 256; ++idx)
4a33c2f5 3633 ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
19bc17a9 3634
75cd5204
RM
3635 /* Mirror first 127 entries. We must take care that entry -1 is not
3636 mirrored because EOF == -1. */
3637 for (idx = 0; idx < 127; ++idx)
19bc17a9
RM
3638 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3639
4c7d276e
UD
3640 /* The 32 bit array contains all characters < 0x100. */
3641 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3642 if (ctype->charnames[idx] < 0x100)
3643 ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
ef446144 3644
4c7d276e 3645 for (nr = 0; nr < ctype->nr_charclass; nr++)
ef446144 3646 {
4c7d276e 3647 ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
ef446144 3648
9a5c46e8
UD
3649 /* We only set CLASS_B for the bits in the ISO C classes, not
3650 the user defined classes. The number should not change but
3651 who knows. */
3652#define LAST_ISO_C_BIT 11
3653 if (nr <= LAST_ISO_C_BIT)
3654 for (idx = 0; idx < 256; ++idx)
3655 if (ctype->class256_collection[idx] & _ISbit (nr))
3656 ctype->class_b[nr][idx >> 5] |= (uint32_t) 1 << (idx & 0x1f);
4c7d276e 3657 }
8fb81470 3658
4c7d276e
UD
3659 for (nr = 0; nr < ctype->nr_charclass; nr++)
3660 {
1ecbb381 3661 struct wctype_table *t;
ef446144 3662
1ecbb381
RS
3663 t = &ctype->class_3level[nr];
3664 t->p = 4; /* or: 5 */
3665 t->q = 7; /* or: 6 */
3666 wctype_table_init (t);
ef446144 3667
4c7d276e
UD
3668 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3669 if (ctype->class_collection[idx] & _ISwbit (nr))
1ecbb381 3670 wctype_table_add (t, ctype->charnames[idx]);
ef446144 3671
f16491eb 3672 record_verbose (stderr, _("\
ea91c315 3673%s: table for class \"%s\": %lu bytes"),
f16491eb
CD
3674 "LC_CTYPE", ctype->classnames[nr],
3675 (unsigned long int) t->result_size);
ef446144 3676 }
19bc17a9
RM
3677
3678 /* Room for table of mappings. */
4c7d276e
UD
3679 ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3680 ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3681 * sizeof (uint32_t *));
1ecbb381
RS
3682 ctype->map_3level = (struct wctrans_table *)
3683 xmalloc (ctype->map_collection_nr * sizeof (struct wctrans_table));
19bc17a9
RM
3684
3685 /* Fill in all mappings. */
49f2be5b 3686 for (idx = 0; idx < 2; ++idx)
19bc17a9
RM
3687 {
3688 unsigned int idx2;
3689
3690 /* Allocate table. */
4c7d276e
UD
3691 ctype->map_b[idx] = (uint32_t *)
3692 xmalloc ((256 + 128) * sizeof (uint32_t));
19bc17a9
RM
3693
3694 /* Copy values from collection. */
4b10dd6c 3695 for (idx2 = 0; idx2 < 256; ++idx2)
4c7d276e 3696 ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
19bc17a9 3697
75cd5204
RM
3698 /* Mirror first 127 entries. We must take care not to map entry
3699 -1 because EOF == -1. */
3700 for (idx2 = 0; idx2 < 127; ++idx2)
4c7d276e 3701 ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
19bc17a9 3702
75cd5204 3703 /* EOF must map to EOF. */
4c7d276e 3704 ctype->map_b[idx][127] = EOF;
49f2be5b 3705 }
a9c27b3e 3706
49f2be5b
UD
3707 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3708 {
3709 unsigned int idx2;
3710
3711 /* Allocate table. */
4c7d276e 3712 ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
49f2be5b 3713
4c7d276e 3714 /* Copy values from collection. Default is identity mapping. */
49f2be5b 3715 for (idx2 = 0; idx2 < 256; ++idx2)
4c7d276e
UD
3716 ctype->map32_b[idx][idx2] =
3717 (ctype->map_collection[idx][idx2] != 0
3718 ? ctype->map_collection[idx][idx2]
3719 : idx2);
ef446144
UD
3720 }
3721
4c7d276e 3722 for (nr = 0; nr < ctype->map_collection_nr; nr++)
ef446144 3723 {
1ecbb381 3724 struct wctrans_table *t;
ef446144 3725
1ecbb381
RS
3726 t = &ctype->map_3level[nr];
3727 t->p = 7;
3728 t->q = 9;
3729 wctrans_table_init (t);
ef446144 3730
4c7d276e
UD
3731 for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
3732 if (ctype->map_collection[nr][idx] != 0)
1ecbb381 3733 wctrans_table_add (t, ctype->charnames[idx],
4c7d276e 3734 ctype->map_collection[nr][idx]);
ef446144 3735
f16491eb 3736 record_verbose (stderr, _("\
ea91c315 3737%s: table for map \"%s\": %lu bytes"),
f16491eb
CD
3738 "LC_CTYPE", ctype->mapnames[nr],
3739 (unsigned long int) t->result_size);
19bc17a9
RM
3740 }
3741
3742 /* Extra array for class and map names. */
4b10dd6c
UD
3743 ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3744 * sizeof (uint32_t));
3745 ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3746 * sizeof (uint32_t));
75cd5204 3747
4c7d276e
UD
3748 ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3749 ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
5866b131 3750
4a9dcff1
UD
3751 /* Array for width information. Because the expected widths are very
3752 small (never larger than 2) we use only one single byte. This
3753 saves space.
3754 We put only printable characters in the table. wcwidth is specified
3755 to return -1 for non-printable characters. Doing the check here
3756 saves a run-time check.
3757 But we put L'\0' in the table. This again saves a run-time check. */
4c7d276e 3758 {
1ecbb381 3759 struct wcwidth_table *t;
ef446144 3760
1ecbb381
RS
3761 t = &ctype->width;
3762 t->p = 7;
3763 t->q = 9;
3764 wcwidth_table_init (t);
ef446144 3765
4a9dcff1
UD
3766 /* First set all the printable characters of the character set to
3767 the default width. */
4c7d276e
UD
3768 curs = NULL;
3769 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
3770 {
3771 struct charseq *data = (struct charseq *) vdata;
0e16ecfa 3772
4c7d276e
UD
3773 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3774 data->ucs4 = repertoire_find_value (ctype->repertoire,
3775 data->name, len);
ef446144 3776
4c7d276e 3777 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
4a9dcff1
UD
3778 {
3779 uint32_t *class_bits =
3780 find_idx (ctype, &ctype->class_collection, NULL,
3781 &ctype->class_collection_act, data->ucs4);
3782
3783 if (class_bits != NULL && (*class_bits & BITw (tok_print)))
1ecbb381 3784 wcwidth_table_add (t, data->ucs4, charmap->width_default);
4a9dcff1 3785 }
4c7d276e 3786 }
ef446144 3787
4c7d276e
UD
3788 /* Now add the explicitly specified widths. */
3789 if (charmap->width_rules != NULL)
d7e49b19
RM
3790 for (size_t cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
3791 {
3792 unsigned char bytes[charmap->mb_cur_max];
3793 int nbytes = charmap->width_rules[cnt].from->nbytes;
3794
3795 /* We have the range of character for which the width is
3796 specified described using byte sequences of the multibyte
3797 charset. We have to convert this to UCS4 now. And we
3798 cannot simply convert the beginning and the end of the
3799 sequence, we have to iterate over the byte sequence and
3800 convert it for every single character. */
3801 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
3802
3803 while (nbytes < charmap->width_rules[cnt].to->nbytes
3804 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
3805 nbytes) <= 0)
3806 {
3807 /* Find the UCS value for `bytes'. */
3808 int inner;
3809 uint32_t wch;
3810 struct charseq *seq =
3811 charmap_find_symbol (charmap, (char *) bytes, nbytes);
3812
3813 if (seq == NULL)
3814 wch = ILLEGAL_CHAR_VALUE;
3815 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
3816 wch = seq->ucs4;
3817 else
3818 wch = repertoire_find_value (ctype->repertoire, seq->name,
3819 strlen (seq->name));
3820
3821 if (wch != ILLEGAL_CHAR_VALUE)
3822 {
3823 /* Store the value. */
3824 uint32_t *class_bits =
3825 find_idx (ctype, &ctype->class_collection, NULL,
3826 &ctype->class_collection_act, wch);
3827
3828 if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3829 wcwidth_table_add (t, wch,
3830 charmap->width_rules[cnt].width);
3831 }
3832
3833 /* "Increment" the bytes sequence. */
3834 inner = nbytes - 1;
3835 while (inner >= 0 && bytes[inner] == 0xff)
3836 --inner;
3837
3838 if (inner < 0)
3839 {
3840 /* We have to extend the byte sequence. */
3841 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
3842 break;
3843
3844 bytes[0] = 1;
3845 memset (&bytes[1], 0, nbytes);
3846 ++nbytes;
3847 }
3848 else
3849 {
3850 ++bytes[inner];
3851 while (++inner < nbytes)
3852 bytes[inner] = 0;
3853 }
3854 }
3855 }
ef446144 3856
4a9dcff1 3857 /* Set the width of L'\0' to 0. */
1ecbb381 3858 wcwidth_table_add (t, 0, 0);
ef446144 3859
ea91c315 3860 record_verbose (stderr, _("%s: table for width: %lu bytes"),
f16491eb 3861 "LC_CTYPE", (unsigned long int) t->result_size);
4c7d276e 3862 }
0e16ecfa 3863
4b10dd6c
UD
3864 /* Set MB_CUR_MAX. */
3865 ctype->mb_cur_max = charmap->mb_cur_max;
6990326c 3866
4b10dd6c
UD
3867 /* Now determine the table for the transliteration information.
3868
3869 XXX It is not yet clear to me whether it is worth implementing a
3870 complicated algorithm which uses a hash table to locate the entries.
3871 For now I'll use a simple array which can be searched using binary
3872 search. */
02fb3d17
UD
3873 if (ctype->translit_include != NULL)
3874 /* Traverse the locales mentioned in the `include' statements in a
3875 depth-first way and fold in their transliteration information. */
3876 translit_flatten (ctype, charmap, &ctype->translit);
4b10dd6c
UD
3877
3878 if (ctype->translit != NULL)
3879 {
3880 /* First count how many entries we have. This is the upper limit
3881 since some entries from the included files might be overwritten. */
3882 size_t number = 0;
4b10dd6c
UD
3883 struct translit_t *runp = ctype->translit;
3884 struct translit_t **sorted;
3885 size_t from_len, to_len;
3886
3887 while (runp != NULL)
3888 {
3889 ++number;
3890 runp = runp->next;
3891 }
3892
3893 /* Next we allocate an array large enough and fill in the values. */
a9c27b3e
UD
3894 sorted = (struct translit_t **) alloca (number
3895 * sizeof (struct translit_t **));
4b10dd6c
UD
3896 runp = ctype->translit;
3897 number = 0;
3898 do
3899 {
3900 /* Search for the place where to insert this string.
3901 XXX Better use a real sorting algorithm later. */
3902 size_t idx = 0;
3903 int replace = 0;
3904
3905 while (idx < number)
3906 {
3907 int res = wcscmp ((const wchar_t *) sorted[idx]->from,
3908 (const wchar_t *) runp->from);
3909 if (res == 0)
3910 {
3911 replace = 1;
3912 break;
3913 }
3914 if (res > 0)
3915 break;
3916 ++idx;
3917 }
3918
3919 if (replace)
3920 sorted[idx] = runp;
3921 else
3922 {
3923 memmove (&sorted[idx + 1], &sorted[idx],
3924 (number - idx) * sizeof (struct translit_t *));
3925 sorted[idx] = runp;
3926 ++number;
3927 }
3928
3929 runp = runp->next;
3930 }
3931 while (runp != NULL);
3932
3933 /* The next step is putting all the possible transliteration
3934 strings in one memory block so that we can write it out.
3935 We need several different blocks:
9ca23765 3936 - index to the from-string array
4b10dd6c
UD
3937 - from-string array
3938 - index to the to-string array
3939 - to-string array.
4b10dd6c
UD
3940 */
3941 from_len = to_len = 0;
d7e49b19 3942 for (size_t cnt = 0; cnt < number; ++cnt)
4b10dd6c
UD
3943 {
3944 struct translit_to_t *srunp;
3945 from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3946 srunp = sorted[cnt]->to;
3947 while (srunp != NULL)
3948 {
3949 to_len += wcslen ((const wchar_t *) srunp->str) + 1;
3950 srunp = srunp->next;
3951 }
3952 /* Plus one for the extra NUL character marking the end of
3953 the list for the current entry. */
3954 ++to_len;
3955 }
3956
3957 /* We can allocate the arrays for the results. */
4a33c2f5
UD
3958 ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
3959 ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
3960 ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
3961 ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
4b10dd6c
UD
3962
3963 from_len = 0;
3964 to_len = 0;
d7e49b19 3965 for (size_t cnt = 0; cnt < number; ++cnt)
4b10dd6c
UD
3966 {
3967 size_t len;
3968 struct translit_to_t *srunp;
3969
4a33c2f5
UD
3970 ctype->translit_from_idx[cnt] = from_len;
3971 ctype->translit_to_idx[cnt] = to_len;
4b10dd6c
UD
3972
3973 len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4a33c2f5 3974 wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4b10dd6c
UD
3975 (const wchar_t *) sorted[cnt]->from, len);
3976 from_len += len;
3977
4a33c2f5 3978 ctype->translit_to_idx[cnt] = to_len;
4b10dd6c
UD
3979 srunp = sorted[cnt]->to;
3980 while (srunp != NULL)
3981 {
3982 len = wcslen ((const wchar_t *) srunp->str) + 1;
4a33c2f5 3983 wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4b10dd6c
UD
3984 (const wchar_t *) srunp->str, len);
3985 to_len += len;
3986 srunp = srunp->next;
3987 }
4a33c2f5 3988 ctype->translit_to_tbl[to_len++] = L'\0';
4b10dd6c 3989 }
4b10dd6c
UD
3990
3991 /* Store the information about the length. */
04fbc779 3992 ctype->translit_idx_size = number;
4b10dd6c
UD
3993 ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
3994 ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
3995 }
3996 else
3997 {
363a9899
OB
3998 ctype->translit_from_idx = no_str;
3999 ctype->translit_from_tbl = no_str;
4000 ctype->translit_to_tbl = no_str;
4b10dd6c
UD
4001 ctype->translit_idx_size = 0;
4002 ctype->translit_from_tbl_size = 0;
4003 ctype->translit_to_tbl_size = 0;
4004 }
19bc17a9 4005}