]> git.ipfire.org Git - thirdparty/glibc.git/blame - locale/programs/ld-ctype.c
Update.
[thirdparty/glibc.git] / locale / programs / ld-ctype.c
CommitLineData
01ff9d0b 1/* Copyright (C) 1995-1999, 2000 Free Software Foundation, Inc.
c84142e8 2 This file is part of the GNU C Library.
4b10dd6c 3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
19bc17a9 4
c84142e8
UD
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
19bc17a9 9
c84142e8
UD
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
19bc17a9 14
c84142e8
UD
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
19bc17a9
RM
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
a68b0d31 24#include <alloca.h>
4b10dd6c 25#include <byteswap.h>
19bc17a9 26#include <endian.h>
4b10dd6c 27#include <errno.h>
19bc17a9 28#include <limits.h>
4b10dd6c
UD
29#include <obstack.h>
30#include <stdlib.h>
19bc17a9 31#include <string.h>
4b10dd6c
UD
32#include <wchar.h>
33#include <wctype.h>
34#include <sys/uio.h>
19bc17a9 35
4b10dd6c 36#include "charmap.h"
19bc17a9
RM
37#include "localeinfo.h"
38#include "langinfo.h"
4b10dd6c 39#include "linereader.h"
19bc17a9 40#include "locfile-token.h"
4b10dd6c
UD
41#include "locfile.h"
42#include "localedef.h"
19bc17a9 43
19bc17a9
RM
44#include <assert.h>
45
46
#ifdef PREDEFINED_CLASSES
/* These are the extra bits not in wctype.h since these are not
   preallocated classes.  The constants are unsigned: shifting a signed 1
   into bit 31 overflows `int', which is undefined behavior, and the
   values are only ever used as masks in 32-bit unsigned class words.  */
# define _ISwspecial1 (1u << 29)
# define _ISwspecial2 (1u << 30)
# define _ISwspecial3 (1u << 31)
#endif
19bc17a9
RM
54
55
/* Bit position of a character class, counted from the token of the
   first standard class (tok_upper), and the matching mask in the
   narrow (BIT) and wide (BITw) class words.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (_ISbit (BITPOS (class)))
#define BITw(class) (_ISwbit (BITPOS (class)))

/* Lvalue for the entry belonging to character VALUE in the table
   COLLECTION of CTYPE.  IDX is either empty or a `[n]' subscript which
   selects one of several parallel tables; the same subscript is applied
   to the companion `_max' and `_act' members.  The lookup is done by
   find_idx.  */
#define ELEM(ctype, collection, idx, value) \
  (*find_idx ((ctype), &(ctype)->collection idx, \
              &(ctype)->collection##_max idx, \
              &(ctype)->collection##_act idx, (value)))


/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t uint16_t
#define char_class32_t uint32_t
4b10dd6c
UD
71
72
73/* Type to describe a transliteration action. We have a possibly
74 multiple character from-string and a set of multiple character
75 to-strings. All are 32bit values since this is what is used in
76 the gconv functions. */
struct translit_to_t
{
  /* One replacement string, stored as 32-bit values.  */
  uint32_t *str;

  /* Next alternative replacement, or NULL at the end of the list.  */
  struct translit_to_t *next;
};
83
struct translit_t
{
  /* The (possibly multi-character) string to be replaced, stored as
     32-bit values.  */
  uint32_t *from;

  /* File name and line number recorded for this entry; presumably the
     place of the definition, kept for diagnostics.  */
  const char *fname;
  size_t lineno;

  /* List of the replacement strings.  */
  struct translit_to_t *to;

  /* Next transliteration entry.  */
  struct translit_t *next;
};
19bc17a9 95
a673fbcb
UD
struct translit_ignore_t
{
  /* Description of the characters to ignore.  NOTE(review): presumably
     the range FROM..TO visited in increments of STEP -- confirm against
     the code which fills in these entries.  */
  uint32_t from;
  uint32_t to;
  uint32_t step;

  /* File name and line number recorded for this entry.  */
  const char *fname;
  size_t lineno;

  /* Next entry of the list.  */
  struct translit_ignore_t *next;
};
107
19bc17a9
RM
108
109/* The real definition of the struct for the LC_CTYPE locale. */
110struct locale_ctype_t
111{
4b10dd6c 112 uint32_t *charnames;
19bc17a9
RM
113 size_t charnames_max;
114 size_t charnames_act;
115
4b10dd6c
UD
116 struct repertoire_t *repertoire;
117
118 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
119#define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
ba1ffaa1 120 size_t nr_charclass;
19bc17a9 121 const char *classnames[MAX_NR_CHARCLASS];
4b10dd6c
UD
122 uint32_t last_class_char;
123 uint32_t class256_collection[256];
124 uint32_t *class_collection;
19bc17a9
RM
125 size_t class_collection_max;
126 size_t class_collection_act;
4b10dd6c
UD
127 uint32_t class_done;
128
129 struct charseq **mbdigits;
130 size_t mbdigits_act;
131 size_t mbdigits_max;
132 uint32_t *wcdigits;
133 size_t wcdigits_act;
134 size_t wcdigits_max;
135
136 struct charseq *mboutdigits[10];
137 uint32_t wcoutdigits[10];
138 size_t outdigits_act;
19bc17a9
RM
139
140 /* If the following number ever turns out to be too small simply
141 increase it. But I doubt it will. --drepper@gnu */
142#define MAX_NR_CHARMAP 16
143 const char *mapnames[MAX_NR_CHARMAP];
4b10dd6c
UD
144 uint32_t *map_collection[MAX_NR_CHARMAP];
145 uint32_t map256_collection[2][256];
9a0a462c
UD
146 size_t map_collection_max[MAX_NR_CHARMAP];
147 size_t map_collection_act[MAX_NR_CHARMAP];
19bc17a9
RM
148 size_t map_collection_nr;
149 size_t last_map_idx;
4b10dd6c
UD
150 int tomap_done[MAX_NR_CHARMAP];
151
152 /* Transliteration information. */
153 const char *translit_copy_locale;
154 const char *translit_copy_repertoire;
155 struct translit_t *translit;
a673fbcb 156 struct translit_ignore_t *translit_ignore;
a8e4c924 157 uint32_t ntranslit_ignore;
a673fbcb
UD
158
159 uint32_t *default_missing;
160 const char *default_missing_file;
161 size_t default_missing_lineno;
19bc17a9
RM
162
163 /* The arrays for the binary representation. */
4b10dd6c
UD
164 uint32_t plane_size;
165 uint32_t plane_cnt;
19bc17a9
RM
166 char_class_t *ctype_b;
167 char_class32_t *ctype32_b;
4a33c2f5
UD
168 uint32_t *names;
169 uint32_t **map;
49f2be5b 170 uint32_t **map32;
4b10dd6c
UD
171 uint32_t *class_name_ptr;
172 uint32_t *map_name_ptr;
75cd5204 173 unsigned char *width;
4b10dd6c 174 uint32_t mb_cur_max;
6990326c 175 const char *codeset_name;
4a33c2f5
UD
176 uint32_t translit_hash_size;
177 uint32_t translit_hash_layers;
178 uint32_t *translit_from_idx;
179 uint32_t *translit_from_tbl;
180 uint32_t *translit_to_idx;
181 uint32_t *translit_to_tbl;
4b10dd6c
UD
182 size_t translit_idx_size;
183 size_t translit_from_tbl_size;
184 size_t translit_to_tbl_size;
185
a673fbcb 186 struct obstack mempool;
19bc17a9
RM
187};
188
189
4b10dd6c
UD
190#define obstack_chunk_alloc xmalloc
191#define obstack_chunk_free free
192
193
19bc17a9 194/* Prototypes for local functions. */
4b10dd6c
UD
/* Create and preset the LC_CTYPE data of LOCALE.  */
static void ctype_startup (struct linereader *lr,
                           struct localedef_t *locale,
                           struct charmap_t *charmap,
                           int ignore_content);

/* Register a new character class called NAME.  */
static void ctype_class_new (struct linereader *lr,
                             struct locale_ctype_t *ctype,
                             const char *name);

/* Register a new character map called NAME.  */
static void ctype_map_new (struct linereader *lr,
                           struct locale_ctype_t *ctype,
                           const char *name,
                           struct charmap_t *charmap);

/* Return the address of the entry for character IDX in *TABLE.  As a
   side effect the character is entered in the name array of CTYPE if
   necessary; TABLE, MAX and ACT may be NULL if only this side effect
   is wanted.  */
static uint32_t *find_idx (struct locale_ctype_t *ctype,
                           uint32_t **table,
                           size_t *max, size_t *act,
                           unsigned int idx);

/* Set default values for the classes which were not specified.  */
static void set_class_defaults (struct locale_ctype_t *ctype,
                                struct charmap_t *charmap,
                                struct repertoire_t *repertoire);

/* Determine the table sizes and build the arrays for the output.  */
static void allocate_arrays (struct locale_ctype_t *ctype,
                             struct charmap_t *charmap,
                             struct repertoire_t *repertoire);
19bc17a9
RM
210
211
4b10dd6c
UD
/* Names which are tried in the charmap to find the default input
   digits: the spelled-out English names ...  */
static const char *longnames[] =
{
  "zero",
  "one",
  "two",
  "three",
  "four",
  "five",
  "six",
  "seven",
  "eight",
  "nine"
};
/* ... the names built from the Unicode values of the ASCII digits ...  */
static const char *uninames[] =
{
  "U00000030",
  "U00000031",
  "U00000032",
  "U00000033",
  "U00000034",
  "U00000035",
  "U00000036",
  "U00000037",
  "U00000038",
  "U00000039"
};
/* ... and the digit characters themselves.  */
static const unsigned char digits[] = "0123456789";
223
224
225static void
19bc17a9 226ctype_startup (struct linereader *lr, struct localedef_t *locale,
4b10dd6c 227 struct charmap_t *charmap, int ignore_content)
19bc17a9
RM
228{
229 unsigned int cnt;
230 struct locale_ctype_t *ctype;
231
4b10dd6c 232 if (!ignore_content)
19bc17a9 233 {
4b10dd6c
UD
234 /* Allocate the needed room. */
235 locale->categories[LC_CTYPE].ctype = ctype =
236 (struct locale_ctype_t *) xcalloc (1, sizeof (struct locale_ctype_t));
237
238 /* We have seen no names yet. */
239 ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
240 ctype->charnames =
241 (unsigned int *) xmalloc (ctype->charnames_max
242 * sizeof (unsigned int));
243 for (cnt = 0; cnt < 256; ++cnt)
244 ctype->charnames[cnt] = cnt;
245 ctype->charnames_act = 256;
246
247 /* Fill character class information. */
248 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
249 /* The order of the following instructions determines the bit
250 positions! */
251 ctype_class_new (lr, ctype, "upper");
252 ctype_class_new (lr, ctype, "lower");
253 ctype_class_new (lr, ctype, "alpha");
254 ctype_class_new (lr, ctype, "digit");
255 ctype_class_new (lr, ctype, "xdigit");
256 ctype_class_new (lr, ctype, "space");
257 ctype_class_new (lr, ctype, "print");
258 ctype_class_new (lr, ctype, "graph");
259 ctype_class_new (lr, ctype, "blank");
260 ctype_class_new (lr, ctype, "cntrl");
261 ctype_class_new (lr, ctype, "punct");
262 ctype_class_new (lr, ctype, "alnum");
011ebfab 263#ifdef PREDEFINED_CLASSES
4b10dd6c
UD
264 /* The following are extensions from ISO 14652. */
265 ctype_class_new (lr, ctype, "left_to_right");
266 ctype_class_new (lr, ctype, "right_to_left");
267 ctype_class_new (lr, ctype, "num_terminator");
268 ctype_class_new (lr, ctype, "num_separator");
269 ctype_class_new (lr, ctype, "segment_separator");
270 ctype_class_new (lr, ctype, "block_separator");
271 ctype_class_new (lr, ctype, "direction_control");
272 ctype_class_new (lr, ctype, "sym_swap_layout");
273 ctype_class_new (lr, ctype, "char_shape_selector");
274 ctype_class_new (lr, ctype, "num_shape_selector");
275 ctype_class_new (lr, ctype, "non_spacing");
276 ctype_class_new (lr, ctype, "non_spacing_level3");
277 ctype_class_new (lr, ctype, "normal_connect");
278 ctype_class_new (lr, ctype, "r_connect");
279 ctype_class_new (lr, ctype, "no_connect");
280 ctype_class_new (lr, ctype, "no_connect-space");
281 ctype_class_new (lr, ctype, "vowel_connect");
011ebfab 282#endif
4b10dd6c
UD
283
284 ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
285 ctype->class_collection
286 = (uint32_t *) xcalloc (sizeof (unsigned long int),
287 ctype->class_collection_max);
288 ctype->class_collection_act = 256;
289
290 /* Fill character map information. */
4b10dd6c
UD
291 ctype->last_map_idx = MAX_NR_CHARMAP;
292 ctype_map_new (lr, ctype, "toupper", charmap);
293 ctype_map_new (lr, ctype, "tolower", charmap);
011ebfab 294#ifdef PREDEFINED_CLASSES
4b10dd6c 295 ctype_map_new (lr, ctype, "tosymmetric", charmap);
011ebfab 296#endif
4b10dd6c
UD
297
298 /* Fill first 256 entries in `toXXX' arrays. */
299 for (cnt = 0; cnt < 256; ++cnt)
300 {
301 ctype->map_collection[0][cnt] = cnt;
302 ctype->map_collection[1][cnt] = cnt;
9e2b7438 303#ifdef PREDEFINED_CLASSES
4b10dd6c 304 ctype->map_collection[2][cnt] = cnt;
9e2b7438 305#endif
4b10dd6c
UD
306 ctype->map256_collection[0][cnt] = cnt;
307 ctype->map256_collection[1][cnt] = cnt;
308 }
309
a673fbcb 310 obstack_init (&ctype->mempool);
19bc17a9
RM
311 }
312}
313
314
315void
4b10dd6c 316ctype_finish (struct localedef_t *locale, struct charmap_t *charmap)
19bc17a9
RM
317{
318 /* See POSIX.2, table 2-6 for the meaning of the following table. */
319#define NCLASS 12
320 static const struct
321 {
322 const char *name;
323 const char allow[NCLASS];
324 }
325 valid_table[NCLASS] =
326 {
327 /* The order is important. See token.h for more information.
328 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
329 { "upper", "--MX-XDDXXX-" },
330 { "lower", "--MX-XDDXXX-" },
331 { "alpha", "---X-XDDXXX-" },
332 { "digit", "XXX--XDDXXX-" },
333 { "xdigit", "-----XDDXXX-" },
334 { "space", "XXXXX------X" },
335 { "print", "---------X--" },
336 { "graph", "---------X--" },
337 { "blank", "XXXXXM-----X" },
338 { "cntrl", "XXXXX-XX--XX" },
339 { "punct", "XXXXX-DD-X-X" },
340 { "alnum", "-----XDDXXX-" }
341 };
342 size_t cnt;
343 int cls1, cls2;
4b10dd6c
UD
344 uint32_t space_value;
345 struct charseq *space_seq;
19bc17a9 346 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
4b10dd6c 347 int warned;
0e16ecfa
UD
348 const void *key;
349 size_t len;
350 void *vdata;
351 void *curs;
19bc17a9 352
b9eb05d6
UD
353 /* Now resolve copying and also handle completely missing definitions. */
354 if (ctype == NULL)
355 {
70e51ab9
UD
356 const char *repertoire_name;
357
b9eb05d6
UD
358 /* First see whether we were supposed to copy. If yes, find the
359 actual definition. */
360 if (locale->copy_name[LC_CTYPE] != NULL)
361 {
362 /* Find the copying locale. This has to happen transitively since
363 the locale we are copying from might also copying another one. */
364 struct localedef_t *from = locale;
365
366 do
367 from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
368 from->repertoire_name, charmap);
369 while (from->categories[LC_CTYPE].ctype == NULL
370 && from->copy_name[LC_CTYPE] != NULL);
371
372 ctype = locale->categories[LC_CTYPE].ctype
373 = from->categories[LC_CTYPE].ctype;
374 }
375
376 /* If there is still no definition issue an warning and create an
377 empty one. */
378 if (ctype == NULL)
379 {
f6ada7ad
UD
380 if (! be_quiet)
381 error (0, 0, _("No definition for %s category found"), "LC_CTYPE");
b9eb05d6
UD
382 ctype_startup (NULL, locale, charmap, 0);
383 ctype = locale->categories[LC_CTYPE].ctype;
384 }
70e51ab9
UD
385
386 /* Get the repertoire we have to use. */
387 repertoire_name = locale->repertoire_name ?: repertoire_global;
388 if (repertoire_name != NULL)
389 ctype->repertoire = repertoire_read (repertoire_name);
b9eb05d6
UD
390 }
391
db76d943
UD
392 /* We need the name of the currently used 8-bit character set to
393 make correct conversion between this 8-bit representation and the
394 ISO 10646 character set used internally for wide characters. */
395 ctype->codeset_name = charmap->code_set_name;
396 if (ctype->codeset_name == NULL)
397 {
398 if (! be_quiet)
399 error (0, 0, "no character set name specified in charmap");
400 ctype->codeset_name = "//UNKNOWN//";
401 }
402
19bc17a9 403 /* Set default value for classes not specified. */
4b10dd6c 404 set_class_defaults (ctype, charmap, ctype->repertoire);
19bc17a9
RM
405
406 /* Check according to table. */
42d7c593 407 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
19bc17a9 408 {
4b10dd6c 409 uint32_t tmp = ctype->class_collection[cnt];
19bc17a9 410
4b10dd6c
UD
411 if (tmp != 0)
412 {
413 for (cls1 = 0; cls1 < NCLASS; ++cls1)
414 if ((tmp & _ISwbit (cls1)) != 0)
415 for (cls2 = 0; cls2 < NCLASS; ++cls2)
416 if (valid_table[cls1].allow[cls2] != '-')
19bc17a9 417 {
4b10dd6c
UD
418 int eq = (tmp & _ISwbit (cls2)) != 0;
419 switch (valid_table[cls1].allow[cls2])
19bc17a9 420 {
4b10dd6c
UD
421 case 'M':
422 if (!eq)
423 {
424 uint32_t value = ctype->charnames[cnt];
425
426 if (!be_quiet)
427 error (0, 0, _("\
428character L'\\u%0*x' in class `%s' must be in class `%s'"),
429 value > 0xffff ? 8 : 4, value,
430 valid_table[cls1].name,
431 valid_table[cls2].name);
432 }
433 break;
434
435 case 'X':
436 if (eq)
437 {
438 uint32_t value = ctype->charnames[cnt];
439
440 if (!be_quiet)
441 error (0, 0, _("\
442character L'\\u%0*x' in class `%s' must not be in class `%s'"),
443 value > 0xffff ? 8 : 4, value,
444 valid_table[cls1].name,
445 valid_table[cls2].name);
446 }
447 break;
448
449 case 'D':
450 ctype->class_collection[cnt] |= _ISwbit (cls2);
451 break;
452
453 default:
454 error (5, 0, _("internal error in %s, line %u"),
455 __FUNCTION__, __LINE__);
19bc17a9 456 }
4b10dd6c
UD
457 }
458 }
459 }
460
461 for (cnt = 0; cnt < 256; ++cnt)
462 {
463 uint32_t tmp = ctype->class256_collection[cnt];
19bc17a9 464
4b10dd6c
UD
465 if (tmp != 0)
466 {
467 for (cls1 = 0; cls1 < NCLASS; ++cls1)
468 if ((tmp & _ISbit (cls1)) != 0)
469 for (cls2 = 0; cls2 < NCLASS; ++cls2)
470 if (valid_table[cls1].allow[cls2] != '-')
471 {
472 int eq = (tmp & _ISbit (cls2)) != 0;
473 switch (valid_table[cls1].allow[cls2])
19bc17a9 474 {
4b10dd6c
UD
475 case 'M':
476 if (!eq)
477 {
478 char buf[17];
479
5d431a3e 480 snprintf (buf, sizeof buf, "\\%Zo", cnt);
4b10dd6c
UD
481
482 if (!be_quiet)
483 error (0, 0, _("\
484character '%s' in class `%s' must be in class `%s'"),
485 buf, valid_table[cls1].name,
486 valid_table[cls2].name);
487 }
488 break;
489
490 case 'X':
491 if (eq)
492 {
493 char buf[17];
494
5d431a3e 495 snprintf (buf, sizeof buf, "\\%Zo", cnt);
4b10dd6c
UD
496
497 if (!be_quiet)
498 error (0, 0, _("\
499character '%s' in class `%s' must not be in class `%s'"),
500 buf, valid_table[cls1].name,
501 valid_table[cls2].name);
502 }
503 break;
504
505 case 'D':
506 ctype->class256_collection[cnt] |= _ISbit (cls2);
507 break;
508
509 default:
510 error (5, 0, _("internal error in %s, line %u"),
511 __FUNCTION__, __LINE__);
19bc17a9 512 }
4b10dd6c
UD
513 }
514 }
19bc17a9
RM
515 }
516
517 /* ... and now test <SP> as a special case. */
a0dc5206
UD
518 space_value = 32;
519 if (((cnt = BITPOS (tok_space),
520 (ELEM (ctype, class_collection, , space_value)
521 & BITw (tok_space)) == 0)
522 || (cnt = BITPOS (tok_blank),
523 (ELEM (ctype, class_collection, , space_value)
524 & BITw (tok_blank)) == 0)))
880f421f
UD
525 {
526 if (!be_quiet)
527 error (0, 0, _("<SP> character not in class `%s'"),
528 valid_table[cnt].name);
529 }
c84142e8
UD
530 else if (((cnt = BITPOS (tok_punct),
531 (ELEM (ctype, class_collection, , space_value)
4b10dd6c 532 & BITw (tok_punct)) != 0)
c84142e8
UD
533 || (cnt = BITPOS (tok_graph),
534 (ELEM (ctype, class_collection, , space_value)
4b10dd6c 535 & BITw (tok_graph))
880f421f
UD
536 != 0)))
537 {
538 if (!be_quiet)
539 error (0, 0, _("<SP> character must not be in class `%s'"),
540 valid_table[cnt].name);
541 }
19bc17a9 542 else
4b10dd6c
UD
543 ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
544
545 space_seq = charmap_find_value (charmap, "SP", 2);
ce177a84 546 if (space_seq == NULL)
45c95239
UD
547 space_seq = charmap_find_value (charmap, "space", 5);
548 if (space_seq == NULL)
1b97149d 549 space_seq = charmap_find_value (charmap, "U00000020", 9);
4b10dd6c
UD
550 if (space_seq == NULL || space_seq->nbytes != 1)
551 {
552 if (!be_quiet)
553 error (0, 0, _("character <SP> not defined in character map"));
554 }
555 else if (((cnt = BITPOS (tok_space),
556 (ctype->class256_collection[space_seq->bytes[0]]
557 & BIT (tok_space)) == 0)
558 || (cnt = BITPOS (tok_blank),
559 (ctype->class256_collection[space_seq->bytes[0]]
560 & BIT (tok_blank)) == 0)))
561 {
562 if (!be_quiet)
563 error (0, 0, _("<SP> character not in class `%s'"),
564 valid_table[cnt].name);
565 }
566 else if (((cnt = BITPOS (tok_punct),
567 (ctype->class256_collection[space_seq->bytes[0]]
568 & BIT (tok_punct)) != 0)
569 || (cnt = BITPOS (tok_graph),
570 (ctype->class256_collection[space_seq->bytes[0]]
571 & BIT (tok_graph)) != 0)))
572 {
573 if (!be_quiet)
574 error (0, 0, _("<SP> character must not be in class `%s'"),
575 valid_table[cnt].name);
576 }
577 else
578 ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
75cd5204
RM
579
580 /* Now that the tests are done make sure the name array contains all
581 characters which are handled in the WIDTH section of the
582 character set definition file. */
4b10dd6c
UD
583 if (charmap->width_rules != NULL)
584 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
75cd5204 585 {
827ff758
UD
586 unsigned char bytes[charmap->mb_cur_max];
587 int nbytes = charmap->width_rules[cnt].from->nbytes;
588
589 /* We have the range of character for which the width is
590 specified described using byte sequences of the multibyte
591 charset. We have to convert this to UCS4 now. And we
592 cannot simply convert the beginning and the end of the
593 sequence, we have to iterate over the byte sequence and
594 convert it for every single character. */
595 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
596
597 while (nbytes < charmap->width_rules[cnt].to->nbytes
598 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
599 nbytes) <= 0)
600 {
601 /* Find the UCS value for `bytes'. */
827ff758 602 int inner;
76e680a8
UD
603 uint32_t wch;
604 struct charseq *seq = charmap_find_symbol (charmap, bytes, nbytes);
605
606 if (seq == NULL)
607 wch = ILLEGAL_CHAR_VALUE;
608 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
609 wch = seq->ucs4;
610 else
611 wch = repertoire_find_value (ctype->repertoire, seq->name,
612 strlen (seq->name));
827ff758
UD
613
614 if (wch != ILLEGAL_CHAR_VALUE)
615 /* We are only interested in the side-effects of the
616 `find_idx' call. It will add appropriate entries in
617 the name array if this is necessary. */
618 (void) find_idx (ctype, NULL, NULL, NULL, wch);
619
620 /* "Increment" the bytes sequence. */
621 inner = nbytes - 1;
622 while (inner >= 0 && bytes[inner] == 0xff)
623 --inner;
624
625 if (inner < 0)
626 {
627 /* We have to extend the byte sequence. */
628 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
629 break;
630
631 bytes[0] = 1;
632 memset (&bytes[1], 0, nbytes);
633 ++nbytes;
634 }
635 else
636 {
637 ++bytes[inner];
638 while (++inner < nbytes)
639 bytes[inner] = 0;
640 }
641 }
4b10dd6c
UD
642 }
643
0e16ecfa
UD
644 /* Now set all the other characters of the character set to the
645 default width. */
646 curs = NULL;
647 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
648 {
649 struct charseq *data = (struct charseq *) vdata;
650
651 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
652 data->ucs4 = repertoire_find_value (ctype->repertoire,
653 data->name, len);
654
655 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
656 (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
657 }
658
4b10dd6c
UD
659 /* There must be a multiple of 10 digits. */
660 if (ctype->mbdigits_act % 10 != 0)
661 {
662 assert (ctype->mbdigits_act == ctype->wcdigits_act);
663 ctype->wcdigits_act -= ctype->mbdigits_act % 10;
664 ctype->mbdigits_act -= ctype->mbdigits_act % 10;
665 error (0, 0, _("`digit' category has not entries in groups of ten"));
666 }
667
668 /* Check the input digits. There must be a multiple of ten available.
42d7c593 669 In each group it could be that one or the other character is missing.
4b10dd6c
UD
670 In this case the whole group must be removed. */
671 cnt = 0;
672 while (cnt < ctype->mbdigits_act)
673 {
674 size_t inner;
675 for (inner = 0; inner < 10; ++inner)
676 if (ctype->mbdigits[cnt + inner] == NULL)
677 break;
678
679 if (inner == 10)
680 cnt += 10;
681 else
682 {
683 /* Remove the group. */
684 memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
685 ((ctype->wcdigits_act - cnt - 10)
686 * sizeof (ctype->mbdigits[0])));
687 ctype->mbdigits_act -= 10;
688 }
689 }
690
691 /* If no input digits are given use the default. */
692 if (ctype->mbdigits_act == 0)
693 {
694 if (ctype->mbdigits_max == 0)
695 {
696 ctype->mbdigits = obstack_alloc (&charmap->mem_pool,
697 10 * sizeof (struct charseq *));
698 ctype->mbdigits_max = 10;
699 }
700
701 for (cnt = 0; cnt < 10; ++cnt)
702 {
703 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
704 digits + cnt, 1);
705 if (ctype->mbdigits[cnt] == NULL)
706 {
707 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
708 longnames[cnt],
709 strlen (longnames[cnt]));
710 if (ctype->mbdigits[cnt] == NULL)
711 {
712 /* Hum, this ain't good. */
713 error (0, 0, _("\
714no input digits defined and none of the standard names in the charmap"));
715
716 ctype->mbdigits[cnt] = obstack_alloc (&charmap->mem_pool,
717 sizeof (struct charseq) + 1);
718
719 /* This is better than nothing. */
720 ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
721 ctype->mbdigits[cnt]->nbytes = 1;
722 }
723 }
724 }
725
726 ctype->mbdigits_act = 10;
727 }
728
729 /* Check the wide character input digits. There must be a multiple
42d7c593 730 of ten available. In each group it could be that one or the other
4b10dd6c
UD
731 character is missing. In this case the whole group must be
732 removed. */
733 cnt = 0;
734 while (cnt < ctype->wcdigits_act)
735 {
736 size_t inner;
737 for (inner = 0; inner < 10; ++inner)
738 if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
739 break;
740
741 if (inner == 10)
742 cnt += 10;
743 else
744 {
745 /* Remove the group. */
746 memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
747 ((ctype->wcdigits_act - cnt - 10)
748 * sizeof (ctype->wcdigits[0])));
749 ctype->wcdigits_act -= 10;
750 }
751 }
752
753 /* If no input digits are given use the default. */
754 if (ctype->wcdigits_act == 0)
755 {
756 if (ctype->wcdigits_max == 0)
757 {
758 ctype->wcdigits = obstack_alloc (&charmap->mem_pool,
759 10 * sizeof (uint32_t));
760 ctype->wcdigits_max = 10;
761 }
762
763 for (cnt = 0; cnt < 10; ++cnt)
764 ctype->wcdigits[cnt] = L'0' + cnt;
765
766 ctype->mbdigits_act = 10;
767 }
768
769 /* Check the outdigits. */
770 warned = 0;
771 for (cnt = 0; cnt < 10; ++cnt)
772 if (ctype->mboutdigits[cnt] == NULL)
773 {
774 static struct charseq replace[2];
775
776 if (!warned)
777 {
778 error (0, 0, _("\
779not all characters used in `outdigit' are available in the charmap"));
780 warned = 1;
781 }
782
783 replace[0].nbytes = 1;
784 replace[0].bytes[0] = '?';
785 replace[0].bytes[1] = '\0';
786 ctype->mboutdigits[cnt] = &replace[0];
787 }
788
789 warned = 0;
790 for (cnt = 0; cnt < 10; ++cnt)
791 if (ctype->wcoutdigits[cnt] == 0)
792 {
793 if (!warned)
794 {
795 error (0, 0, _("\
796not all characters used in `outdigit' are available in the repertoire"));
797 warned = 1;
798 }
799
800 ctype->wcoutdigits[cnt] = L'?';
75cd5204 801 }
a8e4c924
UD
802
803 /* Sort the entries in the translit_ignore list. */
804 if (ctype->translit_ignore != NULL)
805 {
806 struct translit_ignore_t *firstp = ctype->translit_ignore;
807 struct translit_ignore_t *runp;
808
809 ctype->ntranslit_ignore = 1;
810
811 for (runp = firstp->next; runp != NULL; runp = runp->next)
812 {
813 struct translit_ignore_t *lastp = NULL;
814 struct translit_ignore_t *cmpp;
815
816 ++ctype->ntranslit_ignore;
817
818 for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
819 if (runp->from < cmpp->from)
820 break;
821
822 runp->next = lastp;
823 if (lastp == NULL)
824 firstp = runp;
825 }
826
827 ctype->translit_ignore = firstp;
828 }
19bc17a9
RM
829}
830
831
832void
4b10dd6c 833ctype_output (struct localedef_t *locale, struct charmap_t *charmap,
75cd5204 834 const char *output_path)
19bc17a9
RM
835{
836 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
837 const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
5491da0d 838 + (ctype->map_collection_nr - 2));
75cd5204
RM
839 struct iovec iov[2 + nelems + ctype->nr_charclass
840 + ctype->map_collection_nr];
19bc17a9 841 struct locale_file data;
4b10dd6c 842 uint32_t idx[nelems + 1];
1d96d74d 843 uint32_t default_missing_len;
75cd5204 844 size_t elem, cnt, offset, total;
4b10dd6c 845 char *cp;
19bc17a9
RM
846
847 /* Now prepare the output: Find the sizes of the table we can use. */
4b10dd6c 848 allocate_arrays (ctype, charmap, ctype->repertoire);
19bc17a9
RM
849
850 data.magic = LIMAGIC (LC_CTYPE);
851 data.n = nelems;
852 iov[0].iov_base = (void *) &data;
853 iov[0].iov_len = sizeof (data);
854
855 iov[1].iov_base = (void *) idx;
a0edd63e 856 iov[1].iov_len = nelems * sizeof (uint32_t);
19bc17a9
RM
857
858 idx[0] = iov[0].iov_len + iov[1].iov_len;
859 offset = 0;
860
861 for (elem = 0; elem < nelems; ++elem)
862 {
863 if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
864 switch (elem)
865 {
866#define CTYPE_DATA(name, base, len) \
867 case _NL_ITEM_INDEX (name): \
ce7a5ef4
RM
868 iov[2 + elem + offset].iov_base = (base); \
869 iov[2 + elem + offset].iov_len = (len); \
1d96d74d 870 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
19bc17a9
RM
871 break
872
873 CTYPE_DATA (_NL_CTYPE_CLASS,
874 ctype->ctype_b,
875 (256 + 128) * sizeof (char_class_t));
876
4a33c2f5
UD
877 CTYPE_DATA (_NL_CTYPE_TOUPPER,
878 ctype->map[0],
f1d8b804 879 (256 + 128) * sizeof (uint32_t));
4a33c2f5
UD
880 CTYPE_DATA (_NL_CTYPE_TOLOWER,
881 ctype->map[1],
f1d8b804 882 (256 + 128) * sizeof (uint32_t));
19bc17a9 883
49f2be5b
UD
884 CTYPE_DATA (_NL_CTYPE_TOUPPER32,
885 ctype->map32[0],
f1d8b804 886 (ctype->plane_size * ctype->plane_cnt)
49f2be5b
UD
887 * sizeof (uint32_t));
888 CTYPE_DATA (_NL_CTYPE_TOLOWER32,
889 ctype->map32[1],
f1d8b804 890 (ctype->plane_size * ctype->plane_cnt)
49f2be5b
UD
891 * sizeof (uint32_t));
892
19bc17a9
RM
893 CTYPE_DATA (_NL_CTYPE_CLASS32,
894 ctype->ctype32_b,
895 (ctype->plane_size * ctype->plane_cnt
896 * sizeof (char_class32_t)));
897
4a33c2f5
UD
898 CTYPE_DATA (_NL_CTYPE_NAMES,
899 ctype->names, (ctype->plane_size * ctype->plane_cnt
900 * sizeof (uint32_t)));
901
902 CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_SIZE,
903 &ctype->translit_hash_size, sizeof (uint32_t));
904 CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_LAYERS,
905 &ctype->translit_hash_layers, sizeof (uint32_t));
906
907 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
908 ctype->translit_from_idx,
4b10dd6c
UD
909 ctype->translit_idx_size);
910
4a33c2f5
UD
911 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
912 ctype->translit_from_tbl,
4b10dd6c
UD
913 ctype->translit_from_tbl_size);
914
4a33c2f5
UD
915 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
916 ctype->translit_to_idx,
4b10dd6c
UD
917 ctype->translit_idx_size);
918
4a33c2f5
UD
919 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
920 ctype->translit_to_tbl, ctype->translit_to_tbl_size);
4b10dd6c 921
4a33c2f5 922 CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
4b10dd6c 923 &ctype->plane_size, sizeof (uint32_t));
4a33c2f5 924 CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
4b10dd6c 925 &ctype->plane_cnt, sizeof (uint32_t));
19bc17a9 926
75cd5204
RM
927 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
928 /* The class name array. */
929 total = 0;
930 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
931 {
932 iov[2 + elem + offset].iov_base
933 = (void *) ctype->classnames[cnt];
934 iov[2 + elem + offset].iov_len
935 = strlen (ctype->classnames[cnt]) + 1;
936 total += iov[2 + elem + offset].iov_len;
937 }
ce7a5ef4
RM
938 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
939 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
940 total += 1 + (4 - ((total + 1) % 4));
75cd5204 941
4b10dd6c 942 idx[elem + 1] = idx[elem] + total;
75cd5204
RM
943 break;
944
945 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
946 /* The class name array. */
947 total = 0;
948 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
949 {
950 iov[2 + elem + offset].iov_base
951 = (void *) ctype->mapnames[cnt];
952 iov[2 + elem + offset].iov_len
953 = strlen (ctype->mapnames[cnt]) + 1;
954 total += iov[2 + elem + offset].iov_len;
955 }
ce7a5ef4
RM
956 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
957 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
958 total += 1 + (4 - ((total + 1) % 4));
75cd5204 959
4b10dd6c 960 idx[elem + 1] = idx[elem] + total;
75cd5204 961 break;
19bc17a9
RM
962
963 CTYPE_DATA (_NL_CTYPE_WIDTH,
5866b131
UD
964 ctype->width,
965 (ctype->plane_size * ctype->plane_cnt + 3) & ~3ul);
19bc17a9 966
0200214b 967 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
4b10dd6c 968 &ctype->mb_cur_max, sizeof (uint32_t));
0200214b 969
ce7a5ef4
RM
970 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
971 total = strlen (ctype->codeset_name) + 1;
972 if (total % 4 == 0)
973 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
974 else
975 {
976 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
9756dfe1
UD
977 memset (mempcpy (iov[2 + elem + offset].iov_base,
978 ctype->codeset_name, total),
979 '\0', 4 - (total & 3));
ce7a5ef4
RM
980 total = (total + 3) & ~3;
981 }
982 iov[2 + elem + offset].iov_len = total;
4b10dd6c
UD
983 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
984 break;
985
4a33c2f5 986 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
4b10dd6c
UD
987 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
988 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
4a33c2f5
UD
989 *(uint32_t *) iov[2 + elem + offset].iov_base =
990 ctype->mbdigits_act / 10;
a9c27b3e 991 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
4b10dd6c
UD
992 break;
993
4a33c2f5 994 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
4b10dd6c
UD
995 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
996 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
4a33c2f5
UD
997 *(uint32_t *) iov[2 + elem + offset].iov_base =
998 ctype->wcdigits_act / 10;
a9c27b3e 999 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
4b10dd6c
UD
1000 break;
1001
1002 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1003 /* Compute the length of all possible characters. For INDIGITS
1004 there might be more than one. We simply concatenate all of
1005 them with a NUL byte following. The NUL byte wouldn't be
1006 necessary but it makes it easier for the user. */
1007 total = 0;
498b733e 1008 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
4b10dd6c
UD
1009 cnt < ctype->mbdigits_act; cnt += 10)
1010 total += ctype->mbdigits[cnt]->nbytes + 1;
1011 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1012 iov[2 + elem + offset].iov_len = total;
1013
1014 cp = iov[2 + elem + offset].iov_base;
498b733e 1015 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
4b10dd6c
UD
1016 cnt < ctype->mbdigits_act; cnt += 10)
1017 {
1018 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
1019 ctype->mbdigits[cnt]->nbytes);
1020 *cp++ = '\0';
1021 }
a9c27b3e 1022 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
4b10dd6c
UD
1023 break;
1024
1025 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1026 /* Compute the length of all possible characters. For INDIGITS
1027 there might be more than one. We simply concatenate all of
1028 them with a NUL byte following. The NUL byte wouldn't be
1029 necessary but it makes it easier for the user. */
498b733e 1030 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
4b10dd6c
UD
1031 total = ctype->mboutdigits[cnt]->nbytes + 1;
1032 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1033 iov[2 + elem + offset].iov_len = total;
1034
1035 *(char *) mempcpy (iov[2 + elem + offset].iov_base,
498b733e
UD
1036 ctype->mboutdigits[cnt]->bytes,
1037 ctype->mboutdigits[cnt]->nbytes) = '\0';
a9c27b3e 1038 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
4b10dd6c
UD
1039 break;
1040
4a33c2f5 1041 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
4b10dd6c
UD
1042 total = ctype->wcdigits_act / 10;
1043
1044 iov[2 + elem + offset].iov_base =
1045 (uint32_t *) alloca (total * sizeof (uint32_t));
1046 iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
1047
498b733e 1048 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
4b10dd6c
UD
1049 cnt < ctype->wcdigits_act; cnt += 10)
1050 ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
4a33c2f5 1051 = ctype->wcdigits[cnt];
a9c27b3e 1052 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
4b10dd6c
UD
1053 break;
1054
4a33c2f5 1055 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
498b733e 1056 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
4b10dd6c
UD
1057 iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
1058 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
a9c27b3e 1059 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
4b10dd6c
UD
1060 break;
1061
a8e4c924
UD
1062 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1063 default_missing_len = (ctype->default_missing
1064 ? wcslen ((wchar_t *)ctype->default_missing)
1065 : 1);
1066 iov[2 + elem + offset].iov_base = &default_missing_len;
1067 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1068 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1069 break;
1070
1d96d74d
UD
1071 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1072 iov[2 + elem + offset].iov_base =
1073 ctype->default_missing ?: (uint32_t *) L"";
1074 iov[2 + elem + offset].iov_len =
1075 wcslen (iov[2 + elem + offset].iov_base);
1076 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1077 break;
1078
a8e4c924
UD
1079 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1080 iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore;
1d96d74d 1081 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
a8e4c924
UD
1082 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1083 break;
1084
1085 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1086 {
1087 uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore
1088 * 3 * sizeof (uint32_t));
1089 struct translit_ignore_t *runp;
1090
1091 iov[2 + elem + offset].iov_base = ranges;
1092 iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore
1093 * 3 * sizeof (uint32_t));
1094
1095 for (runp = ctype->translit_ignore; runp != NULL;
1096 runp = runp->next)
1097 {
1098 *ranges++ = runp->from;
1099 *ranges++ = runp->to;
1100 *ranges++ = runp->step;
1101 }
1102 }
1d96d74d
UD
1103 /* Remove the following line in case a new entry is added
1104 after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN. */
1105 if (elem < nelems)
1106 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1107 break;
1108
19bc17a9
RM
1109 default:
1110 assert (! "unknown CTYPE element");
1111 }
1112 else
1113 {
1114 /* Handle extra maps. */
5491da0d 1115 size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) + 2;
19bc17a9 1116
49f2be5b 1117 iov[2 + elem + offset].iov_base = ctype->map32[nr];
75cd5204 1118 iov[2 + elem + offset].iov_len = ((ctype->plane_size
f1d8b804 1119 * ctype->plane_cnt)
4b10dd6c 1120 * sizeof (uint32_t));
19bc17a9 1121
4b10dd6c 1122 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
75cd5204 1123 }
19bc17a9 1124 }
19bc17a9 1125
75cd5204
RM
1126 assert (2 + elem + offset == (nelems + ctype->nr_charclass
1127 + ctype->map_collection_nr + 2));
19bc17a9 1128
75cd5204 1129 write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
19bc17a9
RM
1130}
1131
1132
4b10dd6c
UD
1133/* Local functions. */
1134static void
1135ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1136 const char *name)
19bc17a9 1137{
4b10dd6c 1138 size_t cnt;
19bc17a9 1139
4b10dd6c
UD
1140 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1141 if (strcmp (ctype->classnames[cnt], name) == 0)
1142 break;
19bc17a9 1143
4b10dd6c
UD
1144 if (cnt < ctype->nr_charclass)
1145 {
1146 lr_error (lr, _("character class `%s' already defined"), name);
1147 return;
1148 }
19bc17a9 1149
4b10dd6c
UD
1150 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1151 /* Exit code 2 is prescribed in P1003.2b. */
1152 error (2, 0, _("\
5d431a3e 1153implementation limit: no more than %Zd character classes allowed"),
4b10dd6c 1154 MAX_NR_CHARCLASS);
19bc17a9 1155
4b10dd6c 1156 ctype->classnames[ctype->nr_charclass++] = name;
19bc17a9
RM
1157}
1158
1159
4b10dd6c
UD
1160static void
1161ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1162 const char *name, struct charmap_t *charmap)
19bc17a9 1163{
4b10dd6c 1164 size_t max_chars = 0;
ba1ffaa1 1165 size_t cnt;
19bc17a9 1166
4b10dd6c 1167 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
19bc17a9 1168 {
4b10dd6c
UD
1169 if (strcmp (ctype->mapnames[cnt], name) == 0)
1170 break;
1171
1172 if (max_chars < ctype->map_collection_max[cnt])
1173 max_chars = ctype->map_collection_max[cnt];
19bc17a9
RM
1174 }
1175
4b10dd6c
UD
1176 if (cnt < ctype->map_collection_nr)
1177 {
1178 lr_error (lr, _("character map `%s' already defined"), name);
1179 return;
1180 }
19bc17a9 1181
4b10dd6c
UD
1182 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1183 /* Exit code 2 is prescribed in P1003.2b. */
1184 error (2, 0, _("\
1185implementation limit: no more than %d character maps allowed"),
1186 MAX_NR_CHARMAP);
19bc17a9 1187
4b10dd6c
UD
1188 ctype->mapnames[cnt] = name;
1189
1190 if (max_chars == 0)
1191 ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1192 else
1193 ctype->map_collection_max[cnt] = max_chars;
1194
1195 ctype->map_collection[cnt] = (uint32_t *)
5866b131 1196 xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
4b10dd6c 1197 ctype->map_collection_act[cnt] = 256;
19bc17a9 1198
4b10dd6c 1199 ++ctype->map_collection_nr;
19bc17a9
RM
1200}
1201
1202
4b10dd6c 1203/* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
42d7c593 1204 is possible if we only want to extend the name array. */
4b10dd6c
UD
1205static uint32_t *
1206find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1207 size_t *act, uint32_t idx)
19bc17a9 1208{
4b10dd6c 1209 size_t cnt;
19bc17a9 1210
4b10dd6c
UD
1211 if (idx < 256)
1212 return table == NULL ? NULL : &(*table)[idx];
19bc17a9 1213
4b10dd6c
UD
1214 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1215 if (ctype->charnames[cnt] == idx)
1216 break;
19bc17a9 1217
4b10dd6c
UD
1218 /* We have to distinguish two cases: the name is found or not. */
1219 if (cnt == ctype->charnames_act)
1220 {
1221 /* Extend the name array. */
1222 if (ctype->charnames_act == ctype->charnames_max)
1223 {
1224 ctype->charnames_max *= 2;
5866b131 1225 ctype->charnames = (uint32_t *)
4b10dd6c 1226 xrealloc (ctype->charnames,
5866b131 1227 sizeof (uint32_t) * ctype->charnames_max);
4b10dd6c
UD
1228 }
1229 ctype->charnames[ctype->charnames_act++] = idx;
1230 }
19bc17a9 1231
4b10dd6c
UD
1232 if (table == NULL)
1233 /* We have done everything we are asked to do. */
1234 return NULL;
19bc17a9 1235
4b10dd6c
UD
1236 if (cnt >= *act)
1237 {
1238 if (cnt >= *max)
1239 {
1240 size_t old_max = *max;
1241 do
1242 *max *= 2;
1243 while (*max <= cnt);
19bc17a9 1244
4b10dd6c 1245 *table =
5866b131 1246 (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
4b10dd6c
UD
1247 memset (&(*table)[old_max], '\0',
1248 (*max - old_max) * sizeof (uint32_t));
1249 }
19bc17a9 1250
76e680a8 1251 *act = cnt + 1;
4b10dd6c 1252 }
19bc17a9 1253
4b10dd6c 1254 return &(*table)[cnt];
19bc17a9
RM
1255}
1256
1257
4b10dd6c
UD
1258static int
1259get_character (struct token *now, struct charmap_t *charmap,
1260 struct repertoire_t *repertoire,
1261 struct charseq **seqp, uint32_t *wchp)
19bc17a9 1262{
4b10dd6c
UD
1263 if (now->tok == tok_bsymbol)
1264 {
1265 /* This will hopefully be the normal case. */
1266 *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1267 now->val.str.lenmb);
1268 *seqp = charmap_find_value (charmap, now->val.str.startmb,
1269 now->val.str.lenmb);
1270 }
1271 else if (now->tok == tok_ucs4)
1272 {
f0a4b6b1
UD
1273 char utmp[10];
1274
1275 snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1276 *seqp = charmap_find_value (charmap, utmp, 9);
1277
1278 if (*seqp == NULL)
1279 *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
19bc17a9 1280
4b10dd6c
UD
1281 if (*seqp == NULL)
1282 {
1283 /* Compute the value in the charmap from the UCS value. */
1284 const char *symbol = repertoire_find_symbol (repertoire,
1285 now->val.ucs4);
19bc17a9 1286
4b10dd6c
UD
1287 if (symbol == NULL)
1288 *seqp = NULL;
1289 else
1290 *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
19bc17a9 1291
4b10dd6c
UD
1292 if (*seqp == NULL)
1293 {
723faa38
UD
1294 if (repertoire != NULL)
1295 {
1296 /* Insert a negative entry. */
1297 static const struct charseq negative
1298 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1299 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1300 sizeof (uint32_t));
1301 *newp = now->val.ucs4;
1302
1303 insert_entry (&repertoire->seq_table, newp,
1304 sizeof (uint32_t), (void *) &negative);
1305 }
4b10dd6c
UD
1306 }
1307 else
1308 (*seqp)->ucs4 = now->val.ucs4;
1309 }
1310 else if ((*seqp)->ucs4 != now->val.ucs4)
1311 *seqp = NULL;
19bc17a9 1312
4b10dd6c
UD
1313 *wchp = now->val.ucs4;
1314 }
1315 else if (now->tok == tok_charcode)
1316 {
1317 /* We must map from the byte code to UCS4. */
1318 *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1319 now->val.str.lenmb);
19bc17a9 1320
4b10dd6c
UD
1321 if (*seqp == NULL)
1322 *wchp = ILLEGAL_CHAR_VALUE;
1323 else
1324 {
1325 if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1326 (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1327 strlen ((*seqp)->name));
1328 *wchp = (*seqp)->ucs4;
1329 }
1330 }
1331 else
1332 return 1;
19bc17a9
RM
1333
1334 return 0;
1335}
1336
1337
a0dc5206
UD
1338/* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1339 the .(2). counterparts. */
4b10dd6c
UD
1340static void
1341charclass_symbolic_ellipsis (struct linereader *ldfile,
1342 struct locale_ctype_t *ctype,
1343 struct charmap_t *charmap,
1344 struct repertoire_t *repertoire,
1345 struct token *now,
1346 const char *last_str,
1347 unsigned long int class256_bit,
1348 unsigned long int class_bit, int base,
a0dc5206 1349 int ignore_content, int handle_digits, int step)
19bc17a9 1350{
4b10dd6c
UD
1351 const char *nowstr = now->val.str.startmb;
1352 char tmp[now->val.str.lenmb + 1];
1353 const char *cp;
1354 char *endp;
1355 unsigned long int from;
1356 unsigned long int to;
19bc17a9 1357
4b10dd6c
UD
1358 /* We have to compute the ellipsis values using the symbolic names. */
1359 assert (last_str != NULL);
1360
1361 if (strlen (last_str) != now->val.str.lenmb)
19bc17a9 1362 {
4b10dd6c
UD
1363 invalid_range:
1364 lr_error (ldfile,
549b3c3a 1365 _("`%s' and `%.*s' are no valid names for symbolic range"),
f6ada7ad 1366 last_str, (int) now->val.str.lenmb, nowstr);
4b10dd6c 1367 return;
19bc17a9
RM
1368 }
1369
4b10dd6c
UD
1370 if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1371 /* Nothing to do, the names are the same. */
1372 return;
19bc17a9 1373
4b10dd6c
UD
1374 for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1375 ;
19bc17a9 1376
4b10dd6c
UD
1377 errno = 0;
1378 from = strtoul (cp, &endp, base);
1379 if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1380 goto invalid_range;
19bc17a9 1381
4b10dd6c 1382 to = strtoul (nowstr + (cp - last_str), &endp, base);
549b3c3a
UD
1383 if ((to == UINT_MAX && errno == ERANGE)
1384 || (endp - nowstr) != now->val.str.lenmb || from >= to)
4b10dd6c 1385 goto invalid_range;
19bc17a9 1386
4b10dd6c
UD
1387 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1388 if (!ignore_content)
1389 {
1390 now->val.str.startmb = tmp;
a0dc5206 1391 while ((from += step) <= to)
4b10dd6c
UD
1392 {
1393 struct charseq *seq;
1394 uint32_t wch;
19bc17a9 1395
4b10dd6c
UD
1396 sprintf (tmp, (base == 10 ? "%.*s%0*d" : "%.*s%0*X"), cp - last_str,
1397 last_str, now->val.str.lenmb - (cp - last_str), from);
19bc17a9 1398
4b10dd6c
UD
1399 get_character (now, charmap, repertoire, &seq, &wch);
1400
1401 if (seq != NULL && seq->nbytes == 1)
1402 /* Yep, we can store information about this byte sequence. */
1403 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
19bc17a9 1404
4b10dd6c
UD
1405 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1406 /* We have the UCS4 position. */
1407 *find_idx (ctype, &ctype->class_collection,
1408 &ctype->class_collection_max,
1409 &ctype->class_collection_act, wch) |= class_bit;
19bc17a9 1410
4b10dd6c
UD
1411 if (handle_digits == 1)
1412 {
1413 /* We must store the digit values. */
1414 if (ctype->mbdigits_act == ctype->mbdigits_max)
1415 {
1416 ctype->mbdigits_max *= 2;
1417 ctype->mbdigits = xrealloc (ctype->mbdigits,
1418 (ctype->mbdigits_max
1419 * sizeof (char *)));
1420 ctype->wcdigits_max *= 2;
1421 ctype->wcdigits = xrealloc (ctype->wcdigits,
1422 (ctype->wcdigits_max
1423 * sizeof (uint32_t)));
1424 }
1425
1426 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1427 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1428 }
1429 else if (handle_digits == 2)
1430 {
1431 /* We must store the digit values. */
1432 if (ctype->outdigits_act >= 10)
1433 {
1434 lr_error (ldfile, _("\
1435%s: field `%s' does not contain exactly ten entries"),
1436 "LC_CTYPE", "outdigit");
1437 return;
1438 }
1439
1440 ctype->mboutdigits[ctype->outdigits_act] = seq;
1441 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1442 ++ctype->outdigits_act;
1443 }
1444 }
1445 }
19bc17a9
RM
1446}
1447
1448
a0dc5206 1449/* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
4b10dd6c
UD
1450static void
1451charclass_ucs4_ellipsis (struct linereader *ldfile,
1452 struct locale_ctype_t *ctype,
1453 struct charmap_t *charmap,
1454 struct repertoire_t *repertoire,
1455 struct token *now, uint32_t last_wch,
1456 unsigned long int class256_bit,
1457 unsigned long int class_bit, int ignore_content,
a0dc5206 1458 int handle_digits, int step)
19bc17a9 1459{
4b10dd6c 1460 if (last_wch > now->val.ucs4)
19bc17a9 1461 {
4b10dd6c
UD
1462 lr_error (ldfile, _("\
1463to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1464 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1465 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
19bc17a9
RM
1466 return;
1467 }
1468
4b10dd6c 1469 if (!ignore_content)
a0dc5206 1470 while ((last_wch += step) <= now->val.ucs4)
4b10dd6c
UD
1471 {
1472 /* We have to find out whether there is a byte sequence corresponding
1473 to this UCS4 value. */
f0a4b6b1
UD
1474 struct charseq *seq;
1475 char utmp[10];
1476
1477 snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1478 seq = charmap_find_value (charmap, utmp, 9);
a0dc5206
UD
1479 if (seq == NULL)
1480 {
1481 snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1482 seq = charmap_find_value (charmap, utmp, 5);
1483 }
f0a4b6b1
UD
1484
1485 if (seq == NULL)
1486 /* Try looking in the repertoire map. */
1487 seq = repertoire_find_seq (repertoire, last_wch);
19bc17a9 1488
4b10dd6c
UD
1489 /* If this is the first time we look for this sequence create a new
1490 entry. */
1491 if (seq == NULL)
1492 {
f0a4b6b1
UD
1493 static const struct charseq negative
1494 = { .ucs4 = ILLEGAL_CHAR_VALUE };
19bc17a9 1495
f0a4b6b1
UD
1496 /* Find the symbolic name for this UCS4 value. */
1497 if (repertoire != NULL)
4b10dd6c 1498 {
f0a4b6b1
UD
1499 const char *symbol = repertoire_find_symbol (repertoire,
1500 last_wch);
5866b131
UD
1501 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1502 sizeof (uint32_t));
f0a4b6b1
UD
1503 *newp = last_wch;
1504
1505 if (symbol != NULL)
1506 /* We have a name, now search the multibyte value. */
1507 seq = charmap_find_value (charmap, symbol, strlen (symbol));
1508
1509 if (seq == NULL)
1510 /* We have to create a fake entry. */
1511 seq = (struct charseq *) &negative;
1512 else
1513 seq->ucs4 = last_wch;
1514
5866b131
UD
1515 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1516 seq);
4b10dd6c
UD
1517 }
1518 else
f0a4b6b1
UD
1519 /* We have to create a fake entry. */
1520 seq = (struct charseq *) &negative;
4b10dd6c
UD
1521 }
1522
1523 /* We have a name, now search the multibyte value. */
1524 if (seq->ucs4 == last_wch && seq->nbytes == 1)
1525 /* Yep, we can store information about this byte sequence. */
1526 ctype->class256_collection[(size_t) seq->bytes[0]]
1527 |= class256_bit;
1528
1529 /* And of course we have the UCS4 position. */
5866b131 1530 if (class_bit != 0)
4b10dd6c
UD
1531 *find_idx (ctype, &ctype->class_collection,
1532 &ctype->class_collection_max,
1533 &ctype->class_collection_act, last_wch) |= class_bit;
1534
1535 if (handle_digits == 1)
1536 {
1537 /* We must store the digit values. */
1538 if (ctype->mbdigits_act == ctype->mbdigits_max)
1539 {
1540 ctype->mbdigits_max *= 2;
1541 ctype->mbdigits = xrealloc (ctype->mbdigits,
1542 (ctype->mbdigits_max
1543 * sizeof (char *)));
1544 ctype->wcdigits_max *= 2;
1545 ctype->wcdigits = xrealloc (ctype->wcdigits,
1546 (ctype->wcdigits_max
1547 * sizeof (uint32_t)));
1548 }
1549
1550 ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1551 ? seq : NULL);
1552 ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1553 }
1554 else if (handle_digits == 2)
1555 {
1556 /* We must store the digit values. */
1557 if (ctype->outdigits_act >= 10)
1558 {
1559 lr_error (ldfile, _("\
1560%s: field `%s' does not contain exactly ten entries"),
1561 "LC_CTYPE", "outdigit");
1562 return;
1563 }
19bc17a9 1564
4b10dd6c
UD
1565 ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1566 ? seq : NULL);
1567 ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1568 ++ctype->outdigits_act;
1569 }
1570 }
19bc17a9
RM
1571}
1572
1573
4b10dd6c 1574/* Ellipsis as in `/xea/x12.../xea/x34'. */
19bc17a9 1575static void
4b10dd6c
UD
1576charclass_charcode_ellipsis (struct linereader *ldfile,
1577 struct locale_ctype_t *ctype,
1578 struct charmap_t *charmap,
1579 struct repertoire_t *repertoire,
1580 struct token *now, char *last_charcode,
1581 uint32_t last_charcode_len,
1582 unsigned long int class256_bit,
1583 unsigned long int class_bit, int ignore_content,
1584 int handle_digits)
19bc17a9 1585{
4b10dd6c
UD
1586 /* First check whether the to-value is larger. */
1587 if (now->val.charcode.nbytes != last_charcode_len)
1588 {
1589 lr_error (ldfile, _("\
1590start end end character sequence of range must have the same length"));
1591 return;
1592 }
19bc17a9 1593
4b10dd6c 1594 if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
19bc17a9 1595 {
4b10dd6c
UD
1596 lr_error (ldfile, _("\
1597to-value character sequence is smaller than from-value sequence"));
19bc17a9
RM
1598 return;
1599 }
1600
4b10dd6c
UD
1601 if (!ignore_content)
1602 {
1603 do
1604 {
1605 /* Increment the byte sequence value. */
1606 struct charseq *seq;
1607 uint32_t wch;
1608 int i;
1609
1610 for (i = last_charcode_len - 1; i >= 0; --i)
1611 if (++last_charcode[i] != 0)
1612 break;
1613
1614 if (last_charcode_len == 1)
1615 /* Of course we have the charcode value. */
1616 ctype->class256_collection[(size_t) last_charcode[0]]
1617 |= class256_bit;
1618
1619 /* Find the symbolic name. */
1620 seq = charmap_find_symbol (charmap, last_charcode,
1621 last_charcode_len);
1622 if (seq != NULL)
1623 {
1624 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1625 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1626 strlen (seq->name));
f0a4b6b1 1627 wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
4b10dd6c
UD
1628
1629 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1630 *find_idx (ctype, &ctype->class_collection,
1631 &ctype->class_collection_max,
1632 &ctype->class_collection_act, wch) |= class_bit;
1633 }
1634 else
1635 wch = ILLEGAL_CHAR_VALUE;
19bc17a9 1636
4b10dd6c
UD
1637 if (handle_digits == 1)
1638 {
1639 /* We must store the digit values. */
1640 if (ctype->mbdigits_act == ctype->mbdigits_max)
1641 {
1642 ctype->mbdigits_max *= 2;
1643 ctype->mbdigits = xrealloc (ctype->mbdigits,
1644 (ctype->mbdigits_max
1645 * sizeof (char *)));
1646 ctype->wcdigits_max *= 2;
1647 ctype->wcdigits = xrealloc (ctype->wcdigits,
1648 (ctype->wcdigits_max
1649 * sizeof (uint32_t)));
1650 }
1651
1652 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1653 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1654 seq->nbytes = last_charcode_len;
1655
1656 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1657 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1658 }
1659 else if (handle_digits == 2)
1660 {
1661 struct charseq *seq;
1662 /* We must store the digit values. */
1663 if (ctype->outdigits_act >= 10)
1664 {
1665 lr_error (ldfile, _("\
1666%s: field `%s' does not contain exactly ten entries"),
1667 "LC_CTYPE", "outdigit");
1668 return;
1669 }
1670
1671 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1672 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1673 seq->nbytes = last_charcode_len;
1674
1675 ctype->mboutdigits[ctype->outdigits_act] = seq;
1676 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1677 ++ctype->outdigits_act;
1678 }
1679 }
1680 while (memcmp (last_charcode, now->val.charcode.bytes,
1681 last_charcode_len) != 0);
1682 }
19bc17a9
RM
1683}
1684
1685
4b10dd6c
UD
1686/* Read one transliteration entry. */
1687static uint32_t *
1688read_widestring (struct linereader *ldfile, struct token *now,
1689 struct charmap_t *charmap, struct repertoire_t *repertoire)
19bc17a9 1690{
4b10dd6c 1691 uint32_t *wstr;
19bc17a9 1692
4b10dd6c
UD
1693 if (now->tok == tok_default_missing)
1694 /* The special name "" will denote this case. */
5866b131 1695 wstr = ((uint32_t *) { 0 });
4b10dd6c 1696 else if (now->tok == tok_bsymbol)
19bc17a9 1697 {
4b10dd6c 1698 /* Get the value from the repertoire. */
a673fbcb 1699 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
4b10dd6c
UD
1700 wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1701 now->val.str.lenmb);
1702 if (wstr[0] == ILLEGAL_CHAR_VALUE)
f0a4b6b1
UD
1703 {
1704 /* We cannot proceed, we don't know the UCS4 value. */
1705 free (wstr);
1706 return NULL;
1707 }
4b10dd6c
UD
1708
1709 wstr[1] = 0;
19bc17a9 1710 }
4b10dd6c 1711 else if (now->tok == tok_ucs4)
19bc17a9 1712 {
a673fbcb 1713 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
4b10dd6c
UD
1714 wstr[0] = now->val.ucs4;
1715 wstr[1] = 0;
1716 }
1717 else if (now->tok == tok_charcode)
1718 {
1719 /* Argh, we have to convert to the symbol name first and then to the
1720 UCS4 value. */
1721 struct charseq *seq = charmap_find_symbol (charmap,
1722 now->val.str.startmb,
1723 now->val.str.lenmb);
1724 if (seq == NULL)
1725 /* Cannot find the UCS4 value. */
1726 return NULL;
1727
1728 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1729 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1730 strlen (seq->name));
1731 if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1732 /* We cannot proceed, we don't know the UCS4 value. */
1733 return NULL;
1734
a673fbcb 1735 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
4b10dd6c
UD
1736 wstr[0] = seq->ucs4;
1737 wstr[1] = 0;
1738 }
1739 else if (now->tok == tok_string)
1740 {
1741 wstr = now->val.str.startwc;
a673fbcb 1742 if (wstr == NULL || wstr[0] == 0)
4b10dd6c
UD
1743 return NULL;
1744 }
1745 else
1746 {
1747 if (now->tok != tok_eol && now->tok != tok_eof)
1748 lr_ignore_rest (ldfile, 0);
1749 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1750 return (uint32_t *) -1l;
19bc17a9
RM
1751 }
1752
4b10dd6c
UD
1753 return wstr;
1754}
19bc17a9 1755
19bc17a9 1756
4b10dd6c
UD
1757static void
1758read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1759 struct token *now, struct charmap_t *charmap,
1760 struct repertoire_t *repertoire)
1761{
1762 uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1763 struct translit_t *result;
1764 struct translit_to_t **top;
a673fbcb 1765 struct obstack *ob = &ctype->mempool;
4b10dd6c
UD
1766 int first;
1767 int ignore;
1768
1769 if (from_wstr == NULL)
1770 /* There is no valid from string. */
1771 return;
19bc17a9 1772
4b10dd6c
UD
1773 result = (struct translit_t *) obstack_alloc (ob,
1774 sizeof (struct translit_t));
1775 result->from = from_wstr;
a673fbcb
UD
1776 result->fname = ldfile->fname;
1777 result->lineno = ldfile->lineno;
4b10dd6c
UD
1778 result->next = NULL;
1779 result->to = NULL;
1780 top = &result->to;
1781 first = 1;
1782 ignore = 0;
1783
1784 while (1)
1785 {
1786 uint32_t *to_wstr;
1787
1788 /* Next we have one or more transliterations. They are
1789 separated by semicolons. */
1790 now = lr_token (ldfile, charmap, repertoire);
1791
1792 if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1793 {
1794 /* One string read. */
1795 const uint32_t zero = 0;
1796
1797 if (!ignore)
1798 {
1799 obstack_grow (ob, &zero, 4);
1800 to_wstr = obstack_finish (ob);
1801
1802 *top = obstack_alloc (ob, sizeof (struct translit_to_t));
1803 (*top)->str = to_wstr;
1804 (*top)->next = NULL;
1805 }
1806
1807 if (now->tok == tok_eol)
1808 {
1809 result->next = ctype->translit;
1810 ctype->translit = result;
1811 return;
1812 }
1813
1814 if (!ignore)
1815 top = &(*top)->next;
1816 ignore = 0;
1817 }
1818 else
1819 {
1820 to_wstr = read_widestring (ldfile, now, charmap, repertoire);
1821 if (to_wstr == (uint32_t *) -1l)
1822 {
1823 /* An error occurred. */
1824 obstack_free (ob, result);
1825 return;
1826 }
1827
1828 if (to_wstr == NULL)
1829 ignore = 1;
1830 else
1831 /* This value is usable. */
1832 obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
19bc17a9 1833
4b10dd6c
UD
1834 first = 0;
1835 }
1836 }
19bc17a9
RM
1837}
1838
1839
a673fbcb
UD
1840static void
1841read_translit_ignore_entry (struct linereader *ldfile,
1842 struct locale_ctype_t *ctype,
1843 struct charmap_t *charmap,
1844 struct repertoire_t *repertoire)
1845{
1846 /* We expect a semicolon-separated list of characters we ignore. We are
1847 only interested in the wide character definitions. These must be
1848 single characters, possibly defining a range when an ellipsis is used. */
1849 while (1)
1850 {
1851 struct token *now = lr_token (ldfile, charmap, repertoire);
1852 struct translit_ignore_t *newp;
1853 uint32_t from;
1854
1855 if (now->tok == tok_eol || now->tok == tok_eof)
1856 {
1857 lr_error (ldfile,
1858 _("premature end of `translit_ignore' definition"));
1859 return;
1860 }
1861
1862 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1863 {
1864 lr_error (ldfile, _("syntax error"));
1865 lr_ignore_rest (ldfile, 0);
1866 return;
1867 }
1868
1869 if (now->tok == tok_ucs4)
1870 from = now->val.ucs4;
1871 else
f0a4b6b1
UD
1872 /* Try to get the value. */
1873 from = repertoire_find_value (repertoire, now->val.str.startmb,
1874 now->val.str.lenmb);
a673fbcb
UD
1875
1876 if (from == ILLEGAL_CHAR_VALUE)
1877 {
1878 lr_error (ldfile, "invalid character name");
1879 newp = NULL;
1880 }
1881 else
1882 {
1883 newp = (struct translit_ignore_t *)
1884 obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
1885 newp->from = from;
1886 newp->to = from;
a0dc5206 1887 newp->step = 1;
a673fbcb
UD
1888
1889 newp->next = ctype->translit_ignore;
1890 ctype->translit_ignore = newp;
1891 }
1892
1893 /* Now we expect either a semicolon, an ellipsis, or the end of the
1894 line. */
1895 now = lr_token (ldfile, charmap, repertoire);
1896
a0dc5206 1897 if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
a673fbcb
UD
1898 {
1899 /* XXX Should we bother implementing `....'? `...' certainly
1900 will not be implemented. */
1901 uint32_t to;
a0dc5206 1902 int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
a673fbcb
UD
1903
1904 now = lr_token (ldfile, charmap, repertoire);
1905
1906 if (now->tok == tok_eol || now->tok == tok_eof)
1907 {
1908 lr_error (ldfile,
1909 _("premature end of `translit_ignore' definition"));
1910 return;
1911 }
1912
1913 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1914 {
1915 lr_error (ldfile, _("syntax error"));
1916 lr_ignore_rest (ldfile, 0);
1917 return;
1918 }
1919
1920 if (now->tok == tok_ucs4)
1921 to = now->val.ucs4;
1922 else
f0a4b6b1
UD
1923 /* Try to get the value. */
1924 to = repertoire_find_value (repertoire, now->val.str.startmb,
1925 now->val.str.lenmb);
a673fbcb
UD
1926
1927 if (to == ILLEGAL_CHAR_VALUE)
1928 lr_error (ldfile, "invalid character name");
1929 else
1930 {
1931 /* Make sure the `to'-value is larger. */
1932 if (to >= from)
a0dc5206
UD
1933 {
1934 newp->to = to;
1935 newp->step = step;
1936 }
a673fbcb
UD
1937 else
1938 lr_error (ldfile, _("\
1939to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1940 (to | from) < 65536 ? 4 : 8, to,
1941 (to | from) < 65536 ? 4 : 8, from);
1942 }
1943
1944 /* And the next token. */
1945 now = lr_token (ldfile, charmap, repertoire);
1946 }
1947
1948 if (now->tok == tok_eol || now->tok == tok_eof)
1949 /* We are done. */
1950 return;
1951
1952 if (now->tok == tok_semicolon)
1953 /* Next round. */
1954 continue;
1955
1956 /* If we come here something is wrong. */
1957 lr_error (ldfile, _("syntax error"));
1958 lr_ignore_rest (ldfile, 0);
1959 return;
1960 }
1961}
1962
1963
4b10dd6c
UD
1964/* The parser for the LC_CTYPE section of the locale definition. */
1965void
1966ctype_read (struct linereader *ldfile, struct localedef_t *result,
1967 struct charmap_t *charmap, const char *repertoire_name,
1968 int ignore_content)
19bc17a9 1969{
4b10dd6c
UD
1970 struct repertoire_t *repertoire = NULL;
1971 struct locale_ctype_t *ctype;
1972 struct token *now;
1973 enum token_t nowtok;
19bc17a9 1974 size_t cnt;
4b10dd6c
UD
1975 struct charseq *last_seq;
1976 uint32_t last_wch = 0;
1977 enum token_t last_token;
1978 enum token_t ellipsis_token;
a0dc5206 1979 int step;
4b10dd6c
UD
1980 char last_charcode[16];
1981 size_t last_charcode_len = 0;
1982 const char *last_str = NULL;
1983 int mapidx;
19bc17a9 1984
4b10dd6c
UD
1985 /* Get the repertoire we have to use. */
1986 if (repertoire_name != NULL)
1987 repertoire = repertoire_read (repertoire_name);
19bc17a9 1988
4b10dd6c
UD
1989 /* The rest of the line containing `LC_CTYPE' must be free. */
1990 lr_ignore_rest (ldfile, 1);
19bc17a9 1991
4b10dd6c
UD
1992
1993 do
19bc17a9 1994 {
4b10dd6c
UD
1995 now = lr_token (ldfile, charmap, NULL);
1996 nowtok = now->tok;
19bc17a9 1997 }
4b10dd6c 1998 while (nowtok == tok_eol);
19bc17a9 1999
4b10dd6c
UD
2000 /* If we see `copy' now we are almost done. */
2001 if (nowtok == tok_copy)
2002 {
01ff9d0b
UD
2003 handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_ctype,
2004 LC_CTYPE, "LC_CTYPE", ignore_content);
4b10dd6c
UD
2005 return;
2006 }
75cd5204 2007
4b10dd6c
UD
2008 /* Prepare the data structures. */
2009 ctype_startup (ldfile, result, charmap, ignore_content);
2010 ctype = result->categories[LC_CTYPE].ctype;
2011
2012 /* Remember the repertoire we use. */
2013 if (!ignore_content)
2014 ctype->repertoire = repertoire;
2015
2016 while (1)
19bc17a9 2017 {
4b10dd6c
UD
2018 unsigned long int class_bit = 0;
2019 unsigned long int class256_bit = 0;
2020 int handle_digits = 0;
2021
2022 /* Of course we don't proceed beyond the end of file. */
2023 if (nowtok == tok_eof)
2024 break;
2025
2026 /* Ingore empty lines. */
2027 if (nowtok == tok_eol)
19bc17a9 2028 {
4b10dd6c
UD
2029 now = lr_token (ldfile, charmap, NULL);
2030 nowtok = now->tok;
2031 continue;
2032 }
19bc17a9 2033
4b10dd6c
UD
2034 switch (nowtok)
2035 {
5491da0d
UD
2036 case tok_charclass:
2037 now = lr_token (ldfile, charmap, NULL);
2038 while (now->tok == tok_ident || now->tok == tok_string)
2039 {
2040 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2041 now = lr_token (ldfile, charmap, NULL);
2042 if (now->tok != tok_semicolon)
2043 break;
2044 now = lr_token (ldfile, charmap, NULL);
2045 }
2046 if (now->tok != tok_eol)
2047 SYNTAX_ERROR (_("\
2048%s: syntax error in definition of new character class"), "LC_CTYPE");
2049 break;
2050
2051 case tok_charconv:
2052 now = lr_token (ldfile, charmap, NULL);
2053 while (now->tok == tok_ident || now->tok == tok_string)
2054 {
2055 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2056 now = lr_token (ldfile, charmap, NULL);
2057 if (now->tok != tok_semicolon)
2058 break;
2059 now = lr_token (ldfile, charmap, NULL);
2060 }
2061 if (now->tok != tok_eol)
2062 SYNTAX_ERROR (_("\
2063%s: syntax error in definition of new character map"), "LC_CTYPE");
2064 break;
2065
4b10dd6c 2066 case tok_class:
b9eb05d6
UD
2067 /* Ignore the rest of the line if we don't need the input of
2068 this line. */
2069 if (ignore_content)
2070 {
2071 lr_ignore_rest (ldfile, 0);
2072 break;
2073 }
2074
4b10dd6c
UD
2075 /* We simply forget the `class' keyword and use the following
2076 operand to determine the bit. */
2077 now = lr_token (ldfile, charmap, NULL);
2078 if (now->tok == tok_ident || now->tok == tok_string)
2079 {
87372aa9 2080 /* Must can be one of the predefined class names. */
4b10dd6c
UD
2081 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2082 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2083 break;
2084 if (cnt >= ctype->nr_charclass)
2085 {
011ebfab 2086#ifdef PREDEFINED_CLASSES
4b10dd6c
UD
2087 if (now->val.str.lenmb == 8
2088 && memcmp ("special1", now->val.str.startmb, 8) == 0)
2089 class_bit = _ISwspecial1;
2090 else if (now->val.str.lenmb == 8
2091 && memcmp ("special2", now->val.str.startmb, 8) == 0)
2092 class_bit = _ISwspecial2;
2093 else if (now->val.str.lenmb == 8
2094 && memcmp ("special3", now->val.str.startmb, 8) == 0)
2095 class_bit = _ISwspecial3;
2096 else
011ebfab 2097#endif
4b10dd6c 2098 {
87372aa9
UD
2099 /* OK, it's a new class. */
2100 ctype_class_new (ldfile, ctype, now->val.str.startmb);
4b10dd6c 2101
87372aa9 2102 class_bit = _ISwbit (ctype->nr_charclass - 1);
4b10dd6c
UD
2103 }
2104 }
2105 else
7f653277
UD
2106 {
2107 class_bit = _ISwbit (cnt);
4b10dd6c 2108
7f653277
UD
2109 free (now->val.str.startmb);
2110 }
4b10dd6c
UD
2111 }
2112 else if (now->tok == tok_digit)
2113 goto handle_tok_digit;
2114 else if (now->tok < tok_upper || now->tok > tok_blank)
2115 goto err_label;
2116 else
2117 {
2118 class_bit = BITw (now->tok);
2119 class256_bit = BIT (now->tok);
2120 }
2121
2122 /* The next character must be a semicolon. */
2123 now = lr_token (ldfile, charmap, NULL);
2124 if (now->tok != tok_semicolon)
2125 goto err_label;
2126 goto read_charclass;
2127
2128 case tok_upper:
2129 case tok_lower:
2130 case tok_alpha:
2131 case tok_alnum:
2132 case tok_space:
2133 case tok_cntrl:
2134 case tok_punct:
2135 case tok_graph:
2136 case tok_print:
2137 case tok_xdigit:
2138 case tok_blank:
b9eb05d6
UD
2139 /* Ignore the rest of the line if we don't need the input of
2140 this line. */
2141 if (ignore_content)
2142 {
2143 lr_ignore_rest (ldfile, 0);
2144 break;
2145 }
2146
4b10dd6c
UD
2147 class_bit = BITw (now->tok);
2148 class256_bit = BIT (now->tok);
2149 handle_digits = 0;
2150 read_charclass:
2151 ctype->class_done |= class_bit;
2152 last_token = tok_none;
2153 ellipsis_token = tok_none;
a0dc5206 2154 step = 1;
4b10dd6c
UD
2155 now = lr_token (ldfile, charmap, NULL);
2156 while (now->tok != tok_eol && now->tok != tok_eof)
2157 {
2158 uint32_t wch;
2159 struct charseq *seq;
2160
2161 if (ellipsis_token == tok_none)
2162 {
2163 if (get_character (now, charmap, repertoire, &seq, &wch))
2164 goto err_label;
2165
2166 if (!ignore_content && seq != NULL && seq->nbytes == 1)
2167 /* Yep, we can store information about this byte
2168 sequence. */
2169 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2170
2171 if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2172 && class_bit != 0)
2173 /* We have the UCS4 position. */
2174 *find_idx (ctype, &ctype->class_collection,
2175 &ctype->class_collection_max,
2176 &ctype->class_collection_act, wch) |= class_bit;
2177
2178 last_token = now->tok;
549b3c3a 2179 /* Terminate the string. */
9e2b7438
UD
2180 if (last_token == tok_bsymbol)
2181 {
2182 now->val.str.startmb[now->val.str.lenmb] = '\0';
2183 last_str = now->val.str.startmb;
2184 }
2185 else
2186 last_str = NULL;
4b10dd6c
UD
2187 last_seq = seq;
2188 last_wch = wch;
2189 memcpy (last_charcode, now->val.charcode.bytes, 16);
2190 last_charcode_len = now->val.charcode.nbytes;
2191
2192 if (!ignore_content && handle_digits == 1)
2193 {
2194 /* We must store the digit values. */
2195 if (ctype->mbdigits_act == ctype->mbdigits_max)
2196 {
b9eb05d6 2197 ctype->mbdigits_max += 10;
4b10dd6c
UD
2198 ctype->mbdigits = xrealloc (ctype->mbdigits,
2199 (ctype->mbdigits_max
2200 * sizeof (char *)));
b9eb05d6 2201 ctype->wcdigits_max += 10;
4b10dd6c
UD
2202 ctype->wcdigits = xrealloc (ctype->wcdigits,
2203 (ctype->wcdigits_max
2204 * sizeof (uint32_t)));
2205 }
2206
2207 ctype->mbdigits[ctype->mbdigits_act++] = seq;
2208 ctype->wcdigits[ctype->wcdigits_act++] = wch;
2209 }
2210 else if (!ignore_content && handle_digits == 2)
2211 {
2212 /* We must store the digit values. */
2213 if (ctype->outdigits_act >= 10)
2214 {
2215 lr_error (ldfile, _("\
2216%s: field `%s' does not contain exactly ten entries"),
2217 "LC_CTYPE", "outdigit");
2218 goto err_label;
2219 }
2220
2221 ctype->mboutdigits[ctype->outdigits_act] = seq;
2222 ctype->wcoutdigits[ctype->outdigits_act] = wch;
2223 ++ctype->outdigits_act;
2224 }
2225 }
2226 else
2227 {
2228 /* Now it gets complicated. We have to resolve the
2229 ellipsis problem. First we must distinguish between
2230 the different kind of ellipsis and this must match the
2231 tokens we have seen. */
2232 assert (last_token != tok_none);
2233
2234 if (last_token != now->tok)
2235 {
2236 lr_error (ldfile, _("\
2237ellipsis range must be marked by two operands of same type"));
2238 lr_ignore_rest (ldfile, 0);
2239 break;
2240 }
2241
2242 if (last_token == tok_bsymbol)
2243 {
2244 if (ellipsis_token == tok_ellipsis3)
2245 lr_error (ldfile, _("with symbolic name range values \
2246the absolute ellipsis `...' must not be used"));
2247
2248 charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2249 repertoire, now, last_str,
2250 class256_bit, class_bit,
2251 (ellipsis_token
2252 == tok_ellipsis4
2253 ? 10 : 16),
2254 ignore_content,
a0dc5206 2255 handle_digits, step);
4b10dd6c
UD
2256 }
2257 else if (last_token == tok_ucs4)
2258 {
2259 if (ellipsis_token != tok_ellipsis2)
2260 lr_error (ldfile, _("\
2261with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2262
2263 charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2264 repertoire, now, last_wch,
2265 class256_bit, class_bit,
a0dc5206
UD
2266 ignore_content, handle_digits,
2267 step);
4b10dd6c
UD
2268 }
2269 else
2270 {
2271 assert (last_token == tok_charcode);
2272
2273 if (ellipsis_token != tok_ellipsis3)
2274 lr_error (ldfile, _("\
2275with character code range values one must use the absolute ellipsis `...'"));
2276
2277 charclass_charcode_ellipsis (ldfile, ctype, charmap,
2278 repertoire, now,
2279 last_charcode,
2280 last_charcode_len,
2281 class256_bit, class_bit,
2282 ignore_content,
2283 handle_digits);
2284 }
2285
2286 /* Now we have used the last value. */
2287 last_token = tok_none;
2288 }
2289
2290 /* Next we expect a semicolon or the end of the line. */
2291 now = lr_token (ldfile, charmap, NULL);
2292 if (now->tok == tok_eol || now->tok == tok_eof)
2293 break;
2294
2295 if (last_token != tok_none
a0dc5206 2296 && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
4b10dd6c 2297 {
a0dc5206
UD
2298 if (now->tok == tok_ellipsis2_2)
2299 {
2300 now->tok = tok_ellipsis2;
2301 step = 2;
2302 }
2303 else if (now->tok == tok_ellipsis4_2)
2304 {
2305 now->tok = tok_ellipsis4;
2306 step = 2;
2307 }
2308
4b10dd6c 2309 ellipsis_token = now->tok;
a0dc5206 2310
4b10dd6c
UD
2311 now = lr_token (ldfile, charmap, NULL);
2312 continue;
2313 }
2314
2315 if (now->tok != tok_semicolon)
2316 goto err_label;
2317
2318 /* And get the next character. */
2319 now = lr_token (ldfile, charmap, NULL);
2320
2321 ellipsis_token = tok_none;
a0dc5206 2322 step = 1;
4b10dd6c
UD
2323 }
2324 break;
2325
2326 case tok_digit:
b9eb05d6
UD
2327 /* Ignore the rest of the line if we don't need the input of
2328 this line. */
2329 if (ignore_content)
42d7c593
UD
2330 {
2331 lr_ignore_rest (ldfile, 0);
2332 break;
2333 }
b9eb05d6 2334
4b10dd6c
UD
2335 handle_tok_digit:
2336 class_bit = _ISwdigit;
2337 class256_bit = _ISdigit;
2338 handle_digits = 1;
2339 goto read_charclass;
2340
2341 case tok_outdigit:
b9eb05d6
UD
2342 /* Ignore the rest of the line if we don't need the input of
2343 this line. */
2344 if (ignore_content)
2345 {
2346 lr_ignore_rest (ldfile, 0);
2347 break;
2348 }
2349
4b10dd6c
UD
2350 if (ctype->outdigits_act != 0)
2351 lr_error (ldfile, _("\
2352%s: field `%s' declared more than once"),
2353 "LC_CTYPE", "outdigit");
2354 class_bit = 0;
2355 class256_bit = 0;
2356 handle_digits = 2;
2357 goto read_charclass;
2358
2359 case tok_toupper:
b9eb05d6
UD
2360 /* Ignore the rest of the line if we don't need the input of
2361 this line. */
2362 if (ignore_content)
2363 {
2364 lr_ignore_rest (ldfile, 0);
2365 break;
2366 }
2367
4b10dd6c
UD
2368 mapidx = 0;
2369 goto read_mapping;
2370
2371 case tok_tolower:
b9eb05d6
UD
2372 /* Ignore the rest of the line if we don't need the input of
2373 this line. */
2374 if (ignore_content)
2375 {
2376 lr_ignore_rest (ldfile, 0);
2377 break;
2378 }
2379
4b10dd6c
UD
2380 mapidx = 1;
2381 goto read_mapping;
2382
2383 case tok_map:
b9eb05d6
UD
2384 /* Ignore the rest of the line if we don't need the input of
2385 this line. */
2386 if (ignore_content)
2387 {
2388 lr_ignore_rest (ldfile, 0);
2389 break;
2390 }
2391
4b10dd6c
UD
2392 /* We simply forget the `map' keyword and use the following
2393 operand to determine the mapping. */
2394 now = lr_token (ldfile, charmap, NULL);
2395 if (now->tok == tok_ident || now->tok == tok_string)
2396 {
2397 size_t cnt;
2398
2399 for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2400 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2401 break;
2402
7f653277
UD
2403 if (cnt < ctype->map_collection_nr)
2404 free (now->val.str.startmb);
2405 else
87372aa9
UD
2406 /* OK, it's a new map. */
2407 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2408
2409 mapidx = cnt;
4b10dd6c
UD
2410 }
2411 else if (now->tok < tok_toupper || now->tok > tok_tolower)
2412 goto err_label;
2413 else
2414 mapidx = now->tok - tok_toupper;
2415
2416 now = lr_token (ldfile, charmap, NULL);
2417 /* This better should be a semicolon. */
2418 if (now->tok != tok_semicolon)
2419 goto err_label;
2420
2421 read_mapping:
2422 /* Test whether this mapping was already defined. */
2423 if (ctype->tomap_done[mapidx])
2424 {
2425 lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2426 ctype->mapnames[mapidx]);
2427 lr_ignore_rest (ldfile, 0);
2428 break;
2429 }
2430 ctype->tomap_done[mapidx] = 1;
2431
2432 now = lr_token (ldfile, charmap, NULL);
2433 while (now->tok != tok_eol && now->tok != tok_eof)
2434 {
2435 struct charseq *from_seq;
2436 uint32_t from_wch;
2437 struct charseq *to_seq;
2438 uint32_t to_wch;
2439
2440 /* Every pair starts with an opening brace. */
2441 if (now->tok != tok_open_brace)
2442 goto err_label;
2443
2444 /* Next comes the from-value. */
2445 now = lr_token (ldfile, charmap, NULL);
2446 if (get_character (now, charmap, repertoire, &from_seq,
2447 &from_wch) != 0)
2448 goto err_label;
2449
2450 /* The next is a comma. */
2451 now = lr_token (ldfile, charmap, NULL);
2452 if (now->tok != tok_comma)
2453 goto err_label;
2454
2455 /* And the other value. */
2456 now = lr_token (ldfile, charmap, NULL);
2457 if (get_character (now, charmap, repertoire, &to_seq,
2458 &to_wch) != 0)
2459 goto err_label;
2460
2461 /* And the last thing is the closing brace. */
2462 now = lr_token (ldfile, charmap, NULL);
2463 if (now->tok != tok_close_brace)
2464 goto err_label;
2465
2466 if (!ignore_content)
2467 {
2468 if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2469 && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2470 /* We can use this value. */
2471 ctype->map256_collection[mapidx][from_seq->bytes[0]]
2472 = to_seq->bytes[0];
2473
2474 if (from_wch != ILLEGAL_CHAR_VALUE
2475 && to_wch != ILLEGAL_CHAR_VALUE)
2476 /* Both correct values. */
2477 *find_idx (ctype, &ctype->map_collection[mapidx],
2478 &ctype->map_collection_max[mapidx],
2479 &ctype->map_collection_act[mapidx],
2480 from_wch) = to_wch;
2481 }
2482
2483 /* Now comes a semicolon or the end of the line/file. */
2484 now = lr_token (ldfile, charmap, NULL);
2485 if (now->tok == tok_semicolon)
2486 now = lr_token (ldfile, charmap, NULL);
2487 }
2488 break;
2489
2490 case tok_translit_start:
b9eb05d6
UD
2491 /* Ignore the rest of the line if we don't need the input of
2492 this line. */
2493 if (ignore_content)
2494 {
2495 lr_ignore_rest (ldfile, 0);
2496 break;
2497 }
2498
4b10dd6c
UD
2499 /* The rest of the line better should be empty. */
2500 lr_ignore_rest (ldfile, 1);
2501
2502 /* We count here the number of allocated entries in the `translit'
2503 array. */
2504 cnt = 0;
2505
2506 /* We proceed until we see the `translit_end' token. */
2507 while (now = lr_token (ldfile, charmap, repertoire),
2508 now->tok != tok_translit_end && now->tok != tok_eof)
2509 {
2510 if (now->tok == tok_eol)
2511 /* Ignore empty lines. */
2512 continue;
2513
2514 if (now->tok == tok_translit_end)
2515 {
2516 lr_ignore_rest (ldfile, 0);
2517 break;
2518 }
2519
2520 if (now->tok == tok_include)
2521 {
2522 /* We have to include locale. */
2523 const char *locale_name;
2524 const char *repertoire_name;
2525
2526 now = lr_token (ldfile, charmap, NULL);
2527 /* This should be a string or an identifier. In any
2528 case something to name a locale. */
2529 if (now->tok != tok_string && now->tok != tok_ident)
2530 {
2531 translit_syntax:
2532 lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2533 lr_ignore_rest (ldfile, 0);
2534 continue;
2535 }
2536 locale_name = now->val.str.startmb;
2537
2538 /* Next should be a semicolon. */
2539 now = lr_token (ldfile, charmap, NULL);
2540 if (now->tok != tok_semicolon)
2541 goto translit_syntax;
2542
2543 /* Now the repertoire name. */
2544 now = lr_token (ldfile, charmap, NULL);
2545 if ((now->tok != tok_string && now->tok != tok_ident)
2546 || now->val.str.startmb == NULL)
2547 goto translit_syntax;
2548 repertoire_name = now->val.str.startmb;
2549
2550 /* We must not have more than one `include'. */
2551 if (ctype->translit_copy_locale != NULL)
2552 {
2553 lr_error (ldfile, _("\
2554%s: only one `include' instruction allowed"), "LC_CTYPE");
2555 lr_ignore_rest (ldfile, 0);
2556 continue;
2557 }
2558
2559 ctype->translit_copy_locale = locale_name;
2560 ctype->translit_copy_repertoire = repertoire_name;
2561
2562 /* The rest of the line must be empty. */
2563 lr_ignore_rest (ldfile, 1);
a673fbcb
UD
2564
2565 /* Make sure the locale is read. */
2566 add_to_readlist (LC_CTYPE, ctype->translit_copy_locale,
07dab0c3 2567 repertoire_name, 1, NULL);
a673fbcb
UD
2568 continue;
2569 }
2570 else if (now->tok == tok_default_missing)
2571 {
2572 uint32_t *wstr;
2573
2574 /* We expect a single character or string as the
2575 argument. */
2576 now = lr_token (ldfile, charmap, NULL);
2577 wstr = read_widestring (ldfile, now, charmap, repertoire);
2578
2579 if (wstr != NULL)
2580 {
2581 if (ctype->default_missing != NULL)
2582 {
2583 lr_error (ldfile, _("\
2584%s: duplicate `default_missing' definition"), "LC_CTYPE");
2585 error_at_line (0, 0, ctype->default_missing_file,
2586 ctype->default_missing_lineno,
2587 _("previous definition was here"));
2588 }
2589 else
2590 {
2591 ctype->default_missing = wstr;
2592 ctype->default_missing_file = ldfile->fname;
2593 ctype->default_missing_lineno = ldfile->lineno;
2594 }
2595 }
2596 lr_ignore_rest (ldfile, 1);
2597 continue;
2598 }
2599 else if (now->tok == tok_translit_ignore)
2600 {
2601 read_translit_ignore_entry (ldfile, ctype, charmap,
2602 repertoire);
4b10dd6c
UD
2603 continue;
2604 }
2605
2606 read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2607 }
2608 break;
2609
2610 case tok_ident:
b9eb05d6
UD
2611 /* Ignore the rest of the line if we don't need the input of
2612 this line. */
2613 if (ignore_content)
2614 {
2615 lr_ignore_rest (ldfile, 0);
2616 break;
2617 }
2618
4b10dd6c
UD
2619 /* This could mean one of several things. First test whether
2620 it's a character class name. */
2621 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2622 if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2623 break;
2624 if (cnt < ctype->nr_charclass)
2625 {
2626 class_bit = _ISwbit (cnt);
2627 class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2628 free (now->val.str.startmb);
2629 goto read_charclass;
2630 }
5491da0d
UD
2631 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2632 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2633 break;
2634 if (cnt < ctype->map_collection_nr)
2635 {
2636 mapidx = cnt;
2637 free (now->val.str.startmb);
2638 goto read_mapping;
2639 }
011ebfab 2640#ifdef PREDEFINED_CLASSES
4b10dd6c
UD
2641 if (strcmp (now->val.str.startmb, "special1") == 0)
2642 {
2643 class_bit = _ISwspecial1;
2644 free (now->val.str.startmb);
2645 goto read_charclass;
2646 }
2647 if (strcmp (now->val.str.startmb, "special2") == 0)
2648 {
2649 class_bit = _ISwspecial2;
2650 free (now->val.str.startmb);
2651 goto read_charclass;
2652 }
2653 if (strcmp (now->val.str.startmb, "special3") == 0)
2654 {
2655 class_bit = _ISwspecial3;
2656 free (now->val.str.startmb);
2657 goto read_charclass;
2658 }
2659 if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2660 {
2661 mapidx = 2;
2662 goto read_mapping;
2663 }
011ebfab 2664#endif
4b10dd6c
UD
2665 break;
2666
2667 case tok_end:
2668 /* Next we assume `LC_CTYPE'. */
2669 now = lr_token (ldfile, charmap, NULL);
2670 if (now->tok == tok_eof)
2671 break;
2672 if (now->tok == tok_eol)
2673 lr_error (ldfile, _("%s: incomplete `END' line"),
2674 "LC_CTYPE");
2675 else if (now->tok != tok_lc_ctype)
2676 lr_error (ldfile, _("\
2677%1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2678 lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2679 return;
2680
2681 default:
2682 err_label:
2683 if (now->tok != tok_eof)
2684 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
19bc17a9
RM
2685 }
2686
4b10dd6c
UD
2687 /* Prepare for the next round. */
2688 now = lr_token (ldfile, charmap, NULL);
2689 nowtok = now->tok;
19bc17a9
RM
2690 }
2691
4b10dd6c
UD
2692 /* When we come here we reached the end of the file. */
2693 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
19bc17a9
RM
2694}
2695
2696
2697static void
4b10dd6c
UD
2698set_class_defaults (struct locale_ctype_t *ctype, struct charmap_t *charmap,
2699 struct repertoire_t *repertoire)
19bc17a9 2700{
4b10dd6c
UD
2701 size_t cnt;
2702
19bc17a9
RM
2703 /* These function defines the default values for the classes and conversions
2704 according to POSIX.2 2.5.2.1.
2705 It may seem that the order of these if-blocks is arbitrary but it is NOT.
2706 Don't move them unless you know what you do! */
2707
4b10dd6c 2708 void set_default (int bitpos, int from, int to)
19bc17a9
RM
2709 {
2710 char tmp[2];
2711 int ch;
4b10dd6c
UD
2712 int bit = _ISbit (bitpos);
2713 int bitw = _ISwbit (bitpos);
19bc17a9
RM
2714 /* Define string. */
2715 strcpy (tmp, "?");
2716
2717 for (ch = from; ch <= to; ++ch)
2718 {
4b10dd6c 2719 struct charseq *seq;
19bc17a9
RM
2720 tmp[0] = ch;
2721
4b10dd6c
UD
2722 seq = charmap_find_value (charmap, tmp, 1);
2723 if (seq == NULL)
2724 {
2725 if (!be_quiet)
2726 error (0, 0, _("\
2727%s: character `%s' not defined in charmap while needed as default value"),
2728 "LC_CTYPE", tmp);
19bc17a9 2729 }
4b10dd6c
UD
2730 else if (seq->nbytes != 1)
2731 error (0, 0, _("\
2732%s: character `%s' in charmap not representable with one byte"),
2733 "LC_CTYPE", tmp);
19bc17a9 2734 else
4b10dd6c 2735 ctype->class256_collection[seq->bytes[0]] |= bit;
f0a4b6b1
UD
2736
2737 /* No need to search here, the ASCII value is also the Unicode
2738 value. */
2739 ELEM (ctype, class_collection, , ch) |= bitw;
19bc17a9
RM
2740 }
2741 }
2742
2743 /* Set default values if keyword was not present. */
4b10dd6c 2744 if ((ctype->class_done & BITw (tok_upper)) == 0)
19bc17a9
RM
2745 /* "If this keyword [lower] is not specified, the lowercase letters
2746 `A' through `Z', ..., shall automatically belong to this class,
2747 with implementation defined character values." [P1003.2, 2.5.2.1] */
4b10dd6c 2748 set_default (BITPOS (tok_upper), 'A', 'Z');
19bc17a9 2749
4b10dd6c 2750 if ((ctype->class_done & BITw (tok_lower)) == 0)
19bc17a9
RM
2751 /* "If this keyword [lower] is not specified, the lowercase letters
2752 `a' through `z', ..., shall automatically belong to this class,
2753 with implementation defined character values." [P1003.2, 2.5.2.1] */
4b10dd6c 2754 set_default (BITPOS (tok_lower), 'a', 'z');
19bc17a9 2755
4b10dd6c 2756 if ((ctype->class_done & BITw (tok_alpha)) == 0)
19bc17a9
RM
2757 {
2758 /* Table 2-6 in P1003.2 says that characters in class `upper' or
2759 class `lower' *must* be in class `alpha'. */
2760 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
96f0d1f5
UD
2761 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
2762
2763 for (cnt = 0; cnt < 256; ++cnt)
2764 if ((ctype->class256_collection[cnt] & mask) != 0)
2765 ctype->class256_collection[cnt] |= BIT (tok_alpha);
19bc17a9
RM
2766
2767 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
96f0d1f5
UD
2768 if ((ctype->class_collection[cnt] & maskw) != 0)
2769 ctype->class_collection[cnt] |= BITw (tok_alpha);
19bc17a9
RM
2770 }
2771
4b10dd6c 2772 if ((ctype->class_done & BITw (tok_digit)) == 0)
19bc17a9
RM
2773 /* "If this keyword [digit] is not specified, the digits `0' through
2774 `9', ..., shall automatically belong to this class, with
2775 implementation-defined character values." [P1003.2, 2.5.2.1] */
4b10dd6c 2776 set_default (BITPOS (tok_digit), '0', '9');
19bc17a9
RM
2777
2778 /* "Only characters specified for the `alpha' and `digit' keyword
2779 shall be specified. Characters specified for the keyword `alpha'
2780 and `digit' are automatically included in this class. */
2781 {
2782 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
96f0d1f5
UD
2783 unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
2784
2785 for (cnt = 0; cnt < 256; ++cnt)
2786 if ((ctype->class256_collection[cnt] & mask) != 0)
2787 ctype->class256_collection[cnt] |= BIT (tok_alnum);
19bc17a9
RM
2788
2789 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
96f0d1f5
UD
2790 if ((ctype->class_collection[cnt] & maskw) != 0)
2791 ctype->class_collection[cnt] |= BITw (tok_alnum);
19bc17a9
RM
2792 }
2793
4b10dd6c 2794 if ((ctype->class_done & BITw (tok_space)) == 0)
19bc17a9
RM
2795 /* "If this keyword [space] is not specified, the characters <space>,
2796 <form-feed>, <newline>, <carriage-return>, <tab>, and
2797 <vertical-tab>, ..., shall automatically belong to this class,
2798 with implementation-defined character values." [P1003.2, 2.5.2.1] */
2799 {
4b10dd6c 2800 struct charseq *seq;
19bc17a9 2801
4b10dd6c 2802 seq = charmap_find_value (charmap, "space", 5);
45c95239
UD
2803 if (seq == NULL)
2804 seq = charmap_find_value (charmap, "SP", 2);
f0a4b6b1
UD
2805 if (seq == NULL)
2806 seq = charmap_find_value (charmap, "U00000020", 9);
4b10dd6c 2807 if (seq == NULL)
880f421f
UD
2808 {
2809 if (!be_quiet)
2810 error (0, 0, _("\
4b10dd6c
UD
2811%s: character `%s' not defined while needed as default value"),
2812 "LC_CTYPE", "<space>");
2813 }
2814 else if (seq->nbytes != 1)
2815 error (0, 0, _("\
2816%s: character `%s' in charmap not representable with one byte"),
2817 "LC_CTYPE", "<space>");
2818 else
2819 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2820
f0a4b6b1 2821 /* No need to search. */
ce177a84 2822 ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
19bc17a9 2823
4b10dd6c 2824 seq = charmap_find_value (charmap, "form-feed", 9);
f0a4b6b1
UD
2825 if (seq == NULL)
2826 seq = charmap_find_value (charmap, "U0000000C", 9);
4b10dd6c 2827 if (seq == NULL)
880f421f
UD
2828 {
2829 if (!be_quiet)
2830 error (0, 0, _("\
4b10dd6c
UD
2831%s: character `%s' not defined while needed as default value"),
2832 "LC_CTYPE", "<form-feed>");
2833 }
2834 else if (seq->nbytes != 1)
2835 error (0, 0, _("\
2836%s: character `%s' in charmap not representable with one byte"),
2837 "LC_CTYPE", "<form-feed>");
2838 else
2839 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2840
f0a4b6b1 2841 /* No need to search. */
ce177a84 2842 ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
4b10dd6c 2843
19bc17a9 2844
4b10dd6c 2845 seq = charmap_find_value (charmap, "newline", 7);
f0a4b6b1
UD
2846 if (seq == NULL)
2847 seq = charmap_find_value (charmap, "U0000000A", 9);
4b10dd6c 2848 if (seq == NULL)
880f421f
UD
2849 {
2850 if (!be_quiet)
2851 error (0, 0, _("\
19bc17a9 2852character `%s' not defined while needed as default value"),
4b10dd6c
UD
2853 "<newline>");
2854 }
2855 else if (seq->nbytes != 1)
2856 error (0, 0, _("\
2857%s: character `%s' in charmap not representable with one byte"),
2858 "LC_CTYPE", "<newline>");
2859 else
2860 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2861
f0a4b6b1 2862 /* No need to search. */
ce177a84 2863 ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
4b10dd6c 2864
19bc17a9 2865
4b10dd6c 2866 seq = charmap_find_value (charmap, "carriage-return", 15);
f0a4b6b1
UD
2867 if (seq == NULL)
2868 seq = charmap_find_value (charmap, "U0000000D", 9);
4b10dd6c 2869 if (seq == NULL)
880f421f
UD
2870 {
2871 if (!be_quiet)
2872 error (0, 0, _("\
4b10dd6c
UD
2873%s: character `%s' not defined while needed as default value"),
2874 "LC_CTYPE", "<carriage-return>");
2875 }
2876 else if (seq->nbytes != 1)
2877 error (0, 0, _("\
2878%s: character `%s' in charmap not representable with one byte"),
2879 "LC_CTYPE", "<carriage-return>");
2880 else
2881 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2882
f0a4b6b1 2883 /* No need to search. */
ce177a84 2884 ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
4b10dd6c 2885
19bc17a9 2886
4b10dd6c 2887 seq = charmap_find_value (charmap, "tab", 3);
f0a4b6b1
UD
2888 if (seq == NULL)
2889 seq = charmap_find_value (charmap, "U00000009", 9);
4b10dd6c 2890 if (seq == NULL)
880f421f
UD
2891 {
2892 if (!be_quiet)
2893 error (0, 0, _("\
4b10dd6c
UD
2894%s: character `%s' not defined while needed as default value"),
2895 "LC_CTYPE", "<tab>");
2896 }
2897 else if (seq->nbytes != 1)
2898 error (0, 0, _("\
2899%s: character `%s' in charmap not representable with one byte"),
2900 "LC_CTYPE", "<tab>");
2901 else
2902 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2903
f0a4b6b1 2904 /* No need to search. */
ce177a84 2905 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
4b10dd6c 2906
4b10dd6c
UD
2907
2908 seq = charmap_find_value (charmap, "vertical-tab", 12);
f0a4b6b1
UD
2909 if (seq == NULL)
2910 seq = charmap_find_value (charmap, "U0000000B", 9);
4b10dd6c
UD
2911 if (seq == NULL)
2912 {
2913 if (!be_quiet)
2914 error (0, 0, _("\
2915%s: character `%s' not defined while needed as default value"),
2916 "LC_CTYPE", "<vertical-tab>");
2917 }
2918 else if (seq->nbytes != 1)
2919 error (0, 0, _("\
2920%s: character `%s' in charmap not representable with one byte"),
2921 "LC_CTYPE", "<vertical-tab>");
2922 else
2923 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
f0a4b6b1
UD
2924
2925 /* No need to search. */
ce177a84 2926 ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
19bc17a9
RM
2927 }
2928
4b10dd6c 2929 if ((ctype->class_done & BITw (tok_xdigit)) == 0)
19bc17a9
RM
2930 /* "If this keyword is not specified, the digits `0' to `9', the
2931 uppercase letters `A' through `F', and the lowercase letters `a'
2932 through `f', ..., shell automatically belong to this class, with
2933 implementation defined character values." [P1003.2, 2.5.2.1] */
2934 {
4b10dd6c
UD
2935 set_default (BITPOS (tok_xdigit), '0', '9');
2936 set_default (BITPOS (tok_xdigit), 'A', 'F');
2937 set_default (BITPOS (tok_xdigit), 'a', 'f');
19bc17a9
RM
2938 }
2939
4b10dd6c 2940 if ((ctype->class_done & BITw (tok_blank)) == 0)
19bc17a9
RM
2941 /* "If this keyword [blank] is unspecified, the characters <space> and
2942 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
2943 {
4b10dd6c 2944 struct charseq *seq;
19bc17a9 2945
4b10dd6c 2946 seq = charmap_find_value (charmap, "space", 5);
45c95239
UD
2947 if (seq == NULL)
2948 seq = charmap_find_value (charmap, "SP", 2);
f0a4b6b1
UD
2949 if (seq == NULL)
2950 seq = charmap_find_value (charmap, "U00000020", 9);
4b10dd6c 2951 if (seq == NULL)
880f421f
UD
2952 {
2953 if (!be_quiet)
2954 error (0, 0, _("\
4b10dd6c
UD
2955%s: character `%s' not defined while needed as default value"),
2956 "LC_CTYPE", "<space>");
2957 }
2958 else if (seq->nbytes != 1)
2959 error (0, 0, _("\
2960%s: character `%s' in charmap not representable with one byte"),
2961 "LC_CTYPE", "<space>");
2962 else
2963 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
2964
f0a4b6b1 2965 /* No need to search. */
ce177a84 2966 ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
4b10dd6c 2967
4b10dd6c
UD
2968
2969 seq = charmap_find_value (charmap, "tab", 3);
f0a4b6b1
UD
2970 if (seq == NULL)
2971 seq = charmap_find_value (charmap, "U00000009", 9);
4b10dd6c
UD
2972 if (seq == NULL)
2973 {
2974 if (!be_quiet)
2975 error (0, 0, _("\
2976%s: character `%s' not defined while needed as default value"),
2977 "LC_CTYPE", "<tab>");
2978 }
2979 else if (seq->nbytes != 1)
2980 error (0, 0, _("\
2981%s: character `%s' in charmap not representable with one byte"),
2982 "LC_CTYPE", "<tab>");
2983 else
2984 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
f0a4b6b1
UD
2985
2986 /* No need to search. */
ce177a84 2987 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
19bc17a9
RM
2988 }
2989
4b10dd6c 2990 if ((ctype->class_done & BITw (tok_graph)) == 0)
19bc17a9
RM
2991 /* "If this keyword [graph] is not specified, characters specified for
2992 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
2993 shall belong to this character class." [P1003.2, 2.5.2.1] */
2994 {
2995 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
2996 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
ce177a84
UD
2997 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
2998 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
2999 BITw (tok_punct);
19bc17a9
RM
3000 size_t cnt;
3001
3002 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
ce177a84
UD
3003 if ((ctype->class_collection[cnt] & maskw) != 0)
3004 ctype->class_collection[cnt] |= BITw (tok_graph);
4b10dd6c
UD
3005
3006 for (cnt = 0; cnt < 256; ++cnt)
3007 if ((ctype->class256_collection[cnt] & mask) != 0)
3008 ctype->class256_collection[cnt] |= BIT (tok_graph);
19bc17a9
RM
3009 }
3010
4b10dd6c 3011 if ((ctype->class_done & BITw (tok_print)) == 0)
19bc17a9
RM
3012 /* "If this keyword [print] is not provided, characters specified for
3013 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3014 and the <space> character shall belong to this character class."
3015 [P1003.2, 2.5.2.1] */
3016 {
3017 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3018 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
ce177a84
UD
3019 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3020 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3021 BITw (tok_punct);
19bc17a9 3022 size_t cnt;
4b10dd6c 3023 struct charseq *seq;
19bc17a9
RM
3024
3025 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
ce177a84
UD
3026 if ((ctype->class_collection[cnt] & maskw) != 0)
3027 ctype->class_collection[cnt] |= BITw (tok_print);
19bc17a9 3028
4b10dd6c
UD
3029 for (cnt = 0; cnt < 256; ++cnt)
3030 if ((ctype->class256_collection[cnt] & mask) != 0)
3031 ctype->class256_collection[cnt] |= BIT (tok_print);
3032
3033
4b10dd6c 3034 seq = charmap_find_value (charmap, "space", 5);
45c95239
UD
3035 if (seq == NULL)
3036 seq = charmap_find_value (charmap, "SP", 2);
f0a4b6b1
UD
3037 if (seq == NULL)
3038 seq = charmap_find_value (charmap, "U00000020", 9);
4b10dd6c
UD
3039 if (seq == NULL)
3040 {
3041 if (!be_quiet)
3042 error (0, 0, _("\
3043%s: character `%s' not defined while needed as default value"),
3044 "LC_CTYPE", "<space>");
3045 }
3046 else if (seq->nbytes != 1)
3047 error (0, 0, _("\
3048%s: character `%s' in charmap not representable with one byte"),
3049 "LC_CTYPE", "<space>");
3050 else
3051 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
f0a4b6b1
UD
3052
3053 /* No need to search. */
ce177a84 3054 ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
19bc17a9
RM
3055 }
3056
4b10dd6c 3057 if (ctype->tomap_done[0] == 0)
6d52618b 3058 /* "If this keyword [toupper] is not specified, the lowercase letters
19bc17a9
RM
3059 `a' through `z', and their corresponding uppercase letters `A' to
3060 `Z', ..., shall automatically be included, with implementation-
3061 defined character values." [P1003.2, 2.5.2.1] */
3062 {
3063 char tmp[4];
3064 int ch;
3065
3066 strcpy (tmp, "<?>");
3067
3068 for (ch = 'a'; ch <= 'z'; ++ch)
3069 {
4b10dd6c 3070 struct charseq *seq_from, *seq_to;
19bc17a9
RM
3071
3072 tmp[1] = (char) ch;
3073
4b10dd6c
UD
3074 seq_from = charmap_find_value (charmap, &tmp[1], 1);
3075 if (seq_from == NULL)
19bc17a9 3076 {
880f421f
UD
3077 if (!be_quiet)
3078 error (0, 0, _("\
4b10dd6c
UD
3079%s: character `%s' not defined while needed as default value"),
3080 "LC_CTYPE", tmp);
3081 }
3082 else if (seq_from->nbytes != 1)
3083 {
3084 if (!be_quiet)
3085 error (0, 0, _("\
3086%s: character `%s' needed as default value not representable with one byte"),
3087 "LC_CTYPE", tmp);
3088 }
3089 else
3090 {
3091 /* This conversion is implementation defined. */
3092 tmp[1] = (char) (ch + ('A' - 'a'));
3093 seq_to = charmap_find_value (charmap, &tmp[1], 1);
3094 if (seq_to == NULL)
3095 {
3096 if (!be_quiet)
3097 error (0, 0, _("\
3098%s: character `%s' not defined while needed as default value"),
3099 "LC_CTYPE", tmp);
3100 }
3101 else if (seq_to->nbytes != 1)
3102 {
3103 if (!be_quiet)
3104 error (0, 0, _("\
3105%s: character `%s' needed as default value not representable with one byte"),
3106 "LC_CTYPE", tmp);
3107 }
3108 else
3109 /* The index [0] is determined by the order of the
3110 `ctype_map_newP' calls in `ctype_startup'. */
3111 ctype->map256_collection[0][seq_from->bytes[0]]
3112 = seq_to->bytes[0];
19bc17a9 3113 }
f0a4b6b1
UD
3114
3115 /* No need to search. */
3116 ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
19bc17a9
RM
3117 }
3118 }
3119
4b10dd6c 3120 if (ctype->tomap_done[1] == 0)
19bc17a9
RM
3121 /* "If this keyword [tolower] is not specified, the mapping shall be
3122 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3123 {
19bc17a9
RM
3124 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3125 if (ctype->map_collection[0][cnt] != 0)
3126 ELEM (ctype, map_collection, [1],
3127 ctype->map_collection[0][cnt])
3128 = ctype->charnames[cnt];
4b10dd6c
UD
3129
3130 for (cnt = 0; cnt < 256; ++cnt)
3131 if (ctype->map256_collection[0][cnt] != 0)
85cb60ff 3132 ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
4b10dd6c
UD
3133 }
3134
3135 if (ctype->outdigits_act == 0)
3136 {
3137 for (cnt = 0; cnt < 10; ++cnt)
3138 {
3139 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3140 digits + cnt, 1);
3141
3142 if (ctype->mboutdigits[cnt] == NULL)
1b97149d
UD
3143 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3144 longnames[cnt],
3145 strlen (longnames[cnt]));
b9eb05d6 3146
1b97149d
UD
3147 if (ctype->mboutdigits[cnt] == NULL)
3148 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3149 uninames[cnt], 9);
b9eb05d6 3150
1b97149d 3151 if (ctype->mboutdigits[cnt] == NULL)
b9eb05d6 3152 {
1b97149d
UD
3153 /* Provide a replacement. */
3154 error (0, 0, _("\
3155no output digits defined and none of the standard names in the charmap"));
b9eb05d6 3156
1b97149d
UD
3157 ctype->mboutdigits[cnt] = obstack_alloc (&charmap->mem_pool,
3158 sizeof (struct charseq)
3159 + 1);
b9eb05d6 3160
1b97149d
UD
3161 /* This is better than nothing. */
3162 ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3163 ctype->mboutdigits[cnt]->nbytes = 1;
b9eb05d6 3164 }
1b97149d
UD
3165
3166 ctype->wcoutdigits[cnt] = L'0' + cnt;
4b10dd6c
UD
3167 }
3168
3169 ctype->outdigits_act = 10;
19bc17a9
RM
3170 }
3171}
3172
3173
3174static void
4b10dd6c
UD
3175allocate_arrays (struct locale_ctype_t *ctype, struct charmap_t *charmap,
3176 struct repertoire_t *repertoire)
19bc17a9
RM
3177{
3178 size_t idx;
a53e3292 3179 size_t width_table_size;
0e16ecfa
UD
3180 const void *key;
3181 size_t len;
3182 void *vdata;
3183 void *curs;
5d431a3e 3184
6d52618b
UD
3185 /* First we have to decide how we organize the arrays. It is easy
3186 for a one-byte character set. But multi-byte character set
3187 cannot be stored flat because the chars might be sparsely used.
3188 So we determine an optimal hashing function for the used
3189 characters.
3190
3191 We use a very trivial hashing function to store the sparse
3192 table. CH % TABSIZE is used as an index. To solve multiple hits
3193 we have N planes. This guarantees a fixed search time for a
42d7c593 3194 character [N / 2]. In the following code we determine the minimum
66ac0abe
UD
3195 value for TABSIZE * N, where TABSIZE >= 256.
3196
3197 Some people complained that this algorithm takes too long. Well,
3198 go on, improve it. But changing the step size is *not* an
3199 option. Some people changed this to use only sizes of prime
3200 numbers. Think again, do some math. We are looking for the
3201 optimal solution, not something which works in general. Unless
3202 somebody can provide a dynamic programming solution I think this
3203 implementation is as good as it can get. */
19bc17a9
RM
3204 size_t min_total = UINT_MAX;
3205 size_t act_size = 256;
3206
66ac0abe 3207 if (!be_quiet && ctype->charnames_act > 512)
c84142e8 3208 fputs (_("\
19bc17a9 3209Computing table size for character classes might take a while..."),
c84142e8 3210 stderr);
19bc17a9 3211
66ac0abe
UD
3212 /* While we want to have a small total size we are willing to use a
3213 little bit larger table if this reduces the number of layers.
3214 Therefore we add a little penalty to the number of planes.
3215 Maybe this constant has to be adjusted a bit. */
3216#define PENALTY 128
3217 do
19bc17a9
RM
3218 {
3219 size_t cnt[act_size];
3220 size_t act_planes = 1;
3221
3222 memset (cnt, '\0', sizeof cnt);
3223
3224 for (idx = 0; idx < 256; ++idx)
3225 cnt[idx] = 1;
3226
3227 for (idx = 0; idx < ctype->charnames_act; ++idx)
3228 if (ctype->charnames[idx] >= 256)
3229 {
3230 size_t nr = ctype->charnames[idx] % act_size;
3231
3232 if (++cnt[nr] > act_planes)
3233 {
3234 act_planes = cnt[nr];
66ac0abe 3235 if ((act_size + PENALTY) * act_planes >= min_total)
19bc17a9
RM
3236 break;
3237 }
3238 }
3239
66ac0abe 3240 if ((act_size + PENALTY) * act_planes < min_total)
19bc17a9 3241 {
66ac0abe 3242 min_total = (act_size + PENALTY) * act_planes;
19bc17a9
RM
3243 ctype->plane_size = act_size;
3244 ctype->plane_cnt = act_planes;
3245 }
3246
3247 ++act_size;
3248 }
66ac0abe 3249 while (act_size < min_total);
19bc17a9 3250
66ac0abe 3251 if (!be_quiet && ctype->charnames_act > 512)
c84142e8 3252 fputs (_(" done\n"), stderr);
19bc17a9 3253
75cd5204 3254
4a33c2f5
UD
3255 ctype->names = (uint32_t *) xcalloc (ctype->plane_size
3256 * ctype->plane_cnt,
3257 sizeof (uint32_t));
19bc17a9
RM
3258
3259 for (idx = 1; idx < 256; ++idx)
4a33c2f5 3260 ctype->names[idx] = idx;
19bc17a9
RM
3261
3262 /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */
4a33c2f5 3263 ctype->names[0] = 1;
19bc17a9
RM
3264
3265 for (idx = 256; idx < ctype->charnames_act; ++idx)
3266 {
3267 size_t nr = (ctype->charnames[idx] % ctype->plane_size);
3268 size_t depth = 0;
3269
4a33c2f5 3270 while (ctype->names[nr + depth * ctype->plane_size])
19bc17a9
RM
3271 ++depth;
3272 assert (depth < ctype->plane_cnt);
3273
4a33c2f5 3274 ctype->names[nr + depth * ctype->plane_size] = ctype->charnames[idx];
19bc17a9
RM
3275
3276 /* Now for faster access remember the index in the NAMES_B array. */
3277 ctype->charnames[idx] = nr + depth * ctype->plane_size;
3278 }
4a33c2f5 3279 ctype->names[0] = 0;
19bc17a9
RM
3280
3281
3282 /* You wonder about this amount of memory? This is only because some
3283 users do not manage to address the array with unsigned values or
3284 data types with range >= 256. '\200' would result in the array
3285 index -128. To help these poor people we duplicate the entries for
3286 128 up to 255 below the entry for \0. */
3287 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
3288 sizeof (char_class_t));
3289 ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
3290 * ctype->plane_cnt,
3291 sizeof (char_class32_t));
3292
4a33c2f5 3293 /* This is the array accessed using the multibyte string elements. */
4b10dd6c 3294 for (idx = 0; idx < 256; ++idx)
4a33c2f5 3295 ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
19bc17a9 3296
75cd5204
RM
3297 /* Mirror first 127 entries. We must take care that entry -1 is not
3298 mirrored because EOF == -1. */
3299 for (idx = 0; idx < 127; ++idx)
19bc17a9
RM
3300 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3301
3302 /* The 32 bit array contains all characters. */
3303 for (idx = 0; idx < ctype->class_collection_act; ++idx)
4a33c2f5 3304 ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
19bc17a9
RM
3305
3306 /* Room for table of mappings. */
49f2be5b
UD
3307 ctype->map = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3308 ctype->map32 = (uint32_t **) xmalloc (ctype->map_collection_nr
4a33c2f5 3309 * sizeof (uint32_t *));
19bc17a9
RM
3310
3311 /* Fill in all mappings. */
49f2be5b 3312 for (idx = 0; idx < 2; ++idx)
19bc17a9
RM
3313 {
3314 unsigned int idx2;
3315
3316 /* Allocate table. */
49f2be5b 3317 ctype->map[idx] = (uint32_t *) xmalloc ((256 + 128) * sizeof (uint32_t));
19bc17a9
RM
3318
3319 /* Copy values from collection. */
4b10dd6c 3320 for (idx2 = 0; idx2 < 256; ++idx2)
4a33c2f5 3321 ctype->map[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
19bc17a9 3322
75cd5204
RM
3323 /* Mirror first 127 entries. We must take care not to map entry
3324 -1 because EOF == -1. */
3325 for (idx2 = 0; idx2 < 127; ++idx2)
4a33c2f5 3326 ctype->map[idx][idx2] = ctype->map[idx][256 + idx2];
19bc17a9 3327
75cd5204 3328 /* EOF must map to EOF. */
4a33c2f5 3329 ctype->map[idx][127] = EOF;
49f2be5b 3330 }
a9c27b3e 3331
49f2be5b
UD
3332 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3333 {
3334 unsigned int idx2;
3335
3336 /* Allocate table. */
f1d8b804
UD
3337 ctype->map32[idx] = (uint32_t *) xmalloc (ctype->plane_size
3338 * ctype->plane_cnt
3339 * sizeof (uint32_t));
49f2be5b
UD
3340
3341 /* Copy default value (identity mapping). */
f1d8b804 3342 memcpy (ctype->map32[idx], ctype->names,
49f2be5b
UD
3343 ctype->plane_size * ctype->plane_cnt * sizeof (uint32_t));
3344
3345 /* Copy values from collection. */
3346 for (idx2 = 0; idx2 < 256; ++idx2)
a9c27b3e 3347 if (ctype->map_collection[idx][idx2] != 0)
f1d8b804
UD
3348 ctype->map32[idx][idx2] = ctype->map_collection[idx][idx2];
3349
3350 while (idx2 < ctype->map_collection_act[idx])
b06c53e7
UD
3351 {
3352 if (ctype->map_collection[idx][idx2] != 0)
450bf66e
UD
3353 ctype->map32[idx][ctype->charnames[idx2]] =
3354 ctype->map_collection[idx][idx2];
b06c53e7
UD
3355 ++idx2;
3356 }
19bc17a9
RM
3357 }
3358
3359 /* Extra array for class and map names. */
4b10dd6c
UD
3360 ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3361 * sizeof (uint32_t));
3362 ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3363 * sizeof (uint32_t));
75cd5204
RM
3364
3365 /* Array for width information. Because the expected width are very
3366 small we use only one single byte. This save space and we need
3367 not provide the information twice with both endianesses. */
5866b131
UD
3368 width_table_size = (ctype->plane_size * ctype->plane_cnt + 3) & ~3ul;
3369 ctype->width = (unsigned char *) xmalloc (width_table_size);
3370
0e16ecfa
UD
3371 /* Initialize with -1. */
3372 memset (ctype->width, '\xff', width_table_size);
4b10dd6c 3373 if (charmap->width_rules != NULL)
75cd5204
RM
3374 {
3375 size_t cnt;
3376
4b10dd6c 3377 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
827ff758
UD
3378 {
3379 unsigned char bytes[charmap->mb_cur_max];
3380 int nbytes = charmap->width_rules[cnt].from->nbytes;
3381
3382 /* We have the range of character for which the width is
3383 specified described using byte sequences of the multibyte
3384 charset. We have to convert this to UCS4 now. And we
3385 cannot simply convert the beginning and the end of the
3386 sequence, we have to iterate over the byte sequence and
3387 convert it for every single character. */
3388 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
3389
3390 while (nbytes < charmap->width_rules[cnt].to->nbytes
3391 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
3392 nbytes) <= 0)
75cd5204 3393 {
827ff758 3394 /* Find the UCS value for `bytes'. */
827ff758 3395 int inner;
76e680a8
UD
3396 uint32_t wch;
3397 struct charseq *seq =
3398 charmap_find_symbol (charmap, bytes, nbytes);
3399
3400 if (seq == NULL)
3401 wch = ILLEGAL_CHAR_VALUE;
3402 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
3403 wch = seq->ucs4;
3404 else
3405 wch = repertoire_find_value (ctype->repertoire, seq->name,
3406 strlen (seq->name));
827ff758
UD
3407
3408 if (wch != ILLEGAL_CHAR_VALUE)
3409 {
3410 /* Store the value. */
b1c9ad82 3411 size_t nr = wch % ctype->plane_size;
827ff758
UD
3412 size_t depth = 0;
3413
b1c9ad82 3414 while (ctype->names[nr + depth * ctype->plane_size] != wch)
0e16ecfa
UD
3415 {
3416 ++depth;
3417 assert (depth < ctype->plane_cnt);
3418 }
827ff758
UD
3419
3420 ctype->width[nr + depth * ctype->plane_size]
3421 = charmap->width_rules[cnt].width;
3422 }
3423
3424 /* "Increment" the bytes sequence. */
3425 inner = nbytes - 1;
3426 while (inner >= 0 && bytes[inner] == 0xff)
3427 --inner;
75cd5204 3428
827ff758
UD
3429 if (inner < 0)
3430 {
3431 /* We have to extend the byte sequence. */
3432 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
3433 break;
75cd5204 3434
827ff758
UD
3435 bytes[0] = 1;
3436 memset (&bytes[1], 0, nbytes);
3437 ++nbytes;
3438 }
3439 else
3440 {
3441 ++bytes[inner];
3442 while (++inner < nbytes)
3443 bytes[inner] = 0;
3444 }
75cd5204 3445 }
827ff758 3446 }
75cd5204 3447 }
0200214b 3448
0e16ecfa
UD
3449 /* Now set all the other characters of the character set to the
3450 default width. */
3451 curs = NULL;
3452 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
3453 {
3454 struct charseq *data = (struct charseq *) vdata;
3455 size_t nr;
3456 size_t depth;
3457
3458 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3459 data->ucs4 = repertoire_find_value (ctype->repertoire,
3460 data->name, len);
3461
3462 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
3463 {
3464 nr = data->ucs4 % ctype->plane_size;
3465 depth = 0;
3466
3467 while (ctype->names[nr + depth * ctype->plane_size] != data->ucs4)
3468 {
3469 ++depth;
3470 assert (depth < ctype->plane_cnt);
3471 }
3472
3473 if (ctype->width[nr + depth * ctype->plane_size]
3474 == (unsigned char) '\xff')
3475 ctype->width[nr + depth * ctype->plane_size] =
3476 charmap->width_default;
3477 }
3478 }
3479
4b10dd6c
UD
3480 /* Set MB_CUR_MAX. */
3481 ctype->mb_cur_max = charmap->mb_cur_max;
6990326c 3482
4b10dd6c
UD
3483 /* Now determine the table for the transliteration information.
3484
3485 XXX It is not yet clear to me whether it is worth implementing a
3486 complicated algorithm which uses a hash table to locate the entries.
3487 For now I'll use a simple array which can be searching using binary
3488 search. */
3489 if (ctype->translit_copy_locale != NULL)
3490 {
3491 /* Fold in the transliteration information from the locale mentioned
3492 in the `include' statement. */
3493 struct locale_ctype_t *here = ctype;
3494
3495 do
3496 {
3497 struct localedef_t *other = find_locale (LC_CTYPE,
3498 here->translit_copy_locale,
3499 repertoire->name, charmap);
3500
3501 if (other == NULL)
3502 {
3503 error (0, 0, _("\
3504%s: transliteration data from locale `%s' not available"),
3505 "LC_CTYPE", here->translit_copy_locale);
3506 break;
3507 }
3508
3509 here = other->categories[LC_CTYPE].ctype;
3510
3511 /* Enqueue the information if necessary. */
3512 if (here->translit != NULL)
3513 {
3514 struct translit_t *endp = here->translit;
3515 while (endp->next != NULL)
3516 endp = endp->next;
3517
3518 endp->next = ctype->translit;
3519 ctype->translit = here->translit;
3520 }
3521 }
3522 while (here->translit_copy_locale != NULL);
3523 }
3524
3525 if (ctype->translit != NULL)
3526 {
3527 /* First count how many entries we have. This is the upper limit
3528 since some entries from the included files might be overwritten. */
3529 size_t number = 0;
3530 size_t cnt;
3531 struct translit_t *runp = ctype->translit;
3532 struct translit_t **sorted;
3533 size_t from_len, to_len;
3534
3535 while (runp != NULL)
3536 {
3537 ++number;
3538 runp = runp->next;
3539 }
3540
3541 /* Next we allocate an array large enough and fill in the values. */
a9c27b3e
UD
3542 sorted = (struct translit_t **) alloca (number
3543 * sizeof (struct translit_t **));
4b10dd6c
UD
3544 runp = ctype->translit;
3545 number = 0;
3546 do
3547 {
3548 /* Search for the place where to insert this string.
3549 XXX Better use a real sorting algorithm later. */
3550 size_t idx = 0;
3551 int replace = 0;
3552
3553 while (idx < number)
3554 {
3555 int res = wcscmp ((const wchar_t *) sorted[idx]->from,
3556 (const wchar_t *) runp->from);
3557 if (res == 0)
3558 {
3559 replace = 1;
3560 break;
3561 }
3562 if (res > 0)
3563 break;
3564 ++idx;
3565 }
3566
3567 if (replace)
3568 sorted[idx] = runp;
3569 else
3570 {
3571 memmove (&sorted[idx + 1], &sorted[idx],
3572 (number - idx) * sizeof (struct translit_t *));
3573 sorted[idx] = runp;
3574 ++number;
3575 }
3576
3577 runp = runp->next;
3578 }
3579 while (runp != NULL);
3580
3581 /* The next step is putting all the possible transliteration
3582 strings in one memory block so that we can write it out.
3583 We need several different blocks:
9ca23765 3584 - index to the from-string array
4b10dd6c
UD
3585 - from-string array
3586 - index to the to-string array
3587 - to-string array.
4b10dd6c
UD
3588 */
3589 from_len = to_len = 0;
3590 for (cnt = 0; cnt < number; ++cnt)
3591 {
3592 struct translit_to_t *srunp;
3593 from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3594 srunp = sorted[cnt]->to;
3595 while (srunp != NULL)
3596 {
3597 to_len += wcslen ((const wchar_t *) srunp->str) + 1;
3598 srunp = srunp->next;
3599 }
3600 /* Plus one for the extra NUL character marking the end of
3601 the list for the current entry. */
3602 ++to_len;
3603 }
3604
3605 /* We can allocate the arrays for the results. */
4a33c2f5
UD
3606 ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
3607 ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
3608 ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
3609 ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
4b10dd6c
UD
3610
3611 from_len = 0;
3612 to_len = 0;
3613 for (cnt = 0; cnt < number; ++cnt)
3614 {
3615 size_t len;
3616 struct translit_to_t *srunp;
3617
4a33c2f5
UD
3618 ctype->translit_from_idx[cnt] = from_len;
3619 ctype->translit_to_idx[cnt] = to_len;
4b10dd6c
UD
3620
3621 len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4a33c2f5 3622 wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4b10dd6c
UD
3623 (const wchar_t *) sorted[cnt]->from, len);
3624 from_len += len;
3625
4a33c2f5 3626 ctype->translit_to_idx[cnt] = to_len;
4b10dd6c
UD
3627 srunp = sorted[cnt]->to;
3628 while (srunp != NULL)
3629 {
3630 len = wcslen ((const wchar_t *) srunp->str) + 1;
4a33c2f5 3631 wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4b10dd6c
UD
3632 (const wchar_t *) srunp->str, len);
3633 to_len += len;
3634 srunp = srunp->next;
3635 }
4a33c2f5 3636 ctype->translit_to_tbl[to_len++] = L'\0';
4b10dd6c 3637 }
4b10dd6c
UD
3638
3639 /* Store the information about the length. */
3640 ctype->translit_idx_size = number * sizeof (uint32_t);
3641 ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
3642 ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
3643 }
3644 else
3645 {
3646 /* Provide some dummy pointers since we have nothing to write out. */
3647 static uint32_t no_str = { 0 };
3648
4a33c2f5
UD
3649 ctype->translit_from_idx = &no_str;
3650 ctype->translit_from_tbl = &no_str;
3651 ctype->translit_to_tbl = &no_str;
4b10dd6c
UD
3652 ctype->translit_idx_size = 0;
3653 ctype->translit_from_tbl_size = 0;
3654 ctype->translit_to_tbl_size = 0;
3655 }
19bc17a9 3656}