]> git.ipfire.org Git - thirdparty/glibc.git/blame - locale/programs/ld-ctype.c
Update.
[thirdparty/glibc.git] / locale / programs / ld-ctype.c
CommitLineData
f76d7052 1/* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
c84142e8 2 This file is part of the GNU C Library.
4b10dd6c 3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
19bc17a9 4
c84142e8
UD
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
19bc17a9 9
c84142e8
UD
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
19bc17a9 14
c84142e8
UD
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
19bc17a9
RM
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
a68b0d31 24#include <alloca.h>
4b10dd6c 25#include <byteswap.h>
19bc17a9 26#include <endian.h>
4b10dd6c 27#include <errno.h>
19bc17a9 28#include <limits.h>
4b10dd6c
UD
29#include <obstack.h>
30#include <stdlib.h>
19bc17a9 31#include <string.h>
4b10dd6c
UD
32#include <wchar.h>
33#include <wctype.h>
34#include <sys/uio.h>
19bc17a9 35
4b10dd6c 36#include "charmap.h"
19bc17a9
RM
37#include "localeinfo.h"
38#include "langinfo.h"
4b10dd6c 39#include "linereader.h"
19bc17a9 40#include "locfile-token.h"
4b10dd6c
UD
41#include "locfile.h"
42#include "localedef.h"
19bc17a9 43
19bc17a9
RM
44#include <assert.h>
45
46
4b10dd6c
UD
/* Extra class bits which are not part of wctype.h because the classes
   they stand for are not preallocated.  The constants are unsigned so
   that the shift into bit 31 is well defined and the values never
   sign-extend when they are combined with wider operands.  */
#define _ISwspecial1	(1U << 29)
#define _ISwspecial2	(1U << 30)
#define _ISwspecial3	(1U << 31)
19bc17a9
RM
52
53
/* Bit used to represent a class.  The position of the bit is the
   distance of the class token CLS from tok_upper; BIT yields the mask
   for the 8-bit tables, BITw the mask for the wide character tables.  */
#define BITPOS(cls)	((cls) - tok_upper)
#define BIT(cls)	(_ISbit (BITPOS (cls)))
#define BITw(cls)	(_ISwbit (BITPOS (cls)))
19bc17a9
RM
58
/* Dereference the slot find_idx returns for VAL in the table COLL of
   CT.  SUB is pasted after the table name and after its `_max' and
   `_act' companions; it is either empty or an array subscript.  */
#define ELEM(ct, coll, sub, val) \
  *find_idx (ct, &ct->coll sub, &ct->coll##_max sub,			      \
	     &ct->coll##_act sub, val)
62
19bc17a9
RM
63
/* Types of the character class words.  To stay compatible with former
   implementations the number of class bits is restricted to 16 for
   now; once that compatibility is no longer necessary the number can
   be increased to 32.  Each *_TRANS macro names the byte swapping
   routine which matches the width of the corresponding type.  */
#define char_class_t		uint16_t
#define char_class32_t		uint32_t
#define CHAR_CLASS_TRANS	bswap_16
#define CHAR_CLASS32_TRANS	bswap_32
71
72
/* Description of one transliteration action: a from-string, possibly
   consisting of several characters, and a set of to-strings which may
   also consist of several characters each.  All strings use 32-bit
   values since this is what the gconv functions use.  */

/* One element of the list of to-strings.  */
struct translit_to_t
{
  /* The replacement string.  */
  uint32_t *str;

  /* Next alternative, if any.  */
  struct translit_to_t *next;
};

/* One transliteration rule.  */
struct translit_t
{
  /* The string which gets replaced.  */
  uint32_t *from;

  /* List of the possible replacements.  */
  struct translit_to_t *to;

  /* Next rule in the list.  */
  struct translit_t *next;
};
19bc17a9
RM
92
93
94/* The real definition of the struct for the LC_CTYPE locale. */
95struct locale_ctype_t
96{
4b10dd6c 97 uint32_t *charnames;
19bc17a9
RM
98 size_t charnames_max;
99 size_t charnames_act;
100
4b10dd6c
UD
101 struct repertoire_t *repertoire;
102
103 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
104#define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
ba1ffaa1 105 size_t nr_charclass;
19bc17a9 106 const char *classnames[MAX_NR_CHARCLASS];
4b10dd6c
UD
107 uint32_t last_class_char;
108 uint32_t class256_collection[256];
109 uint32_t *class_collection;
19bc17a9
RM
110 size_t class_collection_max;
111 size_t class_collection_act;
4b10dd6c
UD
112 uint32_t class_done;
113
114 struct charseq **mbdigits;
115 size_t mbdigits_act;
116 size_t mbdigits_max;
117 uint32_t *wcdigits;
118 size_t wcdigits_act;
119 size_t wcdigits_max;
120
121 struct charseq *mboutdigits[10];
122 uint32_t wcoutdigits[10];
123 size_t outdigits_act;
19bc17a9
RM
124
125 /* If the following number ever turns out to be too small simply
126 increase it. But I doubt it will. --drepper@gnu */
127#define MAX_NR_CHARMAP 16
128 const char *mapnames[MAX_NR_CHARMAP];
4b10dd6c
UD
129 uint32_t *map_collection[MAX_NR_CHARMAP];
130 uint32_t map256_collection[2][256];
9a0a462c
UD
131 size_t map_collection_max[MAX_NR_CHARMAP];
132 size_t map_collection_act[MAX_NR_CHARMAP];
19bc17a9
RM
133 size_t map_collection_nr;
134 size_t last_map_idx;
4b10dd6c
UD
135 int tomap_done[MAX_NR_CHARMAP];
136
137 /* Transliteration information. */
138 const char *translit_copy_locale;
139 const char *translit_copy_repertoire;
140 struct translit_t *translit;
19bc17a9
RM
141
142 /* The arrays for the binary representation. */
4b10dd6c
UD
143 uint32_t plane_size;
144 uint32_t plane_cnt;
19bc17a9
RM
145 char_class_t *ctype_b;
146 char_class32_t *ctype32_b;
4b10dd6c
UD
147 uint32_t *names_el;
148 uint32_t *names_eb;
149 uint32_t **map_eb;
150 uint32_t **map_el;
151 uint32_t *class_name_ptr;
152 uint32_t *map_name_ptr;
75cd5204 153 unsigned char *width;
4b10dd6c 154 uint32_t mb_cur_max;
6990326c 155 const char *codeset_name;
4b10dd6c
UD
156 uint32_t translit_hash_size_eb;
157 uint32_t translit_hash_size_el;
158 uint32_t translit_hash_layers_eb;
159 uint32_t translit_hash_layers_el;
160 uint32_t *translit_from_idx_eb;
161 uint32_t *translit_from_idx_el;
162 uint32_t *translit_from_tbl_eb;
163 uint32_t *translit_from_tbl_el;
164 uint32_t *translit_to_idx_eb;
165 uint32_t *translit_to_idx_el;
166 uint32_t *translit_to_tbl_eb;
167 uint32_t *translit_to_tbl_el;
168 size_t translit_idx_size;
169 size_t translit_from_tbl_size;
170 size_t translit_to_tbl_size;
171
172 struct obstack mem_pool;
19bc17a9
RM
173};
174
175
4b10dd6c
UD
/* Routines the obstack macros use to obtain and to release chunks.  */
#define obstack_chunk_free	free
#define obstack_chunk_alloc	xmalloc
178
179
/* Prototypes for local functions.  */

/* Setting up the category.  */
static void ctype_startup (struct linereader *lr,
			   struct localedef_t *locale,
			   struct charmap_t *charmap,
			   int ignore_content);

/* Registering a new character class or character map under NAME.  */
static void ctype_class_new (struct linereader *lr,
			     struct locale_ctype_t *ctype,
			     const char *name);
static void ctype_map_new (struct linereader *lr,
			   struct locale_ctype_t *ctype,
			   const char *name,
			   struct charmap_t *charmap);

/* Looking up the slot for IDX in one of the growing tables.  */
static uint32_t *find_idx (struct locale_ctype_t *ctype,
			   uint32_t **table,
			   size_t *max,
			   size_t *act,
			   unsigned int idx);

/* Finishing the tables once the input is read.  */
static void set_class_defaults (struct locale_ctype_t *ctype,
				struct charmap_t *charmap,
				struct repertoire_t *repertoire);
static void allocate_arrays (struct locale_ctype_t *ctype,
			     struct charmap_t *charmap,
			     struct repertoire_t *repertoire);
19bc17a9
RM
196
197
4b10dd6c
UD
/* Spelled-out names of the ten decimal digits, indexed by the value
   of the digit.  */
static const char *longnames[] =
{
  [0] = "zero",
  [1] = "one",
  [2] = "two",
  [3] = "three",
  [4] = "four",
  [5] = "five",
  [6] = "six",
  [7] = "seven",
  [8] = "eight",
  [9] = "nine"
};

/* The ASCII digits, indexed by the value of the digit.  */
static const unsigned char digits[] = "0123456789";
204
205
206static void
19bc17a9 207ctype_startup (struct linereader *lr, struct localedef_t *locale,
4b10dd6c 208 struct charmap_t *charmap, int ignore_content)
19bc17a9
RM
209{
210 unsigned int cnt;
211 struct locale_ctype_t *ctype;
212
4b10dd6c 213 if (!ignore_content)
19bc17a9 214 {
4b10dd6c
UD
215 /* Allocate the needed room. */
216 locale->categories[LC_CTYPE].ctype = ctype =
217 (struct locale_ctype_t *) xcalloc (1, sizeof (struct locale_ctype_t));
218
219 /* We have seen no names yet. */
220 ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
221 ctype->charnames =
222 (unsigned int *) xmalloc (ctype->charnames_max
223 * sizeof (unsigned int));
224 for (cnt = 0; cnt < 256; ++cnt)
225 ctype->charnames[cnt] = cnt;
226 ctype->charnames_act = 256;
227
228 /* Fill character class information. */
229 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
230 /* The order of the following instructions determines the bit
231 positions! */
232 ctype_class_new (lr, ctype, "upper");
233 ctype_class_new (lr, ctype, "lower");
234 ctype_class_new (lr, ctype, "alpha");
235 ctype_class_new (lr, ctype, "digit");
236 ctype_class_new (lr, ctype, "xdigit");
237 ctype_class_new (lr, ctype, "space");
238 ctype_class_new (lr, ctype, "print");
239 ctype_class_new (lr, ctype, "graph");
240 ctype_class_new (lr, ctype, "blank");
241 ctype_class_new (lr, ctype, "cntrl");
242 ctype_class_new (lr, ctype, "punct");
243 ctype_class_new (lr, ctype, "alnum");
244 /* The following are extensions from ISO 14652. */
245 ctype_class_new (lr, ctype, "left_to_right");
246 ctype_class_new (lr, ctype, "right_to_left");
247 ctype_class_new (lr, ctype, "num_terminator");
248 ctype_class_new (lr, ctype, "num_separator");
249 ctype_class_new (lr, ctype, "segment_separator");
250 ctype_class_new (lr, ctype, "block_separator");
251 ctype_class_new (lr, ctype, "direction_control");
252 ctype_class_new (lr, ctype, "sym_swap_layout");
253 ctype_class_new (lr, ctype, "char_shape_selector");
254 ctype_class_new (lr, ctype, "num_shape_selector");
255 ctype_class_new (lr, ctype, "non_spacing");
256 ctype_class_new (lr, ctype, "non_spacing_level3");
257 ctype_class_new (lr, ctype, "normal_connect");
258 ctype_class_new (lr, ctype, "r_connect");
259 ctype_class_new (lr, ctype, "no_connect");
260 ctype_class_new (lr, ctype, "no_connect-space");
261 ctype_class_new (lr, ctype, "vowel_connect");
262
263 ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
264 ctype->class_collection
265 = (uint32_t *) xcalloc (sizeof (unsigned long int),
266 ctype->class_collection_max);
267 ctype->class_collection_act = 256;
268
269 /* Fill character map information. */
270 ctype->map_collection_nr = 0;
271 ctype->last_map_idx = MAX_NR_CHARMAP;
272 ctype_map_new (lr, ctype, "toupper", charmap);
273 ctype_map_new (lr, ctype, "tolower", charmap);
274 ctype_map_new (lr, ctype, "tosymmetric", charmap);
275
276 /* Fill first 256 entries in `toXXX' arrays. */
277 for (cnt = 0; cnt < 256; ++cnt)
278 {
279 ctype->map_collection[0][cnt] = cnt;
280 ctype->map_collection[1][cnt] = cnt;
281 ctype->map_collection[2][cnt] = cnt;
282 ctype->map256_collection[0][cnt] = cnt;
283 ctype->map256_collection[1][cnt] = cnt;
284 }
285
286 obstack_init (&ctype->mem_pool);
19bc17a9
RM
287 }
288}
289
290
291void
4b10dd6c 292ctype_finish (struct localedef_t *locale, struct charmap_t *charmap)
19bc17a9
RM
293{
294 /* See POSIX.2, table 2-6 for the meaning of the following table. */
295#define NCLASS 12
296 static const struct
297 {
298 const char *name;
299 const char allow[NCLASS];
300 }
301 valid_table[NCLASS] =
302 {
303 /* The order is important. See token.h for more information.
304 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
305 { "upper", "--MX-XDDXXX-" },
306 { "lower", "--MX-XDDXXX-" },
307 { "alpha", "---X-XDDXXX-" },
308 { "digit", "XXX--XDDXXX-" },
309 { "xdigit", "-----XDDXXX-" },
310 { "space", "XXXXX------X" },
311 { "print", "---------X--" },
312 { "graph", "---------X--" },
313 { "blank", "XXXXXM-----X" },
314 { "cntrl", "XXXXX-XX--XX" },
315 { "punct", "XXXXX-DD-X-X" },
316 { "alnum", "-----XDDXXX-" }
317 };
318 size_t cnt;
319 int cls1, cls2;
4b10dd6c
UD
320 uint32_t space_value;
321 struct charseq *space_seq;
19bc17a9 322 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
4b10dd6c 323 int warned;
19bc17a9
RM
324
325 /* Set default value for classes not specified. */
4b10dd6c 326 set_class_defaults (ctype, charmap, ctype->repertoire);
19bc17a9
RM
327
328 /* Check according to table. */
329 for (cnt = 0; cnt < ctype->class_collection_max; ++cnt)
330 {
4b10dd6c 331 uint32_t tmp = ctype->class_collection[cnt];
19bc17a9 332
4b10dd6c
UD
333 if (tmp != 0)
334 {
335 for (cls1 = 0; cls1 < NCLASS; ++cls1)
336 if ((tmp & _ISwbit (cls1)) != 0)
337 for (cls2 = 0; cls2 < NCLASS; ++cls2)
338 if (valid_table[cls1].allow[cls2] != '-')
19bc17a9 339 {
4b10dd6c
UD
340 int eq = (tmp & _ISwbit (cls2)) != 0;
341 switch (valid_table[cls1].allow[cls2])
19bc17a9 342 {
4b10dd6c
UD
343 case 'M':
344 if (!eq)
345 {
346 uint32_t value = ctype->charnames[cnt];
347
348 if (!be_quiet)
349 error (0, 0, _("\
350character L'\\u%0*x' in class `%s' must be in class `%s'"),
351 value > 0xffff ? 8 : 4, value,
352 valid_table[cls1].name,
353 valid_table[cls2].name);
354 }
355 break;
356
357 case 'X':
358 if (eq)
359 {
360 uint32_t value = ctype->charnames[cnt];
361
362 if (!be_quiet)
363 error (0, 0, _("\
364character L'\\u%0*x' in class `%s' must not be in class `%s'"),
365 value > 0xffff ? 8 : 4, value,
366 valid_table[cls1].name,
367 valid_table[cls2].name);
368 }
369 break;
370
371 case 'D':
372 ctype->class_collection[cnt] |= _ISwbit (cls2);
373 break;
374
375 default:
376 error (5, 0, _("internal error in %s, line %u"),
377 __FUNCTION__, __LINE__);
19bc17a9 378 }
4b10dd6c
UD
379 }
380 }
381 }
382
383 for (cnt = 0; cnt < 256; ++cnt)
384 {
385 uint32_t tmp = ctype->class256_collection[cnt];
19bc17a9 386
4b10dd6c
UD
387 if (tmp != 0)
388 {
389 for (cls1 = 0; cls1 < NCLASS; ++cls1)
390 if ((tmp & _ISbit (cls1)) != 0)
391 for (cls2 = 0; cls2 < NCLASS; ++cls2)
392 if (valid_table[cls1].allow[cls2] != '-')
393 {
394 int eq = (tmp & _ISbit (cls2)) != 0;
395 switch (valid_table[cls1].allow[cls2])
19bc17a9 396 {
4b10dd6c
UD
397 case 'M':
398 if (!eq)
399 {
400 char buf[17];
401
402 sprintf (buf, "\\%o", cnt);
403
404 if (!be_quiet)
405 error (0, 0, _("\
406character '%s' in class `%s' must be in class `%s'"),
407 buf, valid_table[cls1].name,
408 valid_table[cls2].name);
409 }
410 break;
411
412 case 'X':
413 if (eq)
414 {
415 char buf[17];
416
417 sprintf (buf, "\\%o", cnt);
418
419 if (!be_quiet)
420 error (0, 0, _("\
421character '%s' in class `%s' must not be in class `%s'"),
422 buf, valid_table[cls1].name,
423 valid_table[cls2].name);
424 }
425 break;
426
427 case 'D':
428 ctype->class256_collection[cnt] |= _ISbit (cls2);
429 break;
430
431 default:
432 error (5, 0, _("internal error in %s, line %u"),
433 __FUNCTION__, __LINE__);
19bc17a9 434 }
4b10dd6c
UD
435 }
436 }
19bc17a9
RM
437 }
438
439 /* ... and now test <SP> as a special case. */
4b10dd6c
UD
440 space_value = repertoire_find_value (ctype->repertoire, "SP", 2);
441 if (space_value == ILLEGAL_CHAR_VALUE)
880f421f
UD
442 {
443 if (!be_quiet)
444 error (0, 0, _("character <SP> not defined in character map"));
445 }
c84142e8
UD
446 else if (((cnt = BITPOS (tok_space),
447 (ELEM (ctype, class_collection, , space_value)
4b10dd6c 448 & BITw (tok_space)) == 0)
c84142e8
UD
449 || (cnt = BITPOS (tok_blank),
450 (ELEM (ctype, class_collection, , space_value)
4b10dd6c 451 & BITw (tok_blank)) == 0)))
880f421f
UD
452 {
453 if (!be_quiet)
454 error (0, 0, _("<SP> character not in class `%s'"),
455 valid_table[cnt].name);
456 }
c84142e8
UD
457 else if (((cnt = BITPOS (tok_punct),
458 (ELEM (ctype, class_collection, , space_value)
4b10dd6c 459 & BITw (tok_punct)) != 0)
c84142e8
UD
460 || (cnt = BITPOS (tok_graph),
461 (ELEM (ctype, class_collection, , space_value)
4b10dd6c 462 & BITw (tok_graph))
880f421f
UD
463 != 0)))
464 {
465 if (!be_quiet)
466 error (0, 0, _("<SP> character must not be in class `%s'"),
467 valid_table[cnt].name);
468 }
19bc17a9 469 else
4b10dd6c
UD
470 ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
471
472 space_seq = charmap_find_value (charmap, "SP", 2);
473 if (space_seq == NULL || space_seq->nbytes != 1)
474 {
475 if (!be_quiet)
476 error (0, 0, _("character <SP> not defined in character map"));
477 }
478 else if (((cnt = BITPOS (tok_space),
479 (ctype->class256_collection[space_seq->bytes[0]]
480 & BIT (tok_space)) == 0)
481 || (cnt = BITPOS (tok_blank),
482 (ctype->class256_collection[space_seq->bytes[0]]
483 & BIT (tok_blank)) == 0)))
484 {
485 if (!be_quiet)
486 error (0, 0, _("<SP> character not in class `%s'"),
487 valid_table[cnt].name);
488 }
489 else if (((cnt = BITPOS (tok_punct),
490 (ctype->class256_collection[space_seq->bytes[0]]
491 & BIT (tok_punct)) != 0)
492 || (cnt = BITPOS (tok_graph),
493 (ctype->class256_collection[space_seq->bytes[0]]
494 & BIT (tok_graph)) != 0)))
495 {
496 if (!be_quiet)
497 error (0, 0, _("<SP> character must not be in class `%s'"),
498 valid_table[cnt].name);
499 }
500 else
501 ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
75cd5204
RM
502
503 /* Now that the tests are done make sure the name array contains all
504 characters which are handled in the WIDTH section of the
505 character set definition file. */
4b10dd6c
UD
506 if (charmap->width_rules != NULL)
507 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
75cd5204 508 {
4b10dd6c 509#if 0
75cd5204 510 size_t inner;
4b10dd6c
UD
511 for (inner = charmap->width_rules[cnt].from;
512 inner <= charmap->width_rules[cnt].to; ++inner)
75cd5204 513 (void) find_idx (ctype, NULL, NULL, NULL, inner);
4b10dd6c
UD
514#else
515 /* XXX Handle width. We must convert from the charseq to the
516 repertoire value */
517 abort ();
518#endif
519 }
520
521 /* There must be a multiple of 10 digits. */
522 if (ctype->mbdigits_act % 10 != 0)
523 {
524 assert (ctype->mbdigits_act == ctype->wcdigits_act);
525 ctype->wcdigits_act -= ctype->mbdigits_act % 10;
526 ctype->mbdigits_act -= ctype->mbdigits_act % 10;
527 error (0, 0, _("`digit' category has not entries in groups of ten"));
528 }
529
530 /* Check the input digits. There must be a multiple of ten available.
531 In each group I could be that one or the other character is missing.
532 In this case the whole group must be removed. */
533 cnt = 0;
534 while (cnt < ctype->mbdigits_act)
535 {
536 size_t inner;
537 for (inner = 0; inner < 10; ++inner)
538 if (ctype->mbdigits[cnt + inner] == NULL)
539 break;
540
541 if (inner == 10)
542 cnt += 10;
543 else
544 {
545 /* Remove the group. */
546 memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
547 ((ctype->wcdigits_act - cnt - 10)
548 * sizeof (ctype->mbdigits[0])));
549 ctype->mbdigits_act -= 10;
550 }
551 }
552
553 /* If no input digits are given use the default. */
554 if (ctype->mbdigits_act == 0)
555 {
556 if (ctype->mbdigits_max == 0)
557 {
558 ctype->mbdigits = obstack_alloc (&charmap->mem_pool,
559 10 * sizeof (struct charseq *));
560 ctype->mbdigits_max = 10;
561 }
562
563 for (cnt = 0; cnt < 10; ++cnt)
564 {
565 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
566 digits + cnt, 1);
567 if (ctype->mbdigits[cnt] == NULL)
568 {
569 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
570 longnames[cnt],
571 strlen (longnames[cnt]));
572 if (ctype->mbdigits[cnt] == NULL)
573 {
574 /* Hum, this ain't good. */
575 error (0, 0, _("\
576no input digits defined and none of the standard names in the charmap"));
577
578 ctype->mbdigits[cnt] = obstack_alloc (&charmap->mem_pool,
579 sizeof (struct charseq) + 1);
580
581 /* This is better than nothing. */
582 ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
583 ctype->mbdigits[cnt]->nbytes = 1;
584 }
585 }
586 }
587
588 ctype->mbdigits_act = 10;
589 }
590
591 /* Check the wide character input digits. There must be a multiple
592 of ten available. In each group I could be that one or the other
593 character is missing. In this case the whole group must be
594 removed. */
595 cnt = 0;
596 while (cnt < ctype->wcdigits_act)
597 {
598 size_t inner;
599 for (inner = 0; inner < 10; ++inner)
600 if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
601 break;
602
603 if (inner == 10)
604 cnt += 10;
605 else
606 {
607 /* Remove the group. */
608 memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
609 ((ctype->wcdigits_act - cnt - 10)
610 * sizeof (ctype->wcdigits[0])));
611 ctype->wcdigits_act -= 10;
612 }
613 }
614
615 /* If no input digits are given use the default. */
616 if (ctype->wcdigits_act == 0)
617 {
618 if (ctype->wcdigits_max == 0)
619 {
620 ctype->wcdigits = obstack_alloc (&charmap->mem_pool,
621 10 * sizeof (uint32_t));
622 ctype->wcdigits_max = 10;
623 }
624
625 for (cnt = 0; cnt < 10; ++cnt)
626 ctype->wcdigits[cnt] = L'0' + cnt;
627
628 ctype->mbdigits_act = 10;
629 }
630
631 /* Check the outdigits. */
632 warned = 0;
633 for (cnt = 0; cnt < 10; ++cnt)
634 if (ctype->mboutdigits[cnt] == NULL)
635 {
636 static struct charseq replace[2];
637
638 if (!warned)
639 {
640 error (0, 0, _("\
641not all characters used in `outdigit' are available in the charmap"));
642 warned = 1;
643 }
644
645 replace[0].nbytes = 1;
646 replace[0].bytes[0] = '?';
647 replace[0].bytes[1] = '\0';
648 ctype->mboutdigits[cnt] = &replace[0];
649 }
650
651 warned = 0;
652 for (cnt = 0; cnt < 10; ++cnt)
653 if (ctype->wcoutdigits[cnt] == 0)
654 {
655 if (!warned)
656 {
657 error (0, 0, _("\
658not all characters used in `outdigit' are available in the repertoire"));
659 warned = 1;
660 }
661
662 ctype->wcoutdigits[cnt] = L'?';
75cd5204 663 }
19bc17a9
RM
664}
665
666
667void
4b10dd6c 668ctype_output (struct localedef_t *locale, struct charmap_t *charmap,
75cd5204 669 const char *output_path)
19bc17a9
RM
670{
671 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
672 const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
673 + 2 * (ctype->map_collection_nr - 2));
75cd5204
RM
674 struct iovec iov[2 + nelems + ctype->nr_charclass
675 + ctype->map_collection_nr];
19bc17a9 676 struct locale_file data;
4b10dd6c 677 uint32_t idx[nelems + 1];
75cd5204 678 size_t elem, cnt, offset, total;
4b10dd6c 679 char *cp;
19bc17a9
RM
680
681 /* Now prepare the output: Find the sizes of the table we can use. */
4b10dd6c 682 allocate_arrays (ctype, charmap, ctype->repertoire);
19bc17a9
RM
683
684 data.magic = LIMAGIC (LC_CTYPE);
685 data.n = nelems;
686 iov[0].iov_base = (void *) &data;
687 iov[0].iov_len = sizeof (data);
688
689 iov[1].iov_base = (void *) idx;
690 iov[1].iov_len = sizeof (idx);
691
692 idx[0] = iov[0].iov_len + iov[1].iov_len;
693 offset = 0;
694
695 for (elem = 0; elem < nelems; ++elem)
696 {
697 if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
698 switch (elem)
699 {
700#define CTYPE_DATA(name, base, len) \
701 case _NL_ITEM_INDEX (name): \
ce7a5ef4
RM
702 iov[2 + elem + offset].iov_base = (base); \
703 iov[2 + elem + offset].iov_len = (len); \
75cd5204
RM
704 if (elem + 1 < nelems) \
705 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
19bc17a9
RM
706 break
707
708 CTYPE_DATA (_NL_CTYPE_CLASS,
709 ctype->ctype_b,
710 (256 + 128) * sizeof (char_class_t));
711
712 CTYPE_DATA (_NL_CTYPE_TOUPPER_EB,
713 ctype->map_eb[0],
714 (ctype->plane_size * ctype->plane_cnt + 128)
4b10dd6c 715 * sizeof (uint32_t));
19bc17a9
RM
716 CTYPE_DATA (_NL_CTYPE_TOLOWER_EB,
717 ctype->map_eb[1],
718 (ctype->plane_size * ctype->plane_cnt + 128)
4b10dd6c 719 * sizeof (uint32_t));
19bc17a9
RM
720
721 CTYPE_DATA (_NL_CTYPE_TOUPPER_EL,
722 ctype->map_el[0],
723 (ctype->plane_size * ctype->plane_cnt + 128)
4b10dd6c 724 * sizeof (uint32_t));
19bc17a9
RM
725 CTYPE_DATA (_NL_CTYPE_TOLOWER_EL,
726 ctype->map_el[1],
727 (ctype->plane_size * ctype->plane_cnt + 128)
4b10dd6c 728 * sizeof (uint32_t));
19bc17a9
RM
729
730 CTYPE_DATA (_NL_CTYPE_CLASS32,
731 ctype->ctype32_b,
732 (ctype->plane_size * ctype->plane_cnt
733 * sizeof (char_class32_t)));
734
735 CTYPE_DATA (_NL_CTYPE_NAMES_EB,
7a12c6bb 736 ctype->names_eb, (ctype->plane_size * ctype->plane_cnt
4b10dd6c 737 * sizeof (uint32_t)));
19bc17a9 738 CTYPE_DATA (_NL_CTYPE_NAMES_EL,
7a12c6bb 739 ctype->names_el, (ctype->plane_size * ctype->plane_cnt
4b10dd6c
UD
740 * sizeof (uint32_t)));
741
742 CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_SIZE_EB,
743 &ctype->translit_hash_size_eb, sizeof (uint32_t));
744 CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_SIZE_EL,
745 &ctype->translit_hash_size_el, sizeof (uint32_t));
746 CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_LAYERS_EB,
747 &ctype->translit_hash_layers_eb, sizeof (uint32_t));
748 CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_LAYERS_EL,
749 &ctype->translit_hash_layers_el, sizeof (uint32_t));
750
751 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX_EB,
752 ctype->translit_from_idx_eb,
753 ctype->translit_idx_size);
754 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX_EL,
755 ctype->translit_from_idx_el,
756 ctype->translit_idx_size);
757
758 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL_EB,
759 ctype->translit_from_tbl_eb,
760 ctype->translit_from_tbl_size);
761 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL_EL,
762 ctype->translit_from_tbl_el,
763 ctype->translit_from_tbl_size);
764
765 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX_EB,
766 ctype->translit_to_idx_eb,
767 ctype->translit_idx_size);
768 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX_EL,
769 ctype->translit_to_idx_el,
770 ctype->translit_idx_size);
771
772 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL_EB,
773 ctype->translit_to_tbl_eb, ctype->translit_to_tbl_size);
774 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL_EL,
775 ctype->translit_to_tbl_el, ctype->translit_to_tbl_size);
776
777#if __BYTE_ORDER == __BIG_ENDIAN
778 CTYPE_DATA (_NL_CTYPE_HASH_SIZE_EB,
779 &ctype->plane_size, sizeof (uint32_t));
780 CTYPE_DATA (_NL_CTYPE_HASH_LAYERS_EB,
781 &ctype->plane_cnt, sizeof (uint32_t));
782#else
783 case _NL_ITEM_INDEX (_NL_CTYPE_HASH_SIZE_EB):
784 iov[2 + elem + offset].iov_base =
785 (uint32_t *) alloca (sizeof (uint32_t));
786 *(uint32_t *) iov[2 + elem + offset].iov_base =
787 bswap_32 (ctype->plane_size);
788 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
789 break;
790 case _NL_ITEM_INDEX (_NL_CTYPE_HASH_LAYERS_EB):
791 iov[2 + elem + offset].iov_base =
792 (uint32_t *) alloca (sizeof (uint32_t));
793 *(uint32_t *) iov[2 + elem + offset].iov_base =
794 bswap_32 (ctype->plane_cnt);
795 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
796 break;
797#endif
798#if __BYTE_ORDER == __BIG_ENDIAN
799 CTYPE_DATA (_NL_CTYPE_HASH_SIZE_EL,
800 &ctype->plane_size, sizeof (uint32_t));
801 CTYPE_DATA (_NL_CTYPE_HASH_LAYERS_EL,
802 &ctype->plane_cnt, sizeof (uint32_t));
803#else
804 case _NL_ITEM_INDEX (_NL_CTYPE_HASH_SIZE_EL):
805 iov[2 + elem + offset].iov_base =
806 (uint32_t *) alloca (sizeof (uint32_t));
807 *(uint32_t *) iov[2 + elem + offset].iov_base =
808 bswap_32 (ctype->plane_size);
809 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
810 break;
811 case _NL_ITEM_INDEX (_NL_CTYPE_HASH_LAYERS_EL):
812 iov[2 + elem + offset].iov_base =
813 (uint32_t *) alloca (sizeof (uint32_t));
814 *(uint32_t *) iov[2 + elem + offset].iov_base =
815 bswap_32 (ctype->plane_cnt);
816 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
817 break;
818#endif
19bc17a9 819
75cd5204
RM
820 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
821 /* The class name array. */
822 total = 0;
823 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
824 {
825 iov[2 + elem + offset].iov_base
826 = (void *) ctype->classnames[cnt];
827 iov[2 + elem + offset].iov_len
828 = strlen (ctype->classnames[cnt]) + 1;
829 total += iov[2 + elem + offset].iov_len;
830 }
ce7a5ef4
RM
831 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
832 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
833 total += 1 + (4 - ((total + 1) % 4));
75cd5204 834
4b10dd6c 835 idx[elem + 1] = idx[elem] + total;
75cd5204
RM
836 break;
837
838 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
839 /* The class name array. */
840 total = 0;
841 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
842 {
843 iov[2 + elem + offset].iov_base
844 = (void *) ctype->mapnames[cnt];
845 iov[2 + elem + offset].iov_len
846 = strlen (ctype->mapnames[cnt]) + 1;
847 total += iov[2 + elem + offset].iov_len;
848 }
ce7a5ef4
RM
849 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
850 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
851 total += 1 + (4 - ((total + 1) % 4));
75cd5204 852
4b10dd6c 853 idx[elem + 1] = idx[elem] + total;
75cd5204 854 break;
19bc17a9
RM
855
856 CTYPE_DATA (_NL_CTYPE_WIDTH,
75cd5204 857 ctype->width, ctype->plane_size * ctype->plane_cnt);
19bc17a9 858
0200214b 859 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
4b10dd6c 860 &ctype->mb_cur_max, sizeof (uint32_t));
0200214b 861
ce7a5ef4
RM
862 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
863 total = strlen (ctype->codeset_name) + 1;
864 if (total % 4 == 0)
865 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
866 else
867 {
868 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
9756dfe1
UD
869 memset (mempcpy (iov[2 + elem + offset].iov_base,
870 ctype->codeset_name, total),
871 '\0', 4 - (total & 3));
ce7a5ef4
RM
872 total = (total + 3) & ~3;
873 }
874 iov[2 + elem + offset].iov_len = total;
4b10dd6c
UD
875 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
876 break;
877
878 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN_EB):
879 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN_EL):
880 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
881 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
882 if ((elem == _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN_EB)
883 && __BYTE_ORDER == __BIG_ENDIAN)
884 || (elem == _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN_EL)
885 && __BYTE_ORDER == __LITTLE_ENDIAN))
886 *(uint32_t *) iov[2 + elem + offset].iov_base =
887 ctype->mbdigits_act / 10;
888 else
889 *(uint32_t *) iov[2 + elem + offset].iov_base =
890 bswap_32 (ctype->mbdigits_act / 10);
891 break;
892
893 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN_EB):
894 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN_EL):
895 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
896 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
897 if ((elem == _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN_EB)
898 && __BYTE_ORDER == __BIG_ENDIAN)
899 || (elem == _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN_EL)
900 && __BYTE_ORDER == __LITTLE_ENDIAN))
901 *(uint32_t *) iov[2 + elem + offset].iov_base =
902 ctype->wcdigits_act / 10;
903 else
904 *(uint32_t *) iov[2 + elem + offset].iov_base =
905 bswap_32 (ctype->wcdigits_act / 10);
906 break;
907
908 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
909 /* Compute the length of all possible characters. For INDIGITS
910 there might be more than one. We simply concatenate all of
911 them with a NUL byte following. The NUL byte wouldn't be
912 necessary but it makes it easier for the user. */
913 total = 0;
914 for (cnt = elem - _NL_CTYPE_INDIGITS0_MB;
915 cnt < ctype->mbdigits_act; cnt += 10)
916 total += ctype->mbdigits[cnt]->nbytes + 1;
917 iov[2 + elem + offset].iov_base = (char *) alloca (total);
918 iov[2 + elem + offset].iov_len = total;
919
920 cp = iov[2 + elem + offset].iov_base;
921 for (cnt = elem - _NL_CTYPE_INDIGITS0_MB;
922 cnt < ctype->mbdigits_act; cnt += 10)
923 {
924 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
925 ctype->mbdigits[cnt]->nbytes);
926 *cp++ = '\0';
927 }
928 break;
929
930 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
931 /* Compute the length of all possible characters. For INDIGITS
932 there might be more than one. We simply concatenate all of
933 them with a NUL byte following. The NUL byte wouldn't be
934 necessary but it makes it easier for the user. */
935 cnt = elem - _NL_CTYPE_OUTDIGIT0_MB;
936 total = ctype->mboutdigits[cnt]->nbytes + 1;
937 iov[2 + elem + offset].iov_base = (char *) alloca (total);
938 iov[2 + elem + offset].iov_len = total;
939
940 *(char *) mempcpy (iov[2 + elem + offset].iov_base,
941 ctype->mbdigits[cnt]->bytes,
942 ctype->mbdigits[cnt]->nbytes) = '\0';
943 break;
944
945 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC_EB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC_EB):
946 total = ctype->wcdigits_act / 10;
947
948 iov[2 + elem + offset].iov_base =
949 (uint32_t *) alloca (total * sizeof (uint32_t));
950 iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
951
952 for (cnt = elem - _NL_CTYPE_INDIGITS0_WC_EB;
953 cnt < ctype->wcdigits_act; cnt += 10)
954 ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
955 = (__BYTE_ORDER == __LITTLE_ENDIAN
956 ? bswap_32 (ctype->wcdigits[cnt]) : ctype->wcdigits[cnt]);
957 break;
958
959 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC_EL) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC_EL):
960 total = ctype->wcdigits_act / 10;
961
962 iov[2 + elem + offset].iov_base =
963 (uint32_t *) alloca (total * sizeof (uint32_t));
964 iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
965
966 for (cnt = elem - _NL_CTYPE_INDIGITS0_WC_EL;
967 cnt < ctype->wcdigits_act; cnt += 10)
968 ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
969 = (__BYTE_ORDER == __BIG_ENDIAN
970 ? bswap_32 (ctype->wcdigits[cnt]) : ctype->wcdigits[cnt]);
971 break;
972
973#if __BYTE_ORDER == __BIG_ENDIAN
974 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC_EB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC_EB):
975 cnt = elem - _NL_CTYPE_OUTDIGIT0_WC_EB;
976#else
977 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC_EL) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC_EL):
978 cnt = elem - _NL_CTYPE_OUTDIGIT0_WC_EL;
979#endif
980 iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
981 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
982 break;
983
984#if __BYTE_ORDER == __LITTLE_ENDIAN
985 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC_EB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC_EB):
986 cnt = elem - _NL_CTYPE_OUTDIGIT0_WC_EB;
987#else
988 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC_EL) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC_EL):
989 cnt = elem - _NL_CTYPE_OUTDIGIT0_WC_EL;
990#endif
991 iov[2 + elem + offset].iov_base =
992 (uint32_t *) alloca (sizeof (uint32_t));
993 *(uint32_t *) iov[2 + elem + offset].iov_base =
994 bswap_32 (ctype->wcoutdigits[cnt]);
995 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
ce7a5ef4 996 break;
6990326c 997
19bc17a9
RM
998 default:
999 assert (! "unknown CTYPE element");
1000 }
1001 else
1002 {
1003 /* Handle extra maps. */
1004 size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) >> 1;
1005
1006 if (((elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) & 1) == 0)
75cd5204 1007 iov[2 + elem + offset].iov_base = ctype->map_eb[nr];
19bc17a9 1008 else
75cd5204 1009 iov[2 + elem + offset].iov_base = ctype->map_el[nr];
19bc17a9 1010
75cd5204
RM
1011 iov[2 + elem + offset].iov_len = ((ctype->plane_size
1012 * ctype->plane_cnt + 128)
4b10dd6c 1013 * sizeof (uint32_t));
19bc17a9 1014
4b10dd6c 1015 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
75cd5204 1016 }
19bc17a9 1017 }
19bc17a9 1018
75cd5204
RM
1019 assert (2 + elem + offset == (nelems + ctype->nr_charclass
1020 + ctype->map_collection_nr + 2));
19bc17a9 1021
75cd5204 1022 write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
19bc17a9
RM
1023}
1024
1025
4b10dd6c
UD
1026/* Local functions. */
1027static void
1028ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1029 const char *name)
19bc17a9 1030{
4b10dd6c 1031 size_t cnt;
19bc17a9 1032
4b10dd6c
UD
1033 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1034 if (strcmp (ctype->classnames[cnt], name) == 0)
1035 break;
19bc17a9 1036
4b10dd6c
UD
1037 if (cnt < ctype->nr_charclass)
1038 {
1039 lr_error (lr, _("character class `%s' already defined"), name);
1040 return;
1041 }
19bc17a9 1042
4b10dd6c
UD
1043 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1044 /* Exit code 2 is prescribed in P1003.2b. */
1045 error (2, 0, _("\
1046implementation limit: no more than %d character classes allowed"),
1047 MAX_NR_CHARCLASS);
19bc17a9 1048
4b10dd6c 1049 ctype->classnames[ctype->nr_charclass++] = name;
19bc17a9
RM
1050}
1051
1052
4b10dd6c
UD
1053static void
1054ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1055 const char *name, struct charmap_t *charmap)
19bc17a9 1056{
4b10dd6c 1057 size_t max_chars = 0;
ba1ffaa1 1058 size_t cnt;
19bc17a9 1059
4b10dd6c 1060 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
19bc17a9 1061 {
4b10dd6c
UD
1062 if (strcmp (ctype->mapnames[cnt], name) == 0)
1063 break;
1064
1065 if (max_chars < ctype->map_collection_max[cnt])
1066 max_chars = ctype->map_collection_max[cnt];
19bc17a9
RM
1067 }
1068
4b10dd6c
UD
1069 if (cnt < ctype->map_collection_nr)
1070 {
1071 lr_error (lr, _("character map `%s' already defined"), name);
1072 return;
1073 }
19bc17a9 1074
4b10dd6c
UD
1075 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1076 /* Exit code 2 is prescribed in P1003.2b. */
1077 error (2, 0, _("\
1078implementation limit: no more than %d character maps allowed"),
1079 MAX_NR_CHARMAP);
19bc17a9 1080
4b10dd6c
UD
1081 ctype->mapnames[cnt] = name;
1082
1083 if (max_chars == 0)
1084 ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1085 else
1086 ctype->map_collection_max[cnt] = max_chars;
1087
1088 ctype->map_collection[cnt] = (uint32_t *)
1089 xmalloc (sizeof (uint32_t) * ctype->map_collection_max[cnt]);
1090 memset (ctype->map_collection[cnt], '\0',
1091 sizeof (uint32_t) * ctype->map_collection_max[cnt]);
1092 ctype->map_collection_act[cnt] = 256;
19bc17a9 1093
4b10dd6c 1094 ++ctype->map_collection_nr;
19bc17a9
RM
1095}
1096
1097
4b10dd6c
UD
1098/* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1099 is possible if we only want ot extend the name array. */
1100static uint32_t *
1101find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1102 size_t *act, uint32_t idx)
19bc17a9 1103{
4b10dd6c 1104 size_t cnt;
19bc17a9 1105
4b10dd6c
UD
1106 if (idx < 256)
1107 return table == NULL ? NULL : &(*table)[idx];
19bc17a9 1108
4b10dd6c
UD
1109 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1110 if (ctype->charnames[cnt] == idx)
1111 break;
19bc17a9 1112
4b10dd6c
UD
1113 /* We have to distinguish two cases: the name is found or not. */
1114 if (cnt == ctype->charnames_act)
1115 {
1116 /* Extend the name array. */
1117 if (ctype->charnames_act == ctype->charnames_max)
1118 {
1119 ctype->charnames_max *= 2;
1120 ctype->charnames = (unsigned int *)
1121 xrealloc (ctype->charnames,
1122 sizeof (unsigned int) * ctype->charnames_max);
1123 }
1124 ctype->charnames[ctype->charnames_act++] = idx;
1125 }
19bc17a9 1126
4b10dd6c
UD
1127 if (table == NULL)
1128 /* We have done everything we are asked to do. */
1129 return NULL;
19bc17a9 1130
4b10dd6c
UD
1131 if (cnt >= *act)
1132 {
1133 if (cnt >= *max)
1134 {
1135 size_t old_max = *max;
1136 do
1137 *max *= 2;
1138 while (*max <= cnt);
19bc17a9 1139
4b10dd6c
UD
1140 *table =
1141 (uint32_t *) xrealloc (*table, *max * sizeof (unsigned long int));
1142 memset (&(*table)[old_max], '\0',
1143 (*max - old_max) * sizeof (uint32_t));
1144 }
19bc17a9 1145
4b10dd6c
UD
1146 *act = cnt;
1147 }
19bc17a9 1148
4b10dd6c 1149 return &(*table)[cnt];
19bc17a9
RM
1150}
1151
1152
4b10dd6c
UD
1153static int
1154get_character (struct token *now, struct charmap_t *charmap,
1155 struct repertoire_t *repertoire,
1156 struct charseq **seqp, uint32_t *wchp)
19bc17a9 1157{
4b10dd6c
UD
1158 if (now->tok == tok_bsymbol)
1159 {
1160 /* This will hopefully be the normal case. */
1161 *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1162 now->val.str.lenmb);
1163 *seqp = charmap_find_value (charmap, now->val.str.startmb,
1164 now->val.str.lenmb);
1165 }
1166 else if (now->tok == tok_ucs4)
1167 {
1168 *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
19bc17a9 1169
4b10dd6c
UD
1170 if (*seqp == NULL)
1171 {
1172 /* Compute the value in the charmap from the UCS value. */
1173 const char *symbol = repertoire_find_symbol (repertoire,
1174 now->val.ucs4);
19bc17a9 1175
4b10dd6c
UD
1176 if (symbol == NULL)
1177 *seqp = NULL;
1178 else
1179 *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
19bc17a9 1180
4b10dd6c
UD
1181 if (*seqp == NULL)
1182 {
1183 /* Insert a negative entry. */
1184 static const struct charseq negative
1185 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1186 uint32_t *newp = obstack_alloc (&repertoire->mem_pool, 4);
1187 *newp = now->val.ucs4;
1188
1189 insert_entry (&repertoire->seq_table, newp, 4,
1190 (void *) &negative);
1191 }
1192 else
1193 (*seqp)->ucs4 = now->val.ucs4;
1194 }
1195 else if ((*seqp)->ucs4 != now->val.ucs4)
1196 *seqp = NULL;
19bc17a9 1197
4b10dd6c
UD
1198 *wchp = now->val.ucs4;
1199 }
1200 else if (now->tok == tok_charcode)
1201 {
1202 /* We must map from the byte code to UCS4. */
1203 *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1204 now->val.str.lenmb);
19bc17a9 1205
4b10dd6c
UD
1206 if (*seqp == NULL)
1207 *wchp = ILLEGAL_CHAR_VALUE;
1208 else
1209 {
1210 if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1211 (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1212 strlen ((*seqp)->name));
1213 *wchp = (*seqp)->ucs4;
1214 }
1215 }
1216 else
1217 return 1;
19bc17a9
RM
1218
1219 return 0;
1220}
1221
1222
4b10dd6c
UD
1223/* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>'. */
1224static void
1225charclass_symbolic_ellipsis (struct linereader *ldfile,
1226 struct locale_ctype_t *ctype,
1227 struct charmap_t *charmap,
1228 struct repertoire_t *repertoire,
1229 struct token *now,
1230 const char *last_str,
1231 unsigned long int class256_bit,
1232 unsigned long int class_bit, int base,
1233 int ignore_content, int handle_digits)
19bc17a9 1234{
4b10dd6c
UD
1235 const char *nowstr = now->val.str.startmb;
1236 char tmp[now->val.str.lenmb + 1];
1237 const char *cp;
1238 char *endp;
1239 unsigned long int from;
1240 unsigned long int to;
19bc17a9 1241
4b10dd6c
UD
1242 /* We have to compute the ellipsis values using the symbolic names. */
1243 assert (last_str != NULL);
1244
1245 if (strlen (last_str) != now->val.str.lenmb)
19bc17a9 1246 {
4b10dd6c
UD
1247 invalid_range:
1248 lr_error (ldfile,
1249 _("`%s' and `%s' are no valid names for symbolic range"),
1250 last_str, nowstr);
1251 return;
19bc17a9
RM
1252 }
1253
4b10dd6c
UD
1254 if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1255 /* Nothing to do, the names are the same. */
1256 return;
19bc17a9 1257
4b10dd6c
UD
1258 for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1259 ;
19bc17a9 1260
4b10dd6c
UD
1261 errno = 0;
1262 from = strtoul (cp, &endp, base);
1263 if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1264 goto invalid_range;
19bc17a9 1265
4b10dd6c
UD
1266 to = strtoul (nowstr + (cp - last_str), &endp, base);
1267 if ((to == UINT_MAX && errno == ERANGE) || *endp != '\0' || from >= to)
1268 goto invalid_range;
19bc17a9 1269
4b10dd6c
UD
1270 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1271 if (!ignore_content)
1272 {
1273 now->val.str.startmb = tmp;
1274 while (++from <= to)
1275 {
1276 struct charseq *seq;
1277 uint32_t wch;
19bc17a9 1278
4b10dd6c
UD
1279 sprintf (tmp, (base == 10 ? "%.*s%0*d" : "%.*s%0*X"), cp - last_str,
1280 last_str, now->val.str.lenmb - (cp - last_str), from);
19bc17a9 1281
4b10dd6c
UD
1282 get_character (now, charmap, repertoire, &seq, &wch);
1283
1284 if (seq != NULL && seq->nbytes == 1)
1285 /* Yep, we can store information about this byte sequence. */
1286 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
19bc17a9 1287
4b10dd6c
UD
1288 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1289 /* We have the UCS4 position. */
1290 *find_idx (ctype, &ctype->class_collection,
1291 &ctype->class_collection_max,
1292 &ctype->class_collection_act, wch) |= class_bit;
19bc17a9 1293
4b10dd6c
UD
1294 if (handle_digits == 1)
1295 {
1296 /* We must store the digit values. */
1297 if (ctype->mbdigits_act == ctype->mbdigits_max)
1298 {
1299 ctype->mbdigits_max *= 2;
1300 ctype->mbdigits = xrealloc (ctype->mbdigits,
1301 (ctype->mbdigits_max
1302 * sizeof (char *)));
1303 ctype->wcdigits_max *= 2;
1304 ctype->wcdigits = xrealloc (ctype->wcdigits,
1305 (ctype->wcdigits_max
1306 * sizeof (uint32_t)));
1307 }
1308
1309 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1310 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1311 }
1312 else if (handle_digits == 2)
1313 {
1314 /* We must store the digit values. */
1315 if (ctype->outdigits_act >= 10)
1316 {
1317 lr_error (ldfile, _("\
1318%s: field `%s' does not contain exactly ten entries"),
1319 "LC_CTYPE", "outdigit");
1320 return;
1321 }
1322
1323 ctype->mboutdigits[ctype->outdigits_act] = seq;
1324 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1325 ++ctype->outdigits_act;
1326 }
1327 }
1328 }
19bc17a9
RM
1329}
1330
1331
4b10dd6c
UD
1332/* Ellipsis like in `<U1234>..<U2345>'. */
1333static void
1334charclass_ucs4_ellipsis (struct linereader *ldfile,
1335 struct locale_ctype_t *ctype,
1336 struct charmap_t *charmap,
1337 struct repertoire_t *repertoire,
1338 struct token *now, uint32_t last_wch,
1339 unsigned long int class256_bit,
1340 unsigned long int class_bit, int ignore_content,
1341 int handle_digits)
19bc17a9 1342{
4b10dd6c 1343 if (last_wch > now->val.ucs4)
19bc17a9 1344 {
4b10dd6c
UD
1345 lr_error (ldfile, _("\
1346to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1347 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1348 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
19bc17a9
RM
1349 return;
1350 }
1351
4b10dd6c
UD
1352 if (!ignore_content)
1353 while (++last_wch <= now->val.ucs4)
1354 {
1355 /* We have to find out whether there is a byte sequence corresponding
1356 to this UCS4 value. */
1357 struct charseq *seq = repertoire_find_seq (repertoire, last_wch);
19bc17a9 1358
4b10dd6c
UD
1359 /* If this is the first time we look for this sequence create a new
1360 entry. */
1361 if (seq == NULL)
1362 {
1363 /* Find the symbolic name for this UCS4 value. */
1364 const char *symbol = repertoire_find_symbol (repertoire, last_wch);
1365 uint32_t *newp = obstack_alloc (&repertoire->mem_pool, 4);
1366 *newp = last_wch;
19bc17a9 1367
4b10dd6c
UD
1368 if (symbol != NULL)
1369 /* We have a name, now search the multibyte value. */
1370 seq = charmap_find_value (charmap, symbol, strlen (symbol));
19bc17a9 1371
4b10dd6c
UD
1372 if (seq == NULL)
1373 {
1374 /* We have to create a fake entry. */
1375 static const struct charseq negative
1376 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1377 seq = (struct charseq *) &negative;
1378 }
1379 else
1380 seq->ucs4 = last_wch;
1381
1382 insert_entry (&repertoire->seq_table, newp, 4, seq);
1383 }
1384
1385 /* We have a name, now search the multibyte value. */
1386 if (seq->ucs4 == last_wch && seq->nbytes == 1)
1387 /* Yep, we can store information about this byte sequence. */
1388 ctype->class256_collection[(size_t) seq->bytes[0]]
1389 |= class256_bit;
1390
1391 /* And of course we have the UCS4 position. */
1392 if (class_bit != 0 && class_bit != 0)
1393 *find_idx (ctype, &ctype->class_collection,
1394 &ctype->class_collection_max,
1395 &ctype->class_collection_act, last_wch) |= class_bit;
1396
1397 if (handle_digits == 1)
1398 {
1399 /* We must store the digit values. */
1400 if (ctype->mbdigits_act == ctype->mbdigits_max)
1401 {
1402 ctype->mbdigits_max *= 2;
1403 ctype->mbdigits = xrealloc (ctype->mbdigits,
1404 (ctype->mbdigits_max
1405 * sizeof (char *)));
1406 ctype->wcdigits_max *= 2;
1407 ctype->wcdigits = xrealloc (ctype->wcdigits,
1408 (ctype->wcdigits_max
1409 * sizeof (uint32_t)));
1410 }
1411
1412 ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1413 ? seq : NULL);
1414 ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1415 }
1416 else if (handle_digits == 2)
1417 {
1418 /* We must store the digit values. */
1419 if (ctype->outdigits_act >= 10)
1420 {
1421 lr_error (ldfile, _("\
1422%s: field `%s' does not contain exactly ten entries"),
1423 "LC_CTYPE", "outdigit");
1424 return;
1425 }
19bc17a9 1426
4b10dd6c
UD
1427 ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1428 ? seq : NULL);
1429 ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1430 ++ctype->outdigits_act;
1431 }
1432 }
19bc17a9
RM
1433}
1434
1435
4b10dd6c 1436/* Ellipsis as in `/xea/x12.../xea/x34'. */
19bc17a9 1437static void
4b10dd6c
UD
1438charclass_charcode_ellipsis (struct linereader *ldfile,
1439 struct locale_ctype_t *ctype,
1440 struct charmap_t *charmap,
1441 struct repertoire_t *repertoire,
1442 struct token *now, char *last_charcode,
1443 uint32_t last_charcode_len,
1444 unsigned long int class256_bit,
1445 unsigned long int class_bit, int ignore_content,
1446 int handle_digits)
19bc17a9 1447{
4b10dd6c
UD
1448 /* First check whether the to-value is larger. */
1449 if (now->val.charcode.nbytes != last_charcode_len)
1450 {
1451 lr_error (ldfile, _("\
1452start end end character sequence of range must have the same length"));
1453 return;
1454 }
19bc17a9 1455
4b10dd6c 1456 if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
19bc17a9 1457 {
4b10dd6c
UD
1458 lr_error (ldfile, _("\
1459to-value character sequence is smaller than from-value sequence"));
19bc17a9
RM
1460 return;
1461 }
1462
4b10dd6c
UD
1463 if (!ignore_content)
1464 {
1465 do
1466 {
1467 /* Increment the byte sequence value. */
1468 struct charseq *seq;
1469 uint32_t wch;
1470 int i;
1471
1472 for (i = last_charcode_len - 1; i >= 0; --i)
1473 if (++last_charcode[i] != 0)
1474 break;
1475
1476 if (last_charcode_len == 1)
1477 /* Of course we have the charcode value. */
1478 ctype->class256_collection[(size_t) last_charcode[0]]
1479 |= class256_bit;
1480
1481 /* Find the symbolic name. */
1482 seq = charmap_find_symbol (charmap, last_charcode,
1483 last_charcode_len);
1484 if (seq != NULL)
1485 {
1486 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1487 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1488 strlen (seq->name));
1489 wch = seq->ucs4;
1490
1491 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1492 *find_idx (ctype, &ctype->class_collection,
1493 &ctype->class_collection_max,
1494 &ctype->class_collection_act, wch) |= class_bit;
1495 }
1496 else
1497 wch = ILLEGAL_CHAR_VALUE;
19bc17a9 1498
4b10dd6c
UD
1499 if (handle_digits == 1)
1500 {
1501 /* We must store the digit values. */
1502 if (ctype->mbdigits_act == ctype->mbdigits_max)
1503 {
1504 ctype->mbdigits_max *= 2;
1505 ctype->mbdigits = xrealloc (ctype->mbdigits,
1506 (ctype->mbdigits_max
1507 * sizeof (char *)));
1508 ctype->wcdigits_max *= 2;
1509 ctype->wcdigits = xrealloc (ctype->wcdigits,
1510 (ctype->wcdigits_max
1511 * sizeof (uint32_t)));
1512 }
1513
1514 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1515 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1516 seq->nbytes = last_charcode_len;
1517
1518 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1519 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1520 }
1521 else if (handle_digits == 2)
1522 {
1523 struct charseq *seq;
1524 /* We must store the digit values. */
1525 if (ctype->outdigits_act >= 10)
1526 {
1527 lr_error (ldfile, _("\
1528%s: field `%s' does not contain exactly ten entries"),
1529 "LC_CTYPE", "outdigit");
1530 return;
1531 }
1532
1533 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1534 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1535 seq->nbytes = last_charcode_len;
1536
1537 ctype->mboutdigits[ctype->outdigits_act] = seq;
1538 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1539 ++ctype->outdigits_act;
1540 }
1541 }
1542 while (memcmp (last_charcode, now->val.charcode.bytes,
1543 last_charcode_len) != 0);
1544 }
19bc17a9
RM
1545}
1546
1547
4b10dd6c
UD
1548/* Read one transliteration entry. */
1549static uint32_t *
1550read_widestring (struct linereader *ldfile, struct token *now,
1551 struct charmap_t *charmap, struct repertoire_t *repertoire)
19bc17a9 1552{
4b10dd6c 1553 uint32_t *wstr;
19bc17a9 1554
4b10dd6c
UD
1555 if (now->tok == tok_default_missing)
1556 /* The special name "" will denote this case. */
1557 wstr = (uint32_t *) L"";
1558 else if (now->tok == tok_bsymbol)
19bc17a9 1559 {
4b10dd6c
UD
1560 /* Get the value from the repertoire. */
1561 wstr = xmalloc (2 * sizeof (uint32_t));
1562 wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1563 now->val.str.lenmb);
1564 if (wstr[0] == ILLEGAL_CHAR_VALUE)
1565 /* We cannot proceed, we don't know the UCS4 value. */
1566 return NULL;
1567
1568 wstr[1] = 0;
19bc17a9 1569 }
4b10dd6c 1570 else if (now->tok == tok_ucs4)
19bc17a9 1571 {
4b10dd6c
UD
1572 wstr = xmalloc (2 * sizeof (uint32_t));
1573 wstr[0] = now->val.ucs4;
1574 wstr[1] = 0;
1575 }
1576 else if (now->tok == tok_charcode)
1577 {
1578 /* Argh, we have to convert to the symbol name first and then to the
1579 UCS4 value. */
1580 struct charseq *seq = charmap_find_symbol (charmap,
1581 now->val.str.startmb,
1582 now->val.str.lenmb);
1583 if (seq == NULL)
1584 /* Cannot find the UCS4 value. */
1585 return NULL;
1586
1587 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1588 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1589 strlen (seq->name));
1590 if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1591 /* We cannot proceed, we don't know the UCS4 value. */
1592 return NULL;
1593
1594 wstr = xmalloc (2 * sizeof (uint32_t));
1595 wstr[0] = seq->ucs4;
1596 wstr[1] = 0;
1597 }
1598 else if (now->tok == tok_string)
1599 {
1600 wstr = now->val.str.startwc;
1601 if (wstr[0] == 0)
1602 return NULL;
1603 }
1604 else
1605 {
1606 if (now->tok != tok_eol && now->tok != tok_eof)
1607 lr_ignore_rest (ldfile, 0);
1608 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1609 return (uint32_t *) -1l;
19bc17a9
RM
1610 }
1611
4b10dd6c
UD
1612 return wstr;
1613}
19bc17a9 1614
19bc17a9 1615
4b10dd6c
UD
1616static void
1617read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1618 struct token *now, struct charmap_t *charmap,
1619 struct repertoire_t *repertoire)
1620{
1621 uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1622 struct translit_t *result;
1623 struct translit_to_t **top;
1624 struct obstack *ob = &ctype->mem_pool;
1625 int first;
1626 int ignore;
1627
1628 if (from_wstr == NULL)
1629 /* There is no valid from string. */
1630 return;
19bc17a9 1631
4b10dd6c
UD
1632 result = (struct translit_t *) obstack_alloc (ob,
1633 sizeof (struct translit_t));
1634 result->from = from_wstr;
1635 result->next = NULL;
1636 result->to = NULL;
1637 top = &result->to;
1638 first = 1;
1639 ignore = 0;
1640
1641 while (1)
1642 {
1643 uint32_t *to_wstr;
1644
1645 /* Next we have one or more transliterations. They are
1646 separated by semicolons. */
1647 now = lr_token (ldfile, charmap, repertoire);
1648
1649 if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1650 {
1651 /* One string read. */
1652 const uint32_t zero = 0;
1653
1654 if (!ignore)
1655 {
1656 obstack_grow (ob, &zero, 4);
1657 to_wstr = obstack_finish (ob);
1658
1659 *top = obstack_alloc (ob, sizeof (struct translit_to_t));
1660 (*top)->str = to_wstr;
1661 (*top)->next = NULL;
1662 }
1663
1664 if (now->tok == tok_eol)
1665 {
1666 result->next = ctype->translit;
1667 ctype->translit = result;
1668 return;
1669 }
1670
1671 if (!ignore)
1672 top = &(*top)->next;
1673 ignore = 0;
1674 }
1675 else
1676 {
1677 to_wstr = read_widestring (ldfile, now, charmap, repertoire);
1678 if (to_wstr == (uint32_t *) -1l)
1679 {
1680 /* An error occurred. */
1681 obstack_free (ob, result);
1682 return;
1683 }
1684
1685 if (to_wstr == NULL)
1686 ignore = 1;
1687 else
1688 /* This value is usable. */
1689 obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
19bc17a9 1690
4b10dd6c
UD
1691 first = 0;
1692 }
1693 }
19bc17a9
RM
1694}
1695
1696
4b10dd6c
UD
1697/* The parser for the LC_CTYPE section of the locale definition. */
1698void
1699ctype_read (struct linereader *ldfile, struct localedef_t *result,
1700 struct charmap_t *charmap, const char *repertoire_name,
1701 int ignore_content)
19bc17a9 1702{
4b10dd6c
UD
1703 struct repertoire_t *repertoire = NULL;
1704 struct locale_ctype_t *ctype;
1705 struct token *now;
1706 enum token_t nowtok;
19bc17a9 1707 size_t cnt;
4b10dd6c
UD
1708 struct charseq *last_seq;
1709 uint32_t last_wch = 0;
1710 enum token_t last_token;
1711 enum token_t ellipsis_token;
1712 char last_charcode[16];
1713 size_t last_charcode_len = 0;
1714 const char *last_str = NULL;
1715 int mapidx;
19bc17a9 1716
4b10dd6c
UD
1717 /* Get the repertoire we have to use. */
1718 if (repertoire_name != NULL)
1719 repertoire = repertoire_read (repertoire_name);
19bc17a9 1720
4b10dd6c
UD
1721 /* The rest of the line containing `LC_CTYPE' must be free. */
1722 lr_ignore_rest (ldfile, 1);
19bc17a9 1723
4b10dd6c
UD
1724
1725 do
19bc17a9 1726 {
4b10dd6c
UD
1727 now = lr_token (ldfile, charmap, NULL);
1728 nowtok = now->tok;
19bc17a9 1729 }
4b10dd6c 1730 while (nowtok == tok_eol);
19bc17a9 1731
4b10dd6c
UD
1732 /* If we see `copy' now we are almost done. */
1733 if (nowtok == tok_copy)
1734 {
1735 handle_copy (ldfile, charmap, repertoire, tok_lc_ctype, LC_CTYPE,
1736 "LC_CTYPE", ignore_content);
1737 return;
1738 }
75cd5204 1739
4b10dd6c
UD
1740 /* Prepare the data structures. */
1741 ctype_startup (ldfile, result, charmap, ignore_content);
1742 ctype = result->categories[LC_CTYPE].ctype;
1743
1744 /* Remember the repertoire we use. */
1745 if (!ignore_content)
1746 ctype->repertoire = repertoire;
1747
1748 while (1)
19bc17a9 1749 {
4b10dd6c
UD
1750 unsigned long int class_bit = 0;
1751 unsigned long int class256_bit = 0;
1752 int handle_digits = 0;
1753
1754 /* Of course we don't proceed beyond the end of file. */
1755 if (nowtok == tok_eof)
1756 break;
1757
1758 /* Ingore empty lines. */
1759 if (nowtok == tok_eol)
19bc17a9 1760 {
4b10dd6c
UD
1761 now = lr_token (ldfile, charmap, NULL);
1762 nowtok = now->tok;
1763 continue;
1764 }
19bc17a9 1765
4b10dd6c
UD
1766 switch (nowtok)
1767 {
1768 case tok_class:
1769 /* We simply forget the `class' keyword and use the following
1770 operand to determine the bit. */
1771 now = lr_token (ldfile, charmap, NULL);
1772 if (now->tok == tok_ident || now->tok == tok_string)
1773 {
1774 /* Must be one of the predefined class names. */
1775 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1776 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
1777 break;
1778 if (cnt >= ctype->nr_charclass)
1779 {
1780 if (now->val.str.lenmb == 8
1781 && memcmp ("special1", now->val.str.startmb, 8) == 0)
1782 class_bit = _ISwspecial1;
1783 else if (now->val.str.lenmb == 8
1784 && memcmp ("special2", now->val.str.startmb, 8) == 0)
1785 class_bit = _ISwspecial2;
1786 else if (now->val.str.lenmb == 8
1787 && memcmp ("special3", now->val.str.startmb, 8) == 0)
1788 class_bit = _ISwspecial3;
1789 else
1790 {
1791 lr_error (ldfile, _("\
1792unknown character class `%s' in category `LC_CTYPE'"),
1793 now->val.str.startmb);
1794 free (now->val.str.startmb);
1795
1796 lr_ignore_rest (ldfile, 0);
1797 continue;
1798 }
1799 }
1800 else
1801 class_bit = _ISwbit (cnt);
1802
1803 free (now->val.str.startmb);
1804 }
1805 else if (now->tok == tok_digit)
1806 goto handle_tok_digit;
1807 else if (now->tok < tok_upper || now->tok > tok_blank)
1808 goto err_label;
1809 else
1810 {
1811 class_bit = BITw (now->tok);
1812 class256_bit = BIT (now->tok);
1813 }
1814
1815 /* The next character must be a semicolon. */
1816 now = lr_token (ldfile, charmap, NULL);
1817 if (now->tok != tok_semicolon)
1818 goto err_label;
1819 goto read_charclass;
1820
1821 case tok_upper:
1822 case tok_lower:
1823 case tok_alpha:
1824 case tok_alnum:
1825 case tok_space:
1826 case tok_cntrl:
1827 case tok_punct:
1828 case tok_graph:
1829 case tok_print:
1830 case tok_xdigit:
1831 case tok_blank:
1832 class_bit = BITw (now->tok);
1833 class256_bit = BIT (now->tok);
1834 handle_digits = 0;
1835 read_charclass:
1836 ctype->class_done |= class_bit;
1837 last_token = tok_none;
1838 ellipsis_token = tok_none;
1839 now = lr_token (ldfile, charmap, NULL);
1840 while (now->tok != tok_eol && now->tok != tok_eof)
1841 {
1842 uint32_t wch;
1843 struct charseq *seq;
1844
1845 if (ellipsis_token == tok_none)
1846 {
1847 if (get_character (now, charmap, repertoire, &seq, &wch))
1848 goto err_label;
1849
1850 if (!ignore_content && seq != NULL && seq->nbytes == 1)
1851 /* Yep, we can store information about this byte
1852 sequence. */
1853 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1854
1855 if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
1856 && class_bit != 0)
1857 /* We have the UCS4 position. */
1858 *find_idx (ctype, &ctype->class_collection,
1859 &ctype->class_collection_max,
1860 &ctype->class_collection_act, wch) |= class_bit;
1861
1862 last_token = now->tok;
1863 last_str = now->val.str.startmb;
1864 last_seq = seq;
1865 last_wch = wch;
1866 memcpy (last_charcode, now->val.charcode.bytes, 16);
1867 last_charcode_len = now->val.charcode.nbytes;
1868
1869 if (!ignore_content && handle_digits == 1)
1870 {
1871 /* We must store the digit values. */
1872 if (ctype->mbdigits_act == ctype->mbdigits_max)
1873 {
1874 ctype->mbdigits_max *= 2;
1875 ctype->mbdigits = xrealloc (ctype->mbdigits,
1876 (ctype->mbdigits_max
1877 * sizeof (char *)));
1878 ctype->wcdigits_max *= 2;
1879 ctype->wcdigits = xrealloc (ctype->wcdigits,
1880 (ctype->wcdigits_max
1881 * sizeof (uint32_t)));
1882 }
1883
1884 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1885 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1886 }
1887 else if (!ignore_content && handle_digits == 2)
1888 {
1889 /* We must store the digit values. */
1890 if (ctype->outdigits_act >= 10)
1891 {
1892 lr_error (ldfile, _("\
1893%s: field `%s' does not contain exactly ten entries"),
1894 "LC_CTYPE", "outdigit");
1895 goto err_label;
1896 }
1897
1898 ctype->mboutdigits[ctype->outdigits_act] = seq;
1899 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1900 ++ctype->outdigits_act;
1901 }
1902 }
1903 else
1904 {
1905 /* Now it gets complicated. We have to resolve the
1906 ellipsis problem. First we must distinguish between
1907 the different kind of ellipsis and this must match the
1908 tokens we have seen. */
1909 assert (last_token != tok_none);
1910
1911 if (last_token != now->tok)
1912 {
1913 lr_error (ldfile, _("\
1914ellipsis range must be marked by two operands of same type"));
1915 lr_ignore_rest (ldfile, 0);
1916 break;
1917 }
1918
1919 if (last_token == tok_bsymbol)
1920 {
1921 if (ellipsis_token == tok_ellipsis3)
1922 lr_error (ldfile, _("with symbolic name range values \
1923the absolute ellipsis `...' must not be used"));
1924
1925 charclass_symbolic_ellipsis (ldfile, ctype, charmap,
1926 repertoire, now, last_str,
1927 class256_bit, class_bit,
1928 (ellipsis_token
1929 == tok_ellipsis4
1930 ? 10 : 16),
1931 ignore_content,
1932 handle_digits);
1933 }
1934 else if (last_token == tok_ucs4)
1935 {
1936 if (ellipsis_token != tok_ellipsis2)
1937 lr_error (ldfile, _("\
1938with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
1939
1940 charclass_ucs4_ellipsis (ldfile, ctype, charmap,
1941 repertoire, now, last_wch,
1942 class256_bit, class_bit,
1943 ignore_content, handle_digits);
1944 }
1945 else
1946 {
1947 assert (last_token == tok_charcode);
1948
1949 if (ellipsis_token != tok_ellipsis3)
1950 lr_error (ldfile, _("\
1951with character code range values one must use the absolute ellipsis `...'"));
1952
1953 charclass_charcode_ellipsis (ldfile, ctype, charmap,
1954 repertoire, now,
1955 last_charcode,
1956 last_charcode_len,
1957 class256_bit, class_bit,
1958 ignore_content,
1959 handle_digits);
1960 }
1961
1962 /* Now we have used the last value. */
1963 last_token = tok_none;
1964 }
1965
1966 /* Next we expect a semicolon or the end of the line. */
1967 now = lr_token (ldfile, charmap, NULL);
1968 if (now->tok == tok_eol || now->tok == tok_eof)
1969 break;
1970
1971 if (last_token != tok_none
1972 && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4)
1973 {
1974 ellipsis_token = now->tok;
1975 now = lr_token (ldfile, charmap, NULL);
1976 continue;
1977 }
1978
1979 if (now->tok != tok_semicolon)
1980 goto err_label;
1981
1982 /* And get the next character. */
1983 now = lr_token (ldfile, charmap, NULL);
1984
1985 ellipsis_token = tok_none;
1986 }
1987 break;
1988
1989 case tok_digit:
1990 handle_tok_digit:
1991 class_bit = _ISwdigit;
1992 class256_bit = _ISdigit;
1993 handle_digits = 1;
1994 goto read_charclass;
1995
1996 case tok_outdigit:
1997 if (ctype->outdigits_act != 0)
1998 lr_error (ldfile, _("\
1999%s: field `%s' declared more than once"),
2000 "LC_CTYPE", "outdigit");
2001 class_bit = 0;
2002 class256_bit = 0;
2003 handle_digits = 2;
2004 goto read_charclass;
2005
2006 case tok_toupper:
2007 mapidx = 0;
2008 goto read_mapping;
2009
2010 case tok_tolower:
2011 mapidx = 1;
2012 goto read_mapping;
2013
2014 case tok_map:
2015 /* We simply forget the `map' keyword and use the following
2016 operand to determine the mapping. */
2017 now = lr_token (ldfile, charmap, NULL);
2018 if (now->tok == tok_ident || now->tok == tok_string)
2019 {
2020 size_t cnt;
2021
2022 for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2023 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2024 break;
2025
2026 if (cnt < ctype->map_collection_nr)
2027 mapidx = cnt;
2028 else
2029 {
2030 lr_error (ldfile, _("unknown map `%s'"),
2031 now->val.str.startmb);
2032 lr_ignore_rest (ldfile, 0);
2033 break;
2034 }
2035 }
2036 else if (now->tok < tok_toupper || now->tok > tok_tolower)
2037 goto err_label;
2038 else
2039 mapidx = now->tok - tok_toupper;
2040
2041 now = lr_token (ldfile, charmap, NULL);
2042 /* This better should be a semicolon. */
2043 if (now->tok != tok_semicolon)
2044 goto err_label;
2045
2046 read_mapping:
2047 /* Test whether this mapping was already defined. */
2048 if (ctype->tomap_done[mapidx])
2049 {
2050 lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2051 ctype->mapnames[mapidx]);
2052 lr_ignore_rest (ldfile, 0);
2053 break;
2054 }
2055 ctype->tomap_done[mapidx] = 1;
2056
2057 now = lr_token (ldfile, charmap, NULL);
2058 while (now->tok != tok_eol && now->tok != tok_eof)
2059 {
2060 struct charseq *from_seq;
2061 uint32_t from_wch;
2062 struct charseq *to_seq;
2063 uint32_t to_wch;
2064
2065 /* Every pair starts with an opening brace. */
2066 if (now->tok != tok_open_brace)
2067 goto err_label;
2068
2069 /* Next comes the from-value. */
2070 now = lr_token (ldfile, charmap, NULL);
2071 if (get_character (now, charmap, repertoire, &from_seq,
2072 &from_wch) != 0)
2073 goto err_label;
2074
2075 /* The next is a comma. */
2076 now = lr_token (ldfile, charmap, NULL);
2077 if (now->tok != tok_comma)
2078 goto err_label;
2079
2080 /* And the other value. */
2081 now = lr_token (ldfile, charmap, NULL);
2082 if (get_character (now, charmap, repertoire, &to_seq,
2083 &to_wch) != 0)
2084 goto err_label;
2085
2086 /* And the last thing is the closing brace. */
2087 now = lr_token (ldfile, charmap, NULL);
2088 if (now->tok != tok_close_brace)
2089 goto err_label;
2090
2091 if (!ignore_content)
2092 {
2093 if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2094 && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2095 /* We can use this value. */
2096 ctype->map256_collection[mapidx][from_seq->bytes[0]]
2097 = to_seq->bytes[0];
2098
2099 if (from_wch != ILLEGAL_CHAR_VALUE
2100 && to_wch != ILLEGAL_CHAR_VALUE)
2101 /* Both correct values. */
2102 *find_idx (ctype, &ctype->map_collection[mapidx],
2103 &ctype->map_collection_max[mapidx],
2104 &ctype->map_collection_act[mapidx],
2105 from_wch) = to_wch;
2106 }
2107
2108 /* Now comes a semicolon or the end of the line/file. */
2109 now = lr_token (ldfile, charmap, NULL);
2110 if (now->tok == tok_semicolon)
2111 now = lr_token (ldfile, charmap, NULL);
2112 }
2113 break;
2114
2115 case tok_translit_start:
2116 /* The rest of the line better should be empty. */
2117 lr_ignore_rest (ldfile, 1);
2118
2119 /* We count here the number of allocated entries in the `translit'
2120 array. */
2121 cnt = 0;
2122
2123 /* We proceed until we see the `translit_end' token. */
2124 while (now = lr_token (ldfile, charmap, repertoire),
2125 now->tok != tok_translit_end && now->tok != tok_eof)
2126 {
2127 if (now->tok == tok_eol)
2128 /* Ignore empty lines. */
2129 continue;
2130
2131 if (now->tok == tok_translit_end)
2132 {
2133 lr_ignore_rest (ldfile, 0);
2134 break;
2135 }
2136
2137 if (now->tok == tok_include)
2138 {
2139 /* We have to include locale. */
2140 const char *locale_name;
2141 const char *repertoire_name;
2142
2143 now = lr_token (ldfile, charmap, NULL);
2144 /* This should be a string or an identifier. In any
2145 case something to name a locale. */
2146 if (now->tok != tok_string && now->tok != tok_ident)
2147 {
2148 translit_syntax:
2149 lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2150 lr_ignore_rest (ldfile, 0);
2151 continue;
2152 }
2153 locale_name = now->val.str.startmb;
2154
2155 /* Next should be a semicolon. */
2156 now = lr_token (ldfile, charmap, NULL);
2157 if (now->tok != tok_semicolon)
2158 goto translit_syntax;
2159
2160 /* Now the repertoire name. */
2161 now = lr_token (ldfile, charmap, NULL);
2162 if ((now->tok != tok_string && now->tok != tok_ident)
2163 || now->val.str.startmb == NULL)
2164 goto translit_syntax;
2165 repertoire_name = now->val.str.startmb;
2166
2167 /* We must not have more than one `include'. */
2168 if (ctype->translit_copy_locale != NULL)
2169 {
2170 lr_error (ldfile, _("\
2171%s: only one `include' instruction allowed"), "LC_CTYPE");
2172 lr_ignore_rest (ldfile, 0);
2173 continue;
2174 }
2175
2176 ctype->translit_copy_locale = locale_name;
2177 ctype->translit_copy_repertoire = repertoire_name;
2178
2179 /* The rest of the line must be empty. */
2180 lr_ignore_rest (ldfile, 1);
2181 continue;
2182 }
2183
2184 read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2185 }
2186 break;
2187
2188 case tok_ident:
2189 /* This could mean one of several things. First test whether
2190 it's a character class name. */
2191 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2192 if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2193 break;
2194 if (cnt < ctype->nr_charclass)
2195 {
2196 class_bit = _ISwbit (cnt);
2197 class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2198 free (now->val.str.startmb);
2199 goto read_charclass;
2200 }
2201 if (strcmp (now->val.str.startmb, "special1") == 0)
2202 {
2203 class_bit = _ISwspecial1;
2204 free (now->val.str.startmb);
2205 goto read_charclass;
2206 }
2207 if (strcmp (now->val.str.startmb, "special2") == 0)
2208 {
2209 class_bit = _ISwspecial2;
2210 free (now->val.str.startmb);
2211 goto read_charclass;
2212 }
2213 if (strcmp (now->val.str.startmb, "special3") == 0)
2214 {
2215 class_bit = _ISwspecial3;
2216 free (now->val.str.startmb);
2217 goto read_charclass;
2218 }
2219 if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2220 {
2221 mapidx = 2;
2222 goto read_mapping;
2223 }
2224 break;
2225
2226 case tok_end:
2227 /* Next we assume `LC_CTYPE'. */
2228 now = lr_token (ldfile, charmap, NULL);
2229 if (now->tok == tok_eof)
2230 break;
2231 if (now->tok == tok_eol)
2232 lr_error (ldfile, _("%s: incomplete `END' line"),
2233 "LC_CTYPE");
2234 else if (now->tok != tok_lc_ctype)
2235 lr_error (ldfile, _("\
2236%1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2237 lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2238 return;
2239
2240 default:
2241 err_label:
2242 if (now->tok != tok_eof)
2243 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
19bc17a9
RM
2244 }
2245
4b10dd6c
UD
2246 /* Prepare for the next round. */
2247 now = lr_token (ldfile, charmap, NULL);
2248 nowtok = now->tok;
19bc17a9
RM
2249 }
2250
4b10dd6c
UD
2251 /* When we come here we reached the end of the file. */
2252 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
19bc17a9
RM
2253}
2254
2255
2256static void
4b10dd6c
UD
2257set_class_defaults (struct locale_ctype_t *ctype, struct charmap_t *charmap,
2258 struct repertoire_t *repertoire)
19bc17a9 2259{
4b10dd6c
UD
2260 size_t cnt;
2261
19bc17a9
RM
2262 /* These function defines the default values for the classes and conversions
2263 according to POSIX.2 2.5.2.1.
2264 It may seem that the order of these if-blocks is arbitrary but it is NOT.
2265 Don't move them unless you know what you do! */
2266
4b10dd6c 2267 void set_default (int bitpos, int from, int to)
19bc17a9
RM
2268 {
2269 char tmp[2];
2270 int ch;
4b10dd6c
UD
2271 int bit = _ISbit (bitpos);
2272 int bitw = _ISwbit (bitpos);
19bc17a9
RM
2273 /* Define string. */
2274 strcpy (tmp, "?");
2275
2276 for (ch = from; ch <= to; ++ch)
2277 {
4b10dd6c
UD
2278 uint32_t value;
2279 struct charseq *seq;
19bc17a9
RM
2280 tmp[0] = ch;
2281
4b10dd6c
UD
2282 value = repertoire_find_value (repertoire, tmp, 1);
2283 if (value == ILLEGAL_CHAR_VALUE)
19bc17a9 2284 {
880f421f
UD
2285 if (!be_quiet)
2286 error (0, 0, _("\
4b10dd6c
UD
2287%s: character `%s' not defined in repertoire while needed as default value"),
2288 "LC_CTYPE", tmp);
2289 }
2290 else
2291 ELEM (ctype, class_collection, , value) |= bitw;
2292
2293 seq = charmap_find_value (charmap, tmp, 1);
2294 if (seq == NULL)
2295 {
2296 if (!be_quiet)
2297 error (0, 0, _("\
2298%s: character `%s' not defined in charmap while needed as default value"),
2299 "LC_CTYPE", tmp);
19bc17a9 2300 }
4b10dd6c
UD
2301 else if (seq->nbytes != 1)
2302 error (0, 0, _("\
2303%s: character `%s' in charmap not representable with one byte"),
2304 "LC_CTYPE", tmp);
19bc17a9 2305 else
4b10dd6c 2306 ctype->class256_collection[seq->bytes[0]] |= bit;
19bc17a9
RM
2307 }
2308 }
2309
2310 /* Set default values if keyword was not present. */
4b10dd6c 2311 if ((ctype->class_done & BITw (tok_upper)) == 0)
19bc17a9
RM
2312 /* "If this keyword [lower] is not specified, the lowercase letters
2313 `A' through `Z', ..., shall automatically belong to this class,
2314 with implementation defined character values." [P1003.2, 2.5.2.1] */
4b10dd6c 2315 set_default (BITPOS (tok_upper), 'A', 'Z');
19bc17a9 2316
4b10dd6c 2317 if ((ctype->class_done & BITw (tok_lower)) == 0)
19bc17a9
RM
2318 /* "If this keyword [lower] is not specified, the lowercase letters
2319 `a' through `z', ..., shall automatically belong to this class,
2320 with implementation defined character values." [P1003.2, 2.5.2.1] */
4b10dd6c 2321 set_default (BITPOS (tok_lower), 'a', 'z');
19bc17a9 2322
4b10dd6c 2323 if ((ctype->class_done & BITw (tok_alpha)) == 0)
19bc17a9
RM
2324 {
2325 /* Table 2-6 in P1003.2 says that characters in class `upper' or
2326 class `lower' *must* be in class `alpha'. */
2327 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
19bc17a9
RM
2328
2329 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2330 if ((ctype->class_collection[cnt] & mask) != 0)
2331 ctype->class_collection[cnt] |= BIT (tok_alpha);
2332 }
2333
4b10dd6c 2334 if ((ctype->class_done & BITw (tok_digit)) == 0)
19bc17a9
RM
2335 /* "If this keyword [digit] is not specified, the digits `0' through
2336 `9', ..., shall automatically belong to this class, with
2337 implementation-defined character values." [P1003.2, 2.5.2.1] */
4b10dd6c 2338 set_default (BITPOS (tok_digit), '0', '9');
19bc17a9
RM
2339
2340 /* "Only characters specified for the `alpha' and `digit' keyword
2341 shall be specified. Characters specified for the keyword `alpha'
2342 and `digit' are automatically included in this class. */
2343 {
2344 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
19bc17a9
RM
2345
2346 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2347 if ((ctype->class_collection[cnt] & mask) != 0)
2348 ctype->class_collection[cnt] |= BIT (tok_alnum);
2349 }
2350
4b10dd6c 2351 if ((ctype->class_done & BITw (tok_space)) == 0)
19bc17a9
RM
2352 /* "If this keyword [space] is not specified, the characters <space>,
2353 <form-feed>, <newline>, <carriage-return>, <tab>, and
2354 <vertical-tab>, ..., shall automatically belong to this class,
2355 with implementation-defined character values." [P1003.2, 2.5.2.1] */
2356 {
4b10dd6c
UD
2357 uint32_t value;
2358 struct charseq *seq;
19bc17a9 2359
4b10dd6c
UD
2360 value = repertoire_find_value (repertoire, "space", 5);
2361 if (value == ILLEGAL_CHAR_VALUE)
880f421f
UD
2362 {
2363 if (!be_quiet)
2364 error (0, 0, _("\
4b10dd6c
UD
2365%s: character `%s' not defined while needed as default value"),
2366 "LC_CTYPE", "<space>");
880f421f 2367 }
19bc17a9
RM
2368 else
2369 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
2370
4b10dd6c
UD
2371 seq = charmap_find_value (charmap, "space", 5);
2372 if (seq == NULL)
880f421f
UD
2373 {
2374 if (!be_quiet)
2375 error (0, 0, _("\
4b10dd6c
UD
2376%s: character `%s' not defined while needed as default value"),
2377 "LC_CTYPE", "<space>");
2378 }
2379 else if (seq->nbytes != 1)
2380 error (0, 0, _("\
2381%s: character `%s' in charmap not representable with one byte"),
2382 "LC_CTYPE", "<space>");
2383 else
2384 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2385
2386
2387 value = repertoire_find_value (repertoire, "form-feed", 9);
2388 if (value == ILLEGAL_CHAR_VALUE)
2389 {
2390 if (!be_quiet)
2391 error (0, 0, _("\
2392%s: character `%s' not defined while needed as default value"),
2393 "LC_CTYPE", "<form-feed>");
880f421f 2394 }
19bc17a9
RM
2395 else
2396 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
2397
4b10dd6c
UD
2398 seq = charmap_find_value (charmap, "form-feed", 9);
2399 if (seq == NULL)
880f421f
UD
2400 {
2401 if (!be_quiet)
2402 error (0, 0, _("\
4b10dd6c
UD
2403%s: character `%s' not defined while needed as default value"),
2404 "LC_CTYPE", "<form-feed>");
2405 }
2406 else if (seq->nbytes != 1)
2407 error (0, 0, _("\
2408%s: character `%s' in charmap not representable with one byte"),
2409 "LC_CTYPE", "<form-feed>");
2410 else
2411 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2412
2413
2414 value = repertoire_find_value (repertoire, "newline", 7);
2415 if (value == ILLEGAL_CHAR_VALUE)
2416 {
2417 if (!be_quiet)
2418 error (0, 0, _("\
2419%s: character `%s' not defined while needed as default value"),
2420 "LC_CTYPE", "<newline>");
880f421f 2421 }
19bc17a9
RM
2422 else
2423 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
2424
4b10dd6c
UD
2425 seq = charmap_find_value (charmap, "newline", 7);
2426 if (seq == NULL)
880f421f
UD
2427 {
2428 if (!be_quiet)
2429 error (0, 0, _("\
19bc17a9 2430character `%s' not defined while needed as default value"),
4b10dd6c
UD
2431 "<newline>");
2432 }
2433 else if (seq->nbytes != 1)
2434 error (0, 0, _("\
2435%s: character `%s' in charmap not representable with one byte"),
2436 "LC_CTYPE", "<newline>");
2437 else
2438 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2439
2440
2441 value = repertoire_find_value (repertoire, "carriage-return", 15);
2442 if (value == ILLEGAL_CHAR_VALUE)
2443 {
2444 if (!be_quiet)
2445 error (0, 0, _("\
2446%s: character `%s' not defined while needed as default value"),
2447 "LC_CTYPE", "<carriage-return>");
880f421f 2448 }
19bc17a9
RM
2449 else
2450 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
2451
4b10dd6c
UD
2452 seq = charmap_find_value (charmap, "carriage-return", 15);
2453 if (seq == NULL)
880f421f
UD
2454 {
2455 if (!be_quiet)
2456 error (0, 0, _("\
4b10dd6c
UD
2457%s: character `%s' not defined while needed as default value"),
2458 "LC_CTYPE", "<carriage-return>");
2459 }
2460 else if (seq->nbytes != 1)
2461 error (0, 0, _("\
2462%s: character `%s' in charmap not representable with one byte"),
2463 "LC_CTYPE", "<carriage-return>");
2464 else
2465 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2466
2467
2468 value = repertoire_find_value (repertoire, "tab", 3);
2469 if (value == ILLEGAL_CHAR_VALUE)
2470 {
2471 if (!be_quiet)
2472 error (0, 0, _("\
2473%s: character `%s' not defined while needed as default value"),
2474 "LC_CTYPE", "<tab>");
880f421f 2475 }
19bc17a9
RM
2476 else
2477 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
2478
4b10dd6c
UD
2479 seq = charmap_find_value (charmap, "tab", 3);
2480 if (seq == NULL)
880f421f
UD
2481 {
2482 if (!be_quiet)
2483 error (0, 0, _("\
4b10dd6c
UD
2484%s: character `%s' not defined while needed as default value"),
2485 "LC_CTYPE", "<tab>");
2486 }
2487 else if (seq->nbytes != 1)
2488 error (0, 0, _("\
2489%s: character `%s' in charmap not representable with one byte"),
2490 "LC_CTYPE", "<tab>");
2491 else
2492 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2493
2494
2495 value = repertoire_find_value (repertoire, "vertical-tab", 12);
2496 if (value == ILLEGAL_CHAR_VALUE)
2497 {
2498 if (!be_quiet)
2499 error (0, 0, _("\
2500%s: character `%s' not defined while needed as default value"),
2501 "LC_CTYPE", "<vertical-tab>");
880f421f 2502 }
19bc17a9
RM
2503 else
2504 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
4b10dd6c
UD
2505
2506 seq = charmap_find_value (charmap, "vertical-tab", 12);
2507 if (seq == NULL)
2508 {
2509 if (!be_quiet)
2510 error (0, 0, _("\
2511%s: character `%s' not defined while needed as default value"),
2512 "LC_CTYPE", "<vertical-tab>");
2513 }
2514 else if (seq->nbytes != 1)
2515 error (0, 0, _("\
2516%s: character `%s' in charmap not representable with one byte"),
2517 "LC_CTYPE", "<vertical-tab>");
2518 else
2519 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
19bc17a9
RM
2520 }
2521
4b10dd6c 2522 if ((ctype->class_done & BITw (tok_xdigit)) == 0)
19bc17a9
RM
2523 /* "If this keyword is not specified, the digits `0' to `9', the
2524 uppercase letters `A' through `F', and the lowercase letters `a'
2525 through `f', ..., shell automatically belong to this class, with
2526 implementation defined character values." [P1003.2, 2.5.2.1] */
2527 {
4b10dd6c
UD
2528 set_default (BITPOS (tok_xdigit), '0', '9');
2529 set_default (BITPOS (tok_xdigit), 'A', 'F');
2530 set_default (BITPOS (tok_xdigit), 'a', 'f');
19bc17a9
RM
2531 }
2532
4b10dd6c 2533 if ((ctype->class_done & BITw (tok_blank)) == 0)
19bc17a9
RM
2534 /* "If this keyword [blank] is unspecified, the characters <space> and
2535 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
2536 {
4b10dd6c
UD
2537 uint32_t value;
2538 struct charseq *seq;
19bc17a9 2539
4b10dd6c
UD
2540 value = repertoire_find_value (repertoire, "space", 5);
2541 if (value == ILLEGAL_CHAR_VALUE)
880f421f
UD
2542 {
2543 if (!be_quiet)
2544 error (0, 0, _("\
4b10dd6c
UD
2545%s: character `%s' not defined while needed as default value"),
2546 "LC_CTYPE", "<space>");
880f421f 2547 }
19bc17a9
RM
2548 else
2549 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
2550
4b10dd6c
UD
2551 seq = charmap_find_value (charmap, "space", 5);
2552 if (seq == NULL)
880f421f
UD
2553 {
2554 if (!be_quiet)
2555 error (0, 0, _("\
4b10dd6c
UD
2556%s: character `%s' not defined while needed as default value"),
2557 "LC_CTYPE", "<space>");
2558 }
2559 else if (seq->nbytes != 1)
2560 error (0, 0, _("\
2561%s: character `%s' in charmap not representable with one byte"),
2562 "LC_CTYPE", "<space>");
2563 else
2564 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
2565
2566
2567 value = repertoire_find_value (repertoire, "tab", 3);
2568 if (value == ILLEGAL_CHAR_VALUE)
2569 {
2570 if (!be_quiet)
2571 error (0, 0, _("\
2572%s: character `%s' not defined while needed as default value"),
2573 "LC_CTYPE", "<tab>");
880f421f 2574 }
19bc17a9
RM
2575 else
2576 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
4b10dd6c
UD
2577
2578 seq = charmap_find_value (charmap, "tab", 3);
2579 if (seq == NULL)
2580 {
2581 if (!be_quiet)
2582 error (0, 0, _("\
2583%s: character `%s' not defined while needed as default value"),
2584 "LC_CTYPE", "<tab>");
2585 }
2586 else if (seq->nbytes != 1)
2587 error (0, 0, _("\
2588%s: character `%s' in charmap not representable with one byte"),
2589 "LC_CTYPE", "<tab>");
2590 else
2591 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
19bc17a9
RM
2592 }
2593
4b10dd6c 2594 if ((ctype->class_done & BITw (tok_graph)) == 0)
19bc17a9
RM
2595 /* "If this keyword [graph] is not specified, characters specified for
2596 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
2597 shall belong to this character class." [P1003.2, 2.5.2.1] */
2598 {
2599 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
2600 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
2601 size_t cnt;
2602
2603 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2604 if ((ctype->class_collection[cnt] & mask) != 0)
2605 ctype->class_collection[cnt] |= BIT (tok_graph);
4b10dd6c
UD
2606
2607 for (cnt = 0; cnt < 256; ++cnt)
2608 if ((ctype->class256_collection[cnt] & mask) != 0)
2609 ctype->class256_collection[cnt] |= BIT (tok_graph);
19bc17a9
RM
2610 }
2611
4b10dd6c 2612 if ((ctype->class_done & BITw (tok_print)) == 0)
19bc17a9
RM
2613 /* "If this keyword [print] is not provided, characters specified for
2614 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
2615 and the <space> character shall belong to this character class."
2616 [P1003.2, 2.5.2.1] */
2617 {
2618 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
2619 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
2620 size_t cnt;
4b10dd6c
UD
2621 uint32_t space;
2622 struct charseq *seq;
19bc17a9
RM
2623
2624 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2625 if ((ctype->class_collection[cnt] & mask) != 0)
2626 ctype->class_collection[cnt] |= BIT (tok_print);
2627
4b10dd6c
UD
2628 for (cnt = 0; cnt < 256; ++cnt)
2629 if ((ctype->class256_collection[cnt] & mask) != 0)
2630 ctype->class256_collection[cnt] |= BIT (tok_print);
2631
2632
2633 space = repertoire_find_value (repertoire, "space", 5);
880f421f
UD
2634 if (space == ILLEGAL_CHAR_VALUE)
2635 {
2636 if (!be_quiet)
2637 error (0, 0, _("\
4b10dd6c
UD
2638%s: character `%s' not defined while needed as default value"),
2639 "LC_CTYPE", "<space>");
880f421f 2640 }
19bc17a9
RM
2641 else
2642 ELEM (ctype, class_collection, , space) |= BIT (tok_print);
4b10dd6c
UD
2643
2644 seq = charmap_find_value (charmap, "space", 5);
2645 if (seq == NULL)
2646 {
2647 if (!be_quiet)
2648 error (0, 0, _("\
2649%s: character `%s' not defined while needed as default value"),
2650 "LC_CTYPE", "<space>");
2651 }
2652 else if (seq->nbytes != 1)
2653 error (0, 0, _("\
2654%s: character `%s' in charmap not representable with one byte"),
2655 "LC_CTYPE", "<space>");
2656 else
2657 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
19bc17a9
RM
2658 }
2659
4b10dd6c 2660 if (ctype->tomap_done[0] == 0)
6d52618b 2661 /* "If this keyword [toupper] is not specified, the lowercase letters
19bc17a9
RM
2662 `a' through `z', and their corresponding uppercase letters `A' to
2663 `Z', ..., shall automatically be included, with implementation-
2664 defined character values." [P1003.2, 2.5.2.1] */
2665 {
2666 char tmp[4];
2667 int ch;
2668
2669 strcpy (tmp, "<?>");
2670
2671 for (ch = 'a'; ch <= 'z'; ++ch)
2672 {
4b10dd6c
UD
2673 uint32_t value_from, value_to;
2674 struct charseq *seq_from, *seq_to;
19bc17a9
RM
2675
2676 tmp[1] = (char) ch;
2677
4b10dd6c
UD
2678 value_from = repertoire_find_value (repertoire, &tmp[1], 1);
2679 if (value_from == ILLEGAL_CHAR_VALUE)
19bc17a9 2680 {
880f421f
UD
2681 if (!be_quiet)
2682 error (0, 0, _("\
4b10dd6c
UD
2683%s: character `%s' not defined while needed as default value"),
2684 "LC_CTYPE", tmp);
2685 }
2686 else
2687 {
2688 /* This conversion is implementation defined. */
2689 tmp[1] = (char) (ch + ('A' - 'a'));
2690 value_to = repertoire_find_value (repertoire, &tmp[1], 1);
2691 if (value_to == ILLEGAL_CHAR_VALUE)
2692 {
2693 if (!be_quiet)
2694 error (0, 0, _("\
2695%s: character `%s' not defined while needed as default value"),
2696 "LC_CTYPE", tmp);
2697 }
2698 else
2699 /* The index [0] is determined by the order of the
2700 `ctype_map_newP' calls in `ctype_startup'. */
2701 ELEM (ctype, map_collection, [0], value_from) = value_to;
19bc17a9
RM
2702 }
2703
4b10dd6c
UD
2704 seq_from = charmap_find_value (charmap, &tmp[1], 1);
2705 if (seq_from == NULL)
19bc17a9 2706 {
880f421f
UD
2707 if (!be_quiet)
2708 error (0, 0, _("\
4b10dd6c
UD
2709%s: character `%s' not defined while needed as default value"),
2710 "LC_CTYPE", tmp);
2711 }
2712 else if (seq_from->nbytes != 1)
2713 {
2714 if (!be_quiet)
2715 error (0, 0, _("\
2716%s: character `%s' needed as default value not representable with one byte"),
2717 "LC_CTYPE", tmp);
2718 }
2719 else
2720 {
2721 /* This conversion is implementation defined. */
2722 tmp[1] = (char) (ch + ('A' - 'a'));
2723 seq_to = charmap_find_value (charmap, &tmp[1], 1);
2724 if (seq_to == NULL)
2725 {
2726 if (!be_quiet)
2727 error (0, 0, _("\
2728%s: character `%s' not defined while needed as default value"),
2729 "LC_CTYPE", tmp);
2730 }
2731 else if (seq_to->nbytes != 1)
2732 {
2733 if (!be_quiet)
2734 error (0, 0, _("\
2735%s: character `%s' needed as default value not representable with one byte"),
2736 "LC_CTYPE", tmp);
2737 }
2738 else
2739 /* The index [0] is determined by the order of the
2740 `ctype_map_newP' calls in `ctype_startup'. */
2741 ctype->map256_collection[0][seq_from->bytes[0]]
2742 = seq_to->bytes[0];
19bc17a9 2743 }
19bc17a9
RM
2744 }
2745 }
2746
4b10dd6c 2747 if (ctype->tomap_done[1] == 0)
19bc17a9
RM
2748 /* "If this keyword [tolower] is not specified, the mapping shall be
2749 the reverse mapping of the one specified to `toupper'." [P1003.2] */
2750 {
19bc17a9
RM
2751 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
2752 if (ctype->map_collection[0][cnt] != 0)
2753 ELEM (ctype, map_collection, [1],
2754 ctype->map_collection[0][cnt])
2755 = ctype->charnames[cnt];
4b10dd6c
UD
2756
2757 for (cnt = 0; cnt < 256; ++cnt)
2758 if (ctype->map256_collection[0][cnt] != 0)
2759 ctype->map_collection[1][ctype->map_collection[0][cnt]]
2760 = ctype->charnames[cnt];
2761 }
2762
2763 if (ctype->outdigits_act == 0)
2764 {
2765 for (cnt = 0; cnt < 10; ++cnt)
2766 {
2767 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
2768 digits + cnt, 1);
2769
2770 if (ctype->mboutdigits[cnt] == NULL)
2771 {
2772 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
2773 longnames[cnt],
2774 strlen (longnames[cnt]));
2775
2776 if (ctype->mboutdigits[cnt] == NULL)
2777 {
2778 /* Provide a replacement. */
2779 error (0, 0, _("\
2780no output digits defined and none of the standard names in the charmap"));
2781
2782 ctype->mboutdigits[cnt] = obstack_alloc (&charmap->mem_pool,
2783 sizeof (struct charseq) + 1);
2784
2785 /* This is better than nothing. */
2786 ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
2787 ctype->mboutdigits[cnt]->nbytes = 1;
2788 }
2789 }
2790 }
2791
2792 ctype->outdigits_act = 10;
19bc17a9
RM
2793 }
2794}
2795
2796
2797static void
4b10dd6c
UD
2798allocate_arrays (struct locale_ctype_t *ctype, struct charmap_t *charmap,
2799 struct repertoire_t *repertoire)
19bc17a9
RM
2800{
2801 size_t idx;
2802
6d52618b
UD
2803 /* First we have to decide how we organize the arrays. It is easy
2804 for a one-byte character set. But multi-byte character set
2805 cannot be stored flat because the chars might be sparsely used.
2806 So we determine an optimal hashing function for the used
2807 characters.
2808
2809 We use a very trivial hashing function to store the sparse
2810 table. CH % TABSIZE is used as an index. To solve multiple hits
2811 we have N planes. This guarantees a fixed search time for a
2812 character [N / 2]. In the following code we determine the minmum
2813 value for TABSIZE * N, where TABSIZE >= 256. */
19bc17a9
RM
2814 size_t min_total = UINT_MAX;
2815 size_t act_size = 256;
2816
c84142e8
UD
2817 if (!be_quiet)
2818 fputs (_("\
19bc17a9 2819Computing table size for character classes might take a while..."),
c84142e8 2820 stderr);
19bc17a9
RM
2821
2822 while (act_size < min_total)
2823 {
2824 size_t cnt[act_size];
2825 size_t act_planes = 1;
2826
2827 memset (cnt, '\0', sizeof cnt);
2828
2829 for (idx = 0; idx < 256; ++idx)
2830 cnt[idx] = 1;
2831
2832 for (idx = 0; idx < ctype->charnames_act; ++idx)
2833 if (ctype->charnames[idx] >= 256)
2834 {
2835 size_t nr = ctype->charnames[idx] % act_size;
2836
2837 if (++cnt[nr] > act_planes)
2838 {
2839 act_planes = cnt[nr];
2840 if (act_size * act_planes >= min_total)
2841 break;
2842 }
2843 }
2844
2845 if (act_size * act_planes < min_total)
2846 {
2847 min_total = act_size * act_planes;
2848 ctype->plane_size = act_size;
2849 ctype->plane_cnt = act_planes;
2850 }
2851
2852 ++act_size;
2853 }
2854
c84142e8
UD
2855 if (!be_quiet)
2856 fputs (_(" done\n"), stderr);
19bc17a9 2857
75cd5204 2858
19bc17a9
RM
2859#if __BYTE_ORDER == __LITTLE_ENDIAN
2860# define NAMES_B1 ctype->names_el
2861# define NAMES_B2 ctype->names_eb
2862#else
2863# define NAMES_B1 ctype->names_eb
2864# define NAMES_B2 ctype->names_el
2865#endif
2866
4b10dd6c
UD
2867 ctype->names_eb = (uint32_t *) xcalloc (ctype->plane_size
2868 * ctype->plane_cnt,
2869 sizeof (uint32_t));
2870 ctype->names_el = (uint32_t *) xcalloc (ctype->plane_size
2871 * ctype->plane_cnt,
2872 sizeof (uint32_t));
19bc17a9
RM
2873
2874 for (idx = 1; idx < 256; ++idx)
2875 NAMES_B1[idx] = idx;
2876
2877 /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */
2878 NAMES_B1[0] = 1;
2879
2880 for (idx = 256; idx < ctype->charnames_act; ++idx)
2881 {
2882 size_t nr = (ctype->charnames[idx] % ctype->plane_size);
2883 size_t depth = 0;
2884
2885 while (NAMES_B1[nr + depth * ctype->plane_size])
2886 ++depth;
2887 assert (depth < ctype->plane_cnt);
2888
2889 NAMES_B1[nr + depth * ctype->plane_size] = ctype->charnames[idx];
2890
2891 /* Now for faster access remember the index in the NAMES_B array. */
2892 ctype->charnames[idx] = nr + depth * ctype->plane_size;
2893 }
2894 NAMES_B1[0] = 0;
2895
2896 for (idx = 0; idx < ctype->plane_size * ctype->plane_cnt; ++idx)
4b10dd6c 2897 NAMES_B2[idx] = bswap_32 (NAMES_B1[idx]);
19bc17a9
RM
2898
2899
2900 /* You wonder about this amount of memory? This is only because some
2901 users do not manage to address the array with unsigned values or
2902 data types with range >= 256. '\200' would result in the array
2903 index -128. To help these poor people we duplicate the entries for
2904 128 up to 255 below the entry for \0. */
2905 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
2906 sizeof (char_class_t));
2907 ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
2908 * ctype->plane_cnt,
2909 sizeof (char_class32_t));
2910
2911 /* Fill in the character class information. */
2912#if __BYTE_ORDER == __LITTLE_ENDIAN
2913# define TRANS(w) CHAR_CLASS_TRANS (w)
2914# define TRANS32(w) CHAR_CLASS32_TRANS (w)
2915#else
2916# define TRANS(w) (w)
2917# define TRANS32(w) (w)
2918#endif
2919
4b10dd6c
UD
2920 /* This is the array accessed usig the multibyte string elements. */
2921 for (idx = 0; idx < 256; ++idx)
2922 ctype->ctype_b[128 + idx] = TRANS (ctype->class256_collection[idx]);
19bc17a9 2923
75cd5204
RM
2924 /* Mirror first 127 entries. We must take care that entry -1 is not
2925 mirrored because EOF == -1. */
2926 for (idx = 0; idx < 127; ++idx)
19bc17a9
RM
2927 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
2928
2929 /* The 32 bit array contains all characters. */
2930 for (idx = 0; idx < ctype->class_collection_act; ++idx)
2931 ctype->ctype32_b[ctype->charnames[idx]]
2932 = TRANS32 (ctype->class_collection[idx]);
2933
2934 /* Room for table of mappings. */
4b10dd6c
UD
2935 ctype->map_eb = (uint32_t **) xmalloc (ctype->map_collection_nr
2936 * sizeof (uint32_t *));
2937 ctype->map_el = (uint32_t **) xmalloc (ctype->map_collection_nr
2938 * sizeof (uint32_t *));
19bc17a9
RM
2939
2940 /* Fill in all mappings. */
2941 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
2942 {
2943 unsigned int idx2;
2944
2945 /* Allocate table. */
4b10dd6c
UD
2946 ctype->map_eb[idx] = (uint32_t *) xmalloc ((ctype->plane_size
2947 * ctype->plane_cnt + 128)
2948 * sizeof (uint32_t));
2949 ctype->map_el[idx] = (uint32_t *) xmalloc ((ctype->plane_size
2950 * ctype->plane_cnt + 128)
2951 * sizeof (uint32_t));
19bc17a9
RM
2952
2953#if __BYTE_ORDER == __LITTLE_ENDIAN
2954# define MAP_B1 ctype->map_el
2955# define MAP_B2 ctype->map_eb
2956#else
2957# define MAP_B1 ctype->map_eb
2958# define MAP_B2 ctype->map_el
2959#endif
2960
2961 /* Copy default value (identity mapping). */
2962 memcpy (&MAP_B1[idx][128], NAMES_B1,
4b10dd6c 2963 ctype->plane_size * ctype->plane_cnt * sizeof (uint32_t));
19bc17a9
RM
2964
2965 /* Copy values from collection. */
4b10dd6c
UD
2966 for (idx2 = 0; idx2 < 256; ++idx2)
2967 MAP_B1[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
19bc17a9 2968
75cd5204
RM
2969 /* Mirror first 127 entries. We must take care not to map entry
2970 -1 because EOF == -1. */
2971 for (idx2 = 0; idx2 < 127; ++idx2)
19bc17a9
RM
2972 MAP_B1[idx][idx2] = MAP_B1[idx][256 + idx2];
2973
75cd5204
RM
2974 /* EOF must map to EOF. */
2975 MAP_B1[idx][127] = EOF;
19bc17a9
RM
2976
2977 /* And now the other byte order. */
2978 for (idx2 = 0; idx2 < ctype->plane_size * ctype->plane_cnt + 128; ++idx2)
4b10dd6c 2979 MAP_B2[idx][idx2] = bswap_32 (MAP_B1[idx][idx2]);
19bc17a9
RM
2980 }
2981
2982 /* Extra array for class and map names. */
4b10dd6c
UD
2983 ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
2984 * sizeof (uint32_t));
2985 ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
2986 * sizeof (uint32_t));
75cd5204
RM
2987
2988 /* Array for width information. Because the expected widths are very
2989 small we use only one single byte. This saves space and we need
2990 not provide the information twice with both endiannesses. */
2991 ctype->width = (unsigned char *) xmalloc (ctype->plane_size
2992 * ctype->plane_cnt);
2993 /* Initialize with default width value. */
4b10dd6c 2994 memset (ctype->width, charmap->width_default,
75cd5204 2995 ctype->plane_size * ctype->plane_cnt);
4b10dd6c 2996 if (charmap->width_rules != NULL)
75cd5204 2997 {
4b10dd6c 2998#if 0
75cd5204
RM
2999 size_t cnt;
3000
4b10dd6c
UD
3001 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
3002 if (charmap->width_rules[cnt].width != charmap->width_default)
3003 for (idx = charmap->width_rules[cnt].from;
3004 idx <= charmap->width_rules[cnt].to; ++idx)
75cd5204
RM
3005 {
3006 size_t nr = idx % ctype->plane_size;
3007 size_t depth = 0;
3008
3009 while (NAMES_B1[nr + depth * ctype->plane_size] != nr)
3010 ++depth;
3011 assert (depth < ctype->plane_cnt);
3012
3013 ctype->width[nr + depth * ctype->plane_size]
4b10dd6c 3014 = charmap->width_rules[cnt].width;
75cd5204 3015 }
4b10dd6c
UD
3016#else
3017 abort ();
3018#endif
75cd5204 3019 }
0200214b 3020
4b10dd6c
UD
3021 /* Set MB_CUR_MAX. */
3022 ctype->mb_cur_max = charmap->mb_cur_max;
6990326c
RM
3023
3024 /* We need the name of the currently used 8-bit character set to
3025 make correct conversion between this 8-bit representation and the
3026 ISO 10646 character set used internally for wide characters. */
4b10dd6c
UD
3027 ctype->codeset_name = charmap->code_set_name;
3028
3029 /* Now determine the table for the transliteration information.
3030
3031 XXX It is not yet clear to me whether it is worth implementing a
3032 complicated algorithm which uses a hash table to locate the entries.
3033 For now I'll use a simple array which can be searched using binary
3034 search. */
3035 if (ctype->translit_copy_locale != NULL)
3036 {
3037 /* Fold in the transliteration information from the locale mentioned
3038 in the `include' statement. */
3039 struct locale_ctype_t *here = ctype;
3040
3041 do
3042 {
3043 struct localedef_t *other = find_locale (LC_CTYPE,
3044 here->translit_copy_locale,
3045 repertoire->name, charmap);
3046
3047 if (other == NULL)
3048 {
3049 error (0, 0, _("\
3050%s: transliteration data from locale `%s' not available"),
3051 "LC_CTYPE", here->translit_copy_locale);
3052 break;
3053 }
3054
3055 here = other->categories[LC_CTYPE].ctype;
3056
3057 /* Enqueue the information if necessary. */
3058 if (here->translit != NULL)
3059 {
3060 struct translit_t *endp = here->translit;
3061 while (endp->next != NULL)
3062 endp = endp->next;
3063
3064 endp->next = ctype->translit;
3065 ctype->translit = here->translit;
3066 }
3067 }
3068 while (here->translit_copy_locale != NULL);
3069 }
3070
3071 if (ctype->translit != NULL)
3072 {
3073 /* First count how many entries we have. This is the upper limit
3074 since some entries from the included files might be overwritten. */
3075 size_t number = 0;
3076 size_t cnt;
3077 struct translit_t *runp = ctype->translit;
3078 struct translit_t **sorted;
3079 size_t from_len, to_len;
3080
3081 while (runp != NULL)
3082 {
3083 ++number;
3084 runp = runp->next;
3085 }
3086
3087 /* Next we allocate an array large enough and fill in the values. */
3088 sorted = alloca (number * sizeof (struct translit_t **));
3089 runp = ctype->translit;
3090 number = 0;
3091 do
3092 {
3093 /* Search for the place where to insert this string.
3094 XXX Better use a real sorting algorithm later. */
3095 size_t idx = 0;
3096 int replace = 0;
3097
3098 while (idx < number)
3099 {
3100 int res = wcscmp ((const wchar_t *) sorted[idx]->from,
3101 (const wchar_t *) runp->from);
3102 if (res == 0)
3103 {
3104 replace = 1;
3105 break;
3106 }
3107 if (res > 0)
3108 break;
3109 ++idx;
3110 }
3111
3112 if (replace)
3113 sorted[idx] = runp;
3114 else
3115 {
3116 memmove (&sorted[idx + 1], &sorted[idx],
3117 (number - idx) * sizeof (struct translit_t *));
3118 sorted[idx] = runp;
3119 ++number;
3120 }
3121
3122 runp = runp->next;
3123 }
3124 while (runp != NULL);
3125
3126 /* The next step is putting all the possible transliteration
3127 strings in one memory block so that we can write it out.
3128 We need several different blocks:
3129 - index to the from-string array
3130 - from-string array
3131 - index to the to-string array
3132 - to-string array.
3133 And this all must be available for both endianness variants.
3134 */
3135 from_len = to_len = 0;
3136 for (cnt = 0; cnt < number; ++cnt)
3137 {
3138 struct translit_to_t *srunp;
3139 from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3140 srunp = sorted[cnt]->to;
3141 while (srunp != NULL)
3142 {
3143 to_len += wcslen ((const wchar_t *) srunp->str) + 1;
3144 srunp = srunp->next;
3145 }
3146 /* Plus one for the extra NUL character marking the end of
3147 the list for the current entry. */
3148 ++to_len;
3149 }
3150
3151 /* We can allocate the arrays for the results. */
3152#if BYTE_ORDER == LITTLE_ENDIAN
3153# define from_idx translit_from_idx_el
3154# define from_tbl translit_from_tbl_el
3155# define to_idx translit_to_idx_el
3156# define to_tbl translit_to_tbl_el
3157# define from_idx_ob translit_from_idx_eb
3158# define from_tbl_ob translit_from_tbl_eb
3159# define to_idx_ob translit_to_idx_eb
3160# define to_tbl_ob translit_to_tbl_eb
3161#else
3162# define from_idx translit_from_idx_eb
3163# define from_tbl translit_from_tbl_eb
3164# define to_idx translit_to_idx_eb
3165# define to_tbl translit_to_tbl_eb
3166# define from_idx_ob translit_from_idx_el
3167# define from_tbl_ob translit_from_tbl_el
3168# define to_idx_ob translit_to_idx_el
3169# define to_tbl_ob translit_to_tbl_el
3170#endif
3171 ctype->from_idx = xmalloc (number * sizeof (uint32_t));
3172 ctype->from_idx_ob = xmalloc (number * sizeof (uint32_t));
3173 ctype->from_tbl = xmalloc (from_len * sizeof (uint32_t));
3174 ctype->from_tbl_ob = xmalloc (from_len * sizeof (uint32_t));
3175 ctype->to_idx = xmalloc (number * sizeof (uint32_t));
3176 ctype->to_idx_ob = xmalloc (number * sizeof (uint32_t));
3177 ctype->to_tbl = xmalloc (to_len * sizeof (uint32_t));
3178 ctype->to_tbl_ob = xmalloc (to_len * sizeof (uint32_t));
3179
3180 from_len = 0;
3181 to_len = 0;
3182 for (cnt = 0; cnt < number; ++cnt)
3183 {
3184 size_t len;
3185 struct translit_to_t *srunp;
3186
3187 ctype->from_idx[cnt] = from_len;
3188 ctype->to_idx[cnt] = to_len;
3189
3190 len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3191 wmemcpy ((wchar_t *) &ctype->from_tbl[from_len],
3192 (const wchar_t *) sorted[cnt]->from, len);
3193 from_len += len;
3194
3195 ctype->to_idx[cnt] = to_len;
3196 srunp = sorted[cnt]->to;
3197 while (srunp != NULL)
3198 {
3199 len = wcslen ((const wchar_t *) srunp->str) + 1;
3200 wmemcpy ((wchar_t *) &ctype->to_tbl[to_len],
3201 (const wchar_t *) srunp->str, len);
3202 to_len += len;
3203 srunp = srunp->next;
3204 }
3205 ctype->to_tbl[to_len++] = L'\0';
3206 }
3207
3208 /* Now create the tables for the other endianness. */
3209 for (cnt = 0; cnt < number; ++cnt)
3210 {
3211 ctype->from_idx_ob[cnt] = bswap_32 (ctype->from_idx[cnt]);
3212 ctype->to_idx_ob[cnt] = bswap_32 (ctype->to_idx[cnt]);
3213 }
3214 for (cnt = 0; cnt < from_len; ++cnt)
3215 ctype->from_tbl[cnt] = bswap_32 (ctype->from_tbl_ob[cnt]);
3216 for (cnt = 0; cnt < to_len; ++cnt)
3217 ctype->to_tbl[cnt] = bswap_32 (ctype->to_tbl_ob[cnt]);
3218
3219 /* Store the information about the length. */
3220 ctype->translit_idx_size = number * sizeof (uint32_t);
3221 ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
3222 ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
3223 }
3224 else
3225 {
3226 /* Provide some dummy pointers since we have nothing to write out. */
3227 static uint32_t no_str = { 0 };
3228
3229 ctype->translit_from_idx_el = &no_str;
3230 ctype->translit_from_idx_eb = &no_str;
3231 ctype->translit_from_tbl_el = &no_str;
3232 ctype->translit_from_tbl_eb = &no_str;
3233 ctype->translit_to_tbl_el = &no_str;
3234 ctype->translit_to_tbl_eb = &no_str;
3235 ctype->translit_idx_size = 0;
3236 ctype->translit_from_tbl_size = 0;
3237 ctype->translit_to_tbl_size = 0;
3238 }
19bc17a9 3239}