1 /* Copyright (C) 1995-1999, 2000 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
37 #include "localeinfo.h"
39 #include "linereader.h"
40 #include "locfile-token.h"
42 #include "localedef.h"
47 #ifdef PREDEFINED_CLASSES
48 /* These are the extra bits not in wctype.h since these are not preallocated.  */
50 # define _ISwspecial1 (1 << 29)
51 # define _ISwspecial2 (1 << 30)
52 # define _ISwspecial3 (1 << 31)
/* Helpers that turn a character class token into its bit.  BITPOS is the
   zero-based position of CLS counted from tok_upper; BIT and BITw feed
   that position to _ISbit and _ISwbit respectively to build the mask.  */
#define BITPOS(cls) ((cls) - tok_upper)
#define BIT(cls)    (_ISbit (BITPOS (cls)))
#define BITw(cls)   (_ISwbit (BITPOS (cls)))
/* Yield an lvalue for the entry of COLLECTION that belongs to VALUE.

   IDX is pasted verbatim after the member names, so it is either empty
   or a subscript such as `[1]' selecting one of several parallel tables.
   The table and its `_max' and `_act' companions are handed to find_idx
   by address; presumably this lets find_idx grow the table when VALUE
   has no slot yet (confirm against the definition of find_idx).

   CTYPE is expanded four times, so it must be free of side effects.  It
   is parenthesized so that any pointer expression, not only a plain
   identifier, can be passed.  */
#define ELEM(ctype, collection, idx, value)                                   \
  *find_idx ((ctype), &(ctype)->collection idx,                               \
             &(ctype)->collection##_max idx,                                  \
             &(ctype)->collection##_act idx, (value))
66 /* To stay compatible with former implementations the number of bits
67 for character classes is, for now, restricted to 16. Once that
68 compatibility is no longer needed the number can be raised to 32.
   char_class_t is the 16-bit form and char_class32_t the 32-bit form;
   both are plain macro aliases for the <stdint.h> types. */
69 #define char_class_t uint16_t
70 #define char_class32_t uint32_t
73 /* Type to describe a transliteration action. There is one from-string,
74 possibly several characters long, and a set of to-strings, each of
75 which may also be several characters long. All are 32bit values since
76 this is what is used in the gconv functions. */
81 struct translit_to_t
*next
;
91 struct translit_to_t
*to
;
93 struct translit_t
*next
;
96 struct translit_ignore_t
105 struct translit_ignore_t
*next
;
109 /* The real definition of the struct for the LC_CTYPE locale. */
110 struct locale_ctype_t
113 size_t charnames_max
;
114 size_t charnames_act
;
116 struct repertoire_t
*repertoire
;
118 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
119 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
121 const char *classnames
[MAX_NR_CHARCLASS
];
122 uint32_t last_class_char
;
123 uint32_t class256_collection
[256];
124 uint32_t *class_collection
;
125 size_t class_collection_max
;
126 size_t class_collection_act
;
128 uint32_t class_offset
;
130 struct charseq
**mbdigits
;
137 struct charseq
*mboutdigits
[10];
138 uint32_t wcoutdigits
[10];
139 size_t outdigits_act
;
141 /* If the following number ever turns out to be too small simply
142 increase it. But I doubt it will. --drepper@gnu */
143 #define MAX_NR_CHARMAP 16
144 const char *mapnames
[MAX_NR_CHARMAP
];
145 uint32_t *map_collection
[MAX_NR_CHARMAP
];
146 uint32_t map256_collection
[2][256];
147 size_t map_collection_max
[MAX_NR_CHARMAP
];
148 size_t map_collection_act
[MAX_NR_CHARMAP
];
149 size_t map_collection_nr
;
151 int tomap_done
[MAX_NR_CHARMAP
];
154 /* Transliteration information. */
155 const char *translit_copy_locale
;
156 const char *translit_copy_repertoire
;
157 struct translit_t
*translit
;
158 struct translit_ignore_t
*translit_ignore
;
159 uint32_t ntranslit_ignore
;
161 uint32_t *default_missing
;
162 const char *default_missing_file
;
163 size_t default_missing_lineno
;
165 /* The arrays for the binary representation. */
168 char_class_t
*ctype_b
;
169 char_class32_t
*ctype32_b
;
173 struct iovec
*class_3level
;
174 struct iovec
*map_3level
;
175 uint32_t *class_name_ptr
;
176 uint32_t *map_name_ptr
;
177 unsigned char *width
;
178 struct iovec width_3level
;
180 const char *codeset_name
;
181 uint32_t *translit_from_idx
;
182 uint32_t *translit_from_tbl
;
183 uint32_t *translit_to_idx
;
184 uint32_t *translit_to_tbl
;
185 uint32_t translit_idx_size
;
186 size_t translit_from_tbl_size
;
187 size_t translit_to_tbl_size
;
189 struct obstack mempool
;
/* The obstack macros obtain and release their chunks through the names
   obstack_chunk_alloc and obstack_chunk_free; route them to xmalloc and
   free.  These must be defined before obstack_init is used on the
   `mempool' member in ctype_startup.  */
193 #define obstack_chunk_alloc xmalloc
194 #define obstack_chunk_free free
/* Forward declarations of the file-local helpers.  */

/* Allocate the LC_CTYPE record of LOCALE and register the predefined
   character classes and character maps in it.  */
static void ctype_startup (struct linereader *lr,
                           struct localedef_t *locale,
                           struct charmap_t *charmap,
                           int ignore_content);

/* Record NAME as a new character class of CTYPE; a name that is already
   known is reported through LR.  */
static void ctype_class_new (struct linereader *lr,
                             struct locale_ctype_t *ctype,
                             const char *name);

/* Record NAME as a new character map of CTYPE; a name that is already
   known is reported through LR.  */
static void ctype_map_new (struct linereader *lr,
                           struct locale_ctype_t *ctype,
                           const char *name,
                           struct charmap_t *charmap);

/* Return a pointer to the slot of *TABLE that belongs to IDX.  Callers
   also invoke it with TABLE, MAX and ACT all NULL purely for its side
   effect of adding IDX to the name array.  */
static uint32_t *find_idx (struct locale_ctype_t *ctype,
                           uint32_t **table,
                           size_t *max,
                           size_t *act,
                           unsigned int idx);

/* Called from ctype_finish to set default values for the classes that
   were not specified.  */
static void set_class_defaults (struct locale_ctype_t *ctype,
                                struct charmap_t *charmap,
                                struct repertoire_t *repertoire);

/* Called from ctype_output before the output vector is filled in.  */
static void allocate_arrays (struct locale_ctype_t *ctype,
                             struct charmap_t *charmap,
                             struct repertoire_t *repertoire);
/* Spelled-out names of the digits 0 to 9.  ctype_finish tries them as
   charmap symbol names when it has to come up with default input
   digits.  */
215 static const char *longnames
[] =
217 "zero", "one", "two", "three", "four",
218 "five", "six", "seven", "eight", "nine"
/* The digits 0 to 9 named in the `U' plus eight hex digits form
   (U00000030 ... U00000039).  Where these are consumed is not visible
   in this part of the file.  */
220 static const char *uninames
[] =
222 "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
223 "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
/* The digit characters themselves.  ctype_finish stores digits[cnt] as
   the single byte of a freshly allocated charseq when no charmap symbol
   for the digit can be found.  */
225 static const unsigned char digits
[] = "0123456789";
229 ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
230 struct charmap_t
*charmap
, int ignore_content
)
233 struct locale_ctype_t
*ctype
;
237 /* Allocate the needed room. */
238 locale
->categories
[LC_CTYPE
].ctype
= ctype
=
239 (struct locale_ctype_t
*) xcalloc (1, sizeof (struct locale_ctype_t
));
241 /* We have seen no names yet. */
242 ctype
->charnames_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
244 (unsigned int *) xmalloc (ctype
->charnames_max
245 * sizeof (unsigned int));
246 for (cnt
= 0; cnt
< 256; ++cnt
)
247 ctype
->charnames
[cnt
] = cnt
;
248 ctype
->charnames_act
= 256;
250 /* Fill character class information. */
251 ctype
->last_class_char
= ILLEGAL_CHAR_VALUE
;
252 /* The order of the following instructions determines the bit positions.  */
254 ctype_class_new (lr
, ctype
, "upper");
255 ctype_class_new (lr
, ctype
, "lower");
256 ctype_class_new (lr
, ctype
, "alpha");
257 ctype_class_new (lr
, ctype
, "digit");
258 ctype_class_new (lr
, ctype
, "xdigit");
259 ctype_class_new (lr
, ctype
, "space");
260 ctype_class_new (lr
, ctype
, "print");
261 ctype_class_new (lr
, ctype
, "graph");
262 ctype_class_new (lr
, ctype
, "blank");
263 ctype_class_new (lr
, ctype
, "cntrl");
264 ctype_class_new (lr
, ctype
, "punct");
265 ctype_class_new (lr
, ctype
, "alnum");
266 #ifdef PREDEFINED_CLASSES
267 /* The following are extensions from ISO 14652. */
268 ctype_class_new (lr
, ctype
, "left_to_right");
269 ctype_class_new (lr
, ctype
, "right_to_left");
270 ctype_class_new (lr
, ctype
, "num_terminator");
271 ctype_class_new (lr
, ctype
, "num_separator");
272 ctype_class_new (lr
, ctype
, "segment_separator");
273 ctype_class_new (lr
, ctype
, "block_separator");
274 ctype_class_new (lr
, ctype
, "direction_control");
275 ctype_class_new (lr
, ctype
, "sym_swap_layout");
276 ctype_class_new (lr
, ctype
, "char_shape_selector");
277 ctype_class_new (lr
, ctype
, "num_shape_selector");
278 ctype_class_new (lr
, ctype
, "non_spacing");
279 ctype_class_new (lr
, ctype
, "non_spacing_level3");
280 ctype_class_new (lr
, ctype
, "normal_connect");
281 ctype_class_new (lr
, ctype
, "r_connect");
282 ctype_class_new (lr
, ctype
, "no_connect");
283 ctype_class_new (lr
, ctype
, "no_connect-space");
284 ctype_class_new (lr
, ctype
, "vowel_connect");
287 ctype
->class_collection_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
288 ctype
->class_collection
289 = (uint32_t *) xcalloc (sizeof (unsigned long int),
290 ctype
->class_collection_max
);
291 ctype
->class_collection_act
= 256;
293 /* Fill character map information. */
294 ctype
->last_map_idx
= MAX_NR_CHARMAP
;
295 ctype_map_new (lr
, ctype
, "toupper", charmap
);
296 ctype_map_new (lr
, ctype
, "tolower", charmap
);
297 #ifdef PREDEFINED_CLASSES
298 ctype_map_new (lr
, ctype
, "tosymmetric", charmap
);
301 /* Fill first 256 entries in `toXXX' arrays. */
302 for (cnt
= 0; cnt
< 256; ++cnt
)
304 ctype
->map_collection
[0][cnt
] = cnt
;
305 ctype
->map_collection
[1][cnt
] = cnt
;
306 #ifdef PREDEFINED_CLASSES
307 ctype
->map_collection
[2][cnt
] = cnt
;
309 ctype
->map256_collection
[0][cnt
] = cnt
;
310 ctype
->map256_collection
[1][cnt
] = cnt
;
313 obstack_init (&ctype
->mempool
);
319 ctype_finish (struct localedef_t
*locale
, struct charmap_t
*charmap
)
321 /* See POSIX.2, table 2-6 for the meaning of the following table. */
326 const char allow
[NCLASS
];
328 valid_table
[NCLASS
] =
330 /* The order is important. See token.h for more information.
331 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
332 { "upper", "--MX-XDDXXX-" },
333 { "lower", "--MX-XDDXXX-" },
334 { "alpha", "---X-XDDXXX-" },
335 { "digit", "XXX--XDDXXX-" },
336 { "xdigit", "-----XDDXXX-" },
337 { "space", "XXXXX------X" },
338 { "print", "---------X--" },
339 { "graph", "---------X--" },
340 { "blank", "XXXXXM-----X" },
341 { "cntrl", "XXXXX-XX--XX" },
342 { "punct", "XXXXX-DD-X-X" },
343 { "alnum", "-----XDDXXX-" }
347 uint32_t space_value
;
348 struct charseq
*space_seq
;
349 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
356 /* Now resolve copying and also handle completely missing definitions. */
359 const char *repertoire_name
;
361 /* First see whether we were supposed to copy. If yes, find the
362 actual definition. */
363 if (locale
->copy_name
[LC_CTYPE
] != NULL
)
365 /* Find the copying locale. This has to happen transitively since
366 the locale we are copying from might also be copying another one. */
367 struct localedef_t
*from
= locale
;
370 from
= find_locale (LC_CTYPE
, from
->copy_name
[LC_CTYPE
],
371 from
->repertoire_name
, charmap
);
372 while (from
->categories
[LC_CTYPE
].ctype
== NULL
373 && from
->copy_name
[LC_CTYPE
] != NULL
);
375 ctype
= locale
->categories
[LC_CTYPE
].ctype
376 = from
->categories
[LC_CTYPE
].ctype
;
379 /* If there is still no definition issue a warning and create an empty one.  */
384 error (0, 0, _("No definition for %s category found"), "LC_CTYPE");
385 ctype_startup (NULL
, locale
, charmap
, 0);
386 ctype
= locale
->categories
[LC_CTYPE
].ctype
;
389 /* Get the repertoire we have to use. */
390 repertoire_name
= locale
->repertoire_name
?: repertoire_global
;
391 if (repertoire_name
!= NULL
)
392 ctype
->repertoire
= repertoire_read (repertoire_name
);
395 /* We need the name of the currently used 8-bit character set to
396 make correct conversion between this 8-bit representation and the
397 ISO 10646 character set used internally for wide characters. */
398 ctype
->codeset_name
= charmap
->code_set_name
;
399 if (ctype
->codeset_name
== NULL
)
402 error (0, 0, "no character set name specified in charmap");
403 ctype
->codeset_name
= "//UNKNOWN//";
406 /* Set default value for classes not specified. */
407 set_class_defaults (ctype
, charmap
, ctype
->repertoire
);
409 /* Check according to table. */
410 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
412 uint32_t tmp
= ctype
->class_collection
[cnt
];
416 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
417 if ((tmp
& _ISwbit (cls1
)) != 0)
418 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
419 if (valid_table
[cls1
].allow
[cls2
] != '-')
421 int eq
= (tmp
& _ISwbit (cls2
)) != 0;
422 switch (valid_table
[cls1
].allow
[cls2
])
427 uint32_t value
= ctype
->charnames
[cnt
];
431 character L'\\u%0*x' in class `%s' must be in class `%s'"),
432 value
> 0xffff ? 8 : 4, value
,
433 valid_table
[cls1
].name
,
434 valid_table
[cls2
].name
);
441 uint32_t value
= ctype
->charnames
[cnt
];
445 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
446 value
> 0xffff ? 8 : 4, value
,
447 valid_table
[cls1
].name
,
448 valid_table
[cls2
].name
);
453 ctype
->class_collection
[cnt
] |= _ISwbit (cls2
);
457 error (5, 0, _("internal error in %s, line %u"),
458 __FUNCTION__
, __LINE__
);
464 for (cnt
= 0; cnt
< 256; ++cnt
)
466 uint32_t tmp
= ctype
->class256_collection
[cnt
];
470 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
471 if ((tmp
& _ISbit (cls1
)) != 0)
472 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
473 if (valid_table
[cls1
].allow
[cls2
] != '-')
475 int eq
= (tmp
& _ISbit (cls2
)) != 0;
476 switch (valid_table
[cls1
].allow
[cls2
])
483 snprintf (buf
, sizeof buf
, "\\%Zo", cnt
);
487 character '%s' in class `%s' must be in class `%s'"),
488 buf
, valid_table
[cls1
].name
,
489 valid_table
[cls2
].name
);
498 snprintf (buf
, sizeof buf
, "\\%Zo", cnt
);
502 character '%s' in class `%s' must not be in class `%s'"),
503 buf
, valid_table
[cls1
].name
,
504 valid_table
[cls2
].name
);
509 ctype
->class256_collection
[cnt
] |= _ISbit (cls2
);
513 error (5, 0, _("internal error in %s, line %u"),
514 __FUNCTION__
, __LINE__
);
520 /* ... and now test <SP> as a special case. */
522 if (((cnt
= BITPOS (tok_space
),
523 (ELEM (ctype
, class_collection
, , space_value
)
524 & BITw (tok_space
)) == 0)
525 || (cnt
= BITPOS (tok_blank
),
526 (ELEM (ctype
, class_collection
, , space_value
)
527 & BITw (tok_blank
)) == 0)))
530 error (0, 0, _("<SP> character not in class `%s'"),
531 valid_table
[cnt
].name
);
533 else if (((cnt
= BITPOS (tok_punct
),
534 (ELEM (ctype
, class_collection
, , space_value
)
535 & BITw (tok_punct
)) != 0)
536 || (cnt
= BITPOS (tok_graph
),
537 (ELEM (ctype
, class_collection
, , space_value
)
542 error (0, 0, _("<SP> character must not be in class `%s'"),
543 valid_table
[cnt
].name
);
546 ELEM (ctype
, class_collection
, , space_value
) |= BITw (tok_print
);
548 space_seq
= charmap_find_value (charmap
, "SP", 2);
549 if (space_seq
== NULL
)
550 space_seq
= charmap_find_value (charmap
, "space", 5);
551 if (space_seq
== NULL
)
552 space_seq
= charmap_find_value (charmap
, "U00000020", 9);
553 if (space_seq
== NULL
|| space_seq
->nbytes
!= 1)
556 error (0, 0, _("character <SP> not defined in character map"));
558 else if (((cnt
= BITPOS (tok_space
),
559 (ctype
->class256_collection
[space_seq
->bytes
[0]]
560 & BIT (tok_space
)) == 0)
561 || (cnt
= BITPOS (tok_blank
),
562 (ctype
->class256_collection
[space_seq
->bytes
[0]]
563 & BIT (tok_blank
)) == 0)))
566 error (0, 0, _("<SP> character not in class `%s'"),
567 valid_table
[cnt
].name
);
569 else if (((cnt
= BITPOS (tok_punct
),
570 (ctype
->class256_collection
[space_seq
->bytes
[0]]
571 & BIT (tok_punct
)) != 0)
572 || (cnt
= BITPOS (tok_graph
),
573 (ctype
->class256_collection
[space_seq
->bytes
[0]]
574 & BIT (tok_graph
)) != 0)))
577 error (0, 0, _("<SP> character must not be in class `%s'"),
578 valid_table
[cnt
].name
);
581 ctype
->class256_collection
[space_seq
->bytes
[0]] |= BIT (tok_print
);
583 /* Now that the tests are done make sure the name array contains all
584 characters which are handled in the WIDTH section of the
585 character set definition file. */
586 if (charmap
->width_rules
!= NULL
)
587 for (cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
589 unsigned char bytes
[charmap
->mb_cur_max
];
590 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
592 /* We have the range of character for which the width is
593 specified described using byte sequences of the multibyte
594 charset. We have to convert this to UCS4 now. And we
595 cannot simply convert the beginning and the end of the
596 sequence, we have to iterate over the byte sequence and
597 convert it for every single character. */
598 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
600 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
601 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
604 /* Find the UCS value for `bytes'. */
607 struct charseq
*seq
= charmap_find_symbol (charmap
, bytes
, nbytes
);
610 wch
= ILLEGAL_CHAR_VALUE
;
611 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
614 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
617 if (wch
!= ILLEGAL_CHAR_VALUE
)
618 /* We are only interested in the side-effects of the
619 `find_idx' call. It will add appropriate entries in
620 the name array if this is necessary. */
621 (void) find_idx (ctype
, NULL
, NULL
, NULL
, wch
);
623 /* "Increment" the bytes sequence. */
625 while (inner
>= 0 && bytes
[inner
] == 0xff)
630 /* We have to extend the byte sequence. */
631 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
635 memset (&bytes
[1], 0, nbytes
);
641 while (++inner
< nbytes
)
647 /* Now set all the other characters of the character set to the
650 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
652 struct charseq
*data
= (struct charseq
*) vdata
;
654 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
655 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
658 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
659 (void) find_idx (ctype
, NULL
, NULL
, NULL
, data
->ucs4
);
662 /* There must be a multiple of 10 digits. */
663 if (ctype
->mbdigits_act
% 10 != 0)
665 assert (ctype
->mbdigits_act
== ctype
->wcdigits_act
);
666 ctype
->wcdigits_act
-= ctype
->mbdigits_act
% 10;
667 ctype
->mbdigits_act
-= ctype
->mbdigits_act
% 10;
668 error (0, 0, _("`digit' category has not entries in groups of ten"));
671 /* Check the input digits. There must be a multiple of ten available.
672 In each group it could be that one or the other character is missing.
673 In this case the whole group must be removed. */
675 while (cnt
< ctype
->mbdigits_act
)
678 for (inner
= 0; inner
< 10; ++inner
)
679 if (ctype
->mbdigits
[cnt
+ inner
] == NULL
)
686 /* Remove the group. */
687 memmove (&ctype
->mbdigits
[cnt
], &ctype
->mbdigits
[cnt
+ 10],
688 ((ctype
->wcdigits_act
- cnt
- 10)
689 * sizeof (ctype
->mbdigits
[0])));
690 ctype
->mbdigits_act
-= 10;
694 /* If no input digits are given use the default. */
695 if (ctype
->mbdigits_act
== 0)
697 if (ctype
->mbdigits_max
== 0)
699 ctype
->mbdigits
= obstack_alloc (&charmap
->mem_pool
,
700 10 * sizeof (struct charseq
*));
701 ctype
->mbdigits_max
= 10;
704 for (cnt
= 0; cnt
< 10; ++cnt
)
706 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
708 if (ctype
->mbdigits
[cnt
] == NULL
)
710 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
712 strlen (longnames
[cnt
]));
713 if (ctype
->mbdigits
[cnt
] == NULL
)
715 /* Hum, this ain't good. */
717 no input digits defined and none of the standard names in the charmap"));
719 ctype
->mbdigits
[cnt
] = obstack_alloc (&charmap
->mem_pool
,
720 sizeof (struct charseq
) + 1);
722 /* This is better than nothing. */
723 ctype
->mbdigits
[cnt
]->bytes
[0] = digits
[cnt
];
724 ctype
->mbdigits
[cnt
]->nbytes
= 1;
729 ctype
->mbdigits_act
= 10;
732 /* Check the wide character input digits. There must be a multiple
733 of ten available. In each group it could be that one or the other
734 character is missing. In this case the whole group must be removed.  */
737 while (cnt
< ctype
->wcdigits_act
)
740 for (inner
= 0; inner
< 10; ++inner
)
741 if (ctype
->wcdigits
[cnt
+ inner
] == ILLEGAL_CHAR_VALUE
)
748 /* Remove the group. */
749 memmove (&ctype
->wcdigits
[cnt
], &ctype
->wcdigits
[cnt
+ 10],
750 ((ctype
->wcdigits_act
- cnt
- 10)
751 * sizeof (ctype
->wcdigits
[0])));
752 ctype
->wcdigits_act
-= 10;
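756 /* If no wide character input digits are given use the default.
   NOTE(review): this branch ends by setting mbdigits_act to 10 and
   leaves wcdigits_act untouched -- looks like a slip for wcdigits_act;
   confirm before relying on the wide digit count.  */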
757 if (ctype
->wcdigits_act
== 0)
759 if (ctype
->wcdigits_max
== 0)
761 ctype
->wcdigits
= obstack_alloc (&charmap
->mem_pool
,
762 10 * sizeof (uint32_t));
763 ctype
->wcdigits_max
= 10;
766 for (cnt
= 0; cnt
< 10; ++cnt
)
767 ctype
->wcdigits
[cnt
] = L
'0' + cnt
;
769 ctype
->mbdigits_act
= 10;
772 /* Check the outdigits. */
774 for (cnt
= 0; cnt
< 10; ++cnt
)
775 if (ctype
->mboutdigits
[cnt
] == NULL
)
777 static struct charseq replace
[2];
782 not all characters used in `outdigit' are available in the charmap"));
786 replace
[0].nbytes
= 1;
787 replace
[0].bytes
[0] = '?';
788 replace
[0].bytes
[1] = '\0';
789 ctype
->mboutdigits
[cnt
] = &replace
[0];
793 for (cnt
= 0; cnt
< 10; ++cnt
)
794 if (ctype
->wcoutdigits
[cnt
] == 0)
799 not all characters used in `outdigit' are available in the repertoire"));
803 ctype
->wcoutdigits
[cnt
] = L
'?';
806 /* Sort the entries in the translit_ignore list. */
807 if (ctype
->translit_ignore
!= NULL
)
809 struct translit_ignore_t
*firstp
= ctype
->translit_ignore
;
810 struct translit_ignore_t
*runp
;
812 ctype
->ntranslit_ignore
= 1;
814 for (runp
= firstp
->next
; runp
!= NULL
; runp
= runp
->next
)
816 struct translit_ignore_t
*lastp
= NULL
;
817 struct translit_ignore_t
*cmpp
;
819 ++ctype
->ntranslit_ignore
;
821 for (cmpp
= firstp
; cmpp
!= NULL
; lastp
= cmpp
, cmpp
= cmpp
->next
)
822 if (runp
->from
< cmpp
->from
)
830 ctype
->translit_ignore
= firstp
;
836 ctype_output (struct localedef_t
*locale
, struct charmap_t
*charmap
,
837 const char *output_path
)
839 static const char nulbytes
[4] = { 0, 0, 0, 0 };
840 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
841 const size_t nelems
= (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE
)
843 ? (ctype
->map_collection_nr
- 2)
844 : (ctype
->nr_charclass
+ ctype
->map_collection_nr
)));
845 struct iovec iov
[2 + nelems
+ ctype
->nr_charclass
846 + ctype
->map_collection_nr
+ 2];
847 struct locale_file data
;
848 uint32_t idx
[nelems
+ 1];
849 uint32_t default_missing_len
;
850 size_t elem
, cnt
, offset
, total
;
853 /* Now prepare the output: Find the sizes of the table we can use. */
854 allocate_arrays (ctype
, charmap
, ctype
->repertoire
);
856 data
.magic
= LIMAGIC (LC_CTYPE
);
858 iov
[0].iov_base
= (void *) &data
;
859 iov
[0].iov_len
= sizeof (data
);
861 iov
[1].iov_base
= (void *) idx
;
862 iov
[1].iov_len
= nelems
* sizeof (uint32_t);
864 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
867 for (elem
= 0; elem
< nelems
; ++elem
)
869 if (elem
< _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE
))
872 #define CTYPE_EMPTY(name) \
874 iov[2 + elem + offset].iov_base = (void *) ""; \
875 iov[2 + elem + offset].iov_len = 0; \
876 idx[elem + 1] = idx[elem]; \
879 CTYPE_EMPTY(_NL_CTYPE_GAP1
);
880 CTYPE_EMPTY(_NL_CTYPE_GAP2
);
881 CTYPE_EMPTY(_NL_CTYPE_GAP3
);
883 #define CTYPE_DATA(name, base, len) \
884 case _NL_ITEM_INDEX (name): \
885 iov[2 + elem + offset].iov_base = (base); \
886 iov[2 + elem + offset].iov_len = (len); \
887 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
890 CTYPE_DATA (_NL_CTYPE_CLASS
,
892 (256 + 128) * sizeof (char_class_t
));
894 CTYPE_DATA (_NL_CTYPE_TOUPPER
,
896 (256 + 128) * sizeof (uint32_t));
897 CTYPE_DATA (_NL_CTYPE_TOLOWER
,
899 (256 + 128) * sizeof (uint32_t));
901 CTYPE_DATA (_NL_CTYPE_TOUPPER32
,
903 (oldstyle_tables
? ctype
->plane_size
* ctype
->plane_cnt
: 256)
904 * sizeof (uint32_t));
905 CTYPE_DATA (_NL_CTYPE_TOLOWER32
,
907 (oldstyle_tables
? ctype
->plane_size
* ctype
->plane_cnt
: 256)
908 * sizeof (uint32_t));
910 CTYPE_DATA (_NL_CTYPE_CLASS32
,
912 (oldstyle_tables
? ctype
->plane_size
* ctype
->plane_cnt
: 256)
913 * sizeof (char_class32_t
));
915 CTYPE_DATA (_NL_CTYPE_NAMES
,
917 (oldstyle_tables
? ctype
->plane_size
* ctype
->plane_cnt
: 0)
918 * sizeof (uint32_t));
920 CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET
,
921 &ctype
->class_offset
, sizeof (uint32_t));
923 CTYPE_DATA (_NL_CTYPE_MAP_OFFSET
,
924 &ctype
->map_offset
, sizeof (uint32_t));
926 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE
,
927 &ctype
->translit_idx_size
, sizeof (uint32_t));
929 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX
,
930 ctype
->translit_from_idx
,
931 ctype
->translit_idx_size
* sizeof (uint32_t));
933 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL
,
934 ctype
->translit_from_tbl
,
935 ctype
->translit_from_tbl_size
);
937 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX
,
938 ctype
->translit_to_idx
,
939 ctype
->translit_idx_size
* sizeof (uint32_t));
941 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL
,
942 ctype
->translit_to_tbl
, ctype
->translit_to_tbl_size
);
944 CTYPE_DATA (_NL_CTYPE_HASH_SIZE
,
945 &ctype
->plane_size
, sizeof (uint32_t));
946 CTYPE_DATA (_NL_CTYPE_HASH_LAYERS
,
947 &ctype
->plane_cnt
, sizeof (uint32_t));
949 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES
):
950 /* The class name array. */
952 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
, ++offset
)
954 iov
[2 + elem
+ offset
].iov_base
955 = (void *) ctype
->classnames
[cnt
];
956 iov
[2 + elem
+ offset
].iov_len
957 = strlen (ctype
->classnames
[cnt
]) + 1;
958 total
+= iov
[2 + elem
+ offset
].iov_len
;
960 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
961 iov
[2 + elem
+ offset
].iov_len
= 1 + (4 - ((total
+ 1) % 4));
962 total
+= 1 + (4 - ((total
+ 1) % 4));
964 idx
[elem
+ 1] = idx
[elem
] + total
;
967 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES
):
968 /* The map name array. */
970 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
, ++offset
)
972 iov
[2 + elem
+ offset
].iov_base
973 = (void *) ctype
->mapnames
[cnt
];
974 iov
[2 + elem
+ offset
].iov_len
975 = strlen (ctype
->mapnames
[cnt
]) + 1;
976 total
+= iov
[2 + elem
+ offset
].iov_len
;
978 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
979 iov
[2 + elem
+ offset
].iov_len
= 1 + (4 - ((total
+ 1) % 4));
980 total
+= 1 + (4 - ((total
+ 1) % 4));
982 idx
[elem
+ 1] = idx
[elem
] + total
;
985 CTYPE_DATA (_NL_CTYPE_WIDTH
,
988 : ctype
->width_3level
.iov_base
),
990 ? (ctype
->plane_size
* ctype
->plane_cnt
+ 3) & ~3ul
991 : ctype
->width_3level
.iov_len
));
993 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX
,
994 &ctype
->mb_cur_max
, sizeof (uint32_t));
996 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME
):
997 total
= strlen (ctype
->codeset_name
) + 1;
999 iov
[2 + elem
+ offset
].iov_base
= (char *) ctype
->codeset_name
;
1002 iov
[2 + elem
+ offset
].iov_base
= alloca ((total
+ 3) & ~3);
1003 memset (mempcpy (iov
[2 + elem
+ offset
].iov_base
,
1004 ctype
->codeset_name
, total
),
1005 '\0', 4 - (total
& 3));
1006 total
= (total
+ 3) & ~3;
1008 iov
[2 + elem
+ offset
].iov_len
= total
;
1009 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1012 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN
):
1013 iov
[2 + elem
+ offset
].iov_base
= alloca (sizeof (uint32_t));
1014 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1015 *(uint32_t *) iov
[2 + elem
+ offset
].iov_base
=
1016 ctype
->mbdigits_act
/ 10;
1017 idx
[elem
+ 1] = idx
[elem
] + sizeof (uint32_t);
1020 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN
):
1021 /* Align entries. */
1022 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1023 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1024 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1027 iov
[2 + elem
+ offset
].iov_base
= alloca (sizeof (uint32_t));
1028 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1029 *(uint32_t *) iov
[2 + elem
+ offset
].iov_base
=
1030 ctype
->wcdigits_act
/ 10;
1031 idx
[elem
+ 1] = idx
[elem
] + sizeof (uint32_t);
1034 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB
):
1035 /* Compute the length of all possible characters. For INDIGITS
1036 there might be more than one. We simply concatenate all of
1037 them with a NUL byte following. The NUL byte wouldn't be
1038 necessary but it makes it easier for the user. */
1041 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
);
1042 cnt
< ctype
->mbdigits_act
; cnt
+= 10)
1043 total
+= ctype
->mbdigits
[cnt
]->nbytes
+ 1;
1044 iov
[2 + elem
+ offset
].iov_base
= (char *) alloca (total
);
1045 iov
[2 + elem
+ offset
].iov_len
= total
;
1047 cp
= iov
[2 + elem
+ offset
].iov_base
;
1048 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
);
1049 cnt
< ctype
->mbdigits_act
; cnt
+= 10)
1051 cp
= mempcpy (cp
, ctype
->mbdigits
[cnt
]->bytes
,
1052 ctype
->mbdigits
[cnt
]->nbytes
);
1055 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1058 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB
):
1059 /* Compute the length of the character. Unlike for INDIGITS there
1060 is exactly one multibyte sequence per output digit. We store it
1061 with a NUL byte following. The NUL byte wouldn't be
1062 necessary but it makes it easier for the user. */
1063 cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
);
1064 total
= ctype
->mboutdigits
[cnt
]->nbytes
+ 1;
1065 iov
[2 + elem
+ offset
].iov_base
= (char *) alloca (total
);
1066 iov
[2 + elem
+ offset
].iov_len
= total
;
1068 *(char *) mempcpy (iov
[2 + elem
+ offset
].iov_base
,
1069 ctype
->mboutdigits
[cnt
]->bytes
,
1070 ctype
->mboutdigits
[cnt
]->nbytes
) = '\0';
1071 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1074 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC
):
1075 total
= ctype
->wcdigits_act
/ 10;
1077 iov
[2 + elem
+ offset
].iov_base
=
1078 (uint32_t *) alloca (total
* sizeof (uint32_t));
1079 iov
[2 + elem
+ offset
].iov_len
= total
* sizeof (uint32_t);
1081 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
);
1082 cnt
< ctype
->wcdigits_act
; cnt
+= 10)
1083 ((uint32_t *) iov
[2 + elem
+ offset
].iov_base
)[cnt
/ 10]
1084 = ctype
->wcdigits
[cnt
];
1085 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1088 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
):
1089 /* Align entries. */
1090 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1091 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1092 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1096 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC
):
1097 cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
);
1098 iov
[2 + elem
+ offset
].iov_base
= &ctype
->wcoutdigits
[cnt
];
1099 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1100 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1103 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN
):
1104 default_missing_len
= (ctype
->default_missing
1105 ? wcslen ((wchar_t *)ctype
->default_missing
)
1107 iov
[2 + elem
+ offset
].iov_base
= &default_missing_len
;
1108 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1109 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1112 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING
):
1113 iov
[2 + elem
+ offset
].iov_base
=
1114 ctype
->default_missing
?: (uint32_t *) L
"";
1115 iov
[2 + elem
+ offset
].iov_len
=
1116 wcslen (iov
[2 + elem
+ offset
].iov_base
);
1117 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1120 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN
):
1121 iov
[2 + elem
+ offset
].iov_base
= &ctype
->ntranslit_ignore
;
1122 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1123 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1126 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE
):
1128 uint32_t *ranges
= (uint32_t *) alloca (ctype
->ntranslit_ignore
1129 * 3 * sizeof (uint32_t));
1130 struct translit_ignore_t
*runp
;
1132 iov
[2 + elem
+ offset
].iov_base
= ranges
;
1133 iov
[2 + elem
+ offset
].iov_len
= (ctype
->ntranslit_ignore
1134 * 3 * sizeof (uint32_t));
1136 for (runp
= ctype
->translit_ignore
; runp
!= NULL
;
1139 *ranges
++ = runp
->from
;
1140 *ranges
++ = runp
->to
;
1141 *ranges
++ = runp
->step
;
1144 /* Remove the following line in case a new entry is added
1145 after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN. */
1147 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1151 assert (! "unknown CTYPE element");
1155 /* Handle extra maps. */
1156 if (oldstyle_tables
)
1158 size_t nr
= (elem
- _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE
)) + 2;
1160 iov
[2 + elem
+ offset
].iov_base
= ctype
->map32
[nr
];
1161 iov
[2 + elem
+ offset
].iov_len
= ((ctype
->plane_size
1163 * sizeof (uint32_t));
1165 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1169 size_t nr
= elem
- _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE
);
1170 if (nr
< ctype
->nr_charclass
)
1172 iov
[2 + elem
+ offset
] = ctype
->class_3level
[nr
];
1176 nr
-= ctype
->nr_charclass
;
1177 assert (nr
< ctype
->map_collection_nr
);
1178 iov
[2 + elem
+ offset
] = ctype
->map_3level
[nr
];
1180 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1185 assert (2 + elem
+ offset
== (nelems
+ ctype
->nr_charclass
1186 + ctype
->map_collection_nr
+ 2 + 2));
1188 write_locale_data (output_path
, "LC_CTYPE", 2 + elem
+ offset
, iov
);
1192 /* Local functions. */
/* Register NAME as a further character class in CTYPE.

   Visible behaviour: ctype->classnames[0 .. nr_charclass) is scanned with
   strcmp for NAME.  When the scan ends with cnt < nr_charclass the class
   is reported through lr_error as already defined.  When nr_charclass
   equals MAX_NR_CHARCLASS the implementation-limit message is issued.
   Otherwise the NAME pointer itself (no copy is made here) is stored in
   the next free classnames slot and nr_charclass is incremented.

   NOTE(review): this listing lacks several short source lines (the NAME
   parameter, the declaration of cnt, braces and the statements that leave
   the loop or the function), so how each check exits must be confirmed
   against the pristine file.  */
1194 ctype_class_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
1199 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
1200 if (strcmp (ctype
->classnames
[cnt
], name
) == 0)
1203 if (cnt
< ctype
->nr_charclass
)
1205 lr_error (lr
, _("character class `%s' already defined"), name
);
1209 if (ctype
->nr_charclass
== MAX_NR_CHARCLASS
)
1210 /* Exit code 2 is prescribed in P1003.2b. */
1212 implementation limit: no more than %Zd character classes allowed"),
1215 ctype
->classnames
[ctype
->nr_charclass
++] = name
;
/* Register NAME as a further character mapping in CTYPE.

   Visible behaviour:
   - ctype->mapnames[0 .. map_collection_nr) is scanned with strcmp for
     NAME; along the way max_chars collects the largest value found in
     map_collection_max.
   - cnt < map_collection_nr after the scan is reported through lr_error
     as an already defined map.
   - map_collection_nr == MAX_NR_CHARMAP leads to the implementation-limit
     message.
   - Otherwise slot cnt receives the NAME pointer, a capacity, a table of
     that many zero-initialised uint32_t elements obtained from xcalloc,
     and an active length of 256; map_collection_nr is then incremented.

   Two assignments to map_collection_max[cnt] are visible: one derived from
   charmap->mb_cur_max (256 when it is 1, else 512) and one taking
   max_chars.  NOTE(review): the condition selecting between them is on a
   line missing from this listing; presumably max_chars is used once an
   earlier map exists -- confirm against the pristine file.  */
1220 ctype_map_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
1221 const char *name
, struct charmap_t
*charmap
)
1223 size_t max_chars
= 0;
1226 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
1228 if (strcmp (ctype
->mapnames
[cnt
], name
) == 0)
1231 if (max_chars
< ctype
->map_collection_max
[cnt
])
1232 max_chars
= ctype
->map_collection_max
[cnt
];
1235 if (cnt
< ctype
->map_collection_nr
)
1237 lr_error (lr
, _("character map `%s' already defined"), name
);
1241 if (ctype
->map_collection_nr
== MAX_NR_CHARMAP
)
1242 /* Exit code 2 is prescribed in P1003.2b. */
1244 implementation limit: no more than %d character maps allowed"),
1247 ctype
->mapnames
[cnt
] = name
;
1250 ctype
->map_collection_max
[cnt
] = charmap
->mb_cur_max
== 1 ? 256 : 512;
1252 ctype
->map_collection_max
[cnt
] = max_chars
;
1254 ctype
->map_collection
[cnt
] = (uint32_t *)
1255 xcalloc (sizeof (uint32_t), ctype
->map_collection_max
[cnt
]);
1256 ctype
->map_collection_act
[cnt
] = 256;
1258 ++ctype
->map_collection_nr
;
/* Return the address of the element of *TABLE that belongs to the
   character value IDX, registering IDX in ctype->charnames when needed.

   Visible behaviour:
   - One early exit returns &(*table)[idx] directly, or NULL when TABLE is
     NULL.  NOTE(review): the test guarding this exit is on a line missing
     from this listing; since the search below starts at 256 it is
     presumably a check for IDX below 256 -- confirm.
   - ctype->charnames[256 .. charnames_act) is searched for IDX.  When the
     search reaches charnames_act the value is appended, after doubling
     charnames_max and reallocating with xrealloc if the array is full.
   - When the table has to grow, *max is enlarged until it exceeds cnt,
     *table is reallocated with xrealloc and the newly added tail
     (old_max .. *max) is cleared with memset.
   - The result is &(*table)[cnt], where cnt is the position of IDX in
     charnames.

   NOTE(review): the statements that return early for a NULL TABLE, that
   double *max and that update *act are not visible here.  */
1262 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1263 is possible if we only want to extend the name array. */
1265 find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
, size_t *max
,
1266 size_t *act
, uint32_t idx
)
1271 return table
== NULL
? NULL
: &(*table
)[idx
];
1273 for (cnt
= 256; cnt
< ctype
->charnames_act
; ++cnt
)
1274 if (ctype
->charnames
[cnt
] == idx
)
1277 /* We have to distinguish two cases: the name is found or not. */
1278 if (cnt
== ctype
->charnames_act
)
1280 /* Extend the name array. */
1281 if (ctype
->charnames_act
== ctype
->charnames_max
)
1283 ctype
->charnames_max
*= 2;
1284 ctype
->charnames
= (uint32_t *)
1285 xrealloc (ctype
->charnames
,
1286 sizeof (uint32_t) * ctype
->charnames_max
);
1288 ctype
->charnames
[ctype
->charnames_act
++] = idx
;
1292 /* We have done everything we are asked to do. */
1299 size_t old_max
= *max
;
1302 while (*max
<= cnt
);
1305 (uint32_t *) xrealloc (*table
, *max
* sizeof (uint32_t));
/* Only the elements added by the reallocation are zeroed; the first
   old_max elements keep their contents.  */
1306 memset (&(*table
)[old_max
], '\0',
1307 (*max
- old_max
) * sizeof (uint32_t));
1313 return &(*table
)[cnt
];
/* Translate the token NOW into a charmap byte sequence (*SEQP) and a
   UCS4 value (*WCHP).

   Visible handling per token kind:
   - tok_bsymbol: the symbolic name is looked up with repertoire_find_value
     for *WCHP and with charmap_find_value for *SEQP.
   - tok_ucs4: the charmap is first searched under a name formatted as
     U%08X (length 9).  Further lookups use repertoire_find_seq and, via
     repertoire_find_symbol, the symbolic name of the value.  When a
     repertoire is present a negative entry (ucs4 == ILLEGAL_CHAR_VALUE)
     keyed by the UCS4 value is inserted into repertoire->seq_table.
     *WCHP always receives now->val.ucs4.
   - tok_charcode: the byte code is mapped with charmap_find_symbol; *WCHP
     is either ILLEGAL_CHAR_VALUE or the ucs4 member of the sequence, which
     is filled in from the repertoire when it still holds
     UNINITIALIZED_CHAR_VALUE.

   NOTE(review): the return type, the return statements and the conditions
   joining the tok_ucs4 lookups are on lines missing from this listing.
   A caller visible further down tests the result in an if, so a status
   value is presumably returned -- confirm against the pristine file.  */
1318 get_character (struct token
*now
, struct charmap_t
*charmap
,
1319 struct repertoire_t
*repertoire
,
1320 struct charseq
**seqp
, uint32_t *wchp
)
1322 if (now
->tok
== tok_bsymbol
)
1324 /* This will hopefully be the normal case. */
1325 *wchp
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1326 now
->val
.str
.lenmb
);
1327 *seqp
= charmap_find_value (charmap
, now
->val
.str
.startmb
,
1328 now
->val
.str
.lenmb
);
1330 else if (now
->tok
== tok_ucs4
)
/* The generated name is the letter U followed by eight hex digits, hence
   the length 9 passed to the charmap lookup.  */
1334 snprintf (utmp
, sizeof (utmp
), "U%08X", now
->val
.ucs4
);
1335 *seqp
= charmap_find_value (charmap
, utmp
, 9);
1338 *seqp
= repertoire_find_seq (repertoire
, now
->val
.ucs4
);
1342 /* Compute the value in the charmap from the UCS value. */
1343 const char *symbol
= repertoire_find_symbol (repertoire
,
1349 *seqp
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1353 if (repertoire
!= NULL
)
1355 /* Insert a negative entry. */
1356 static const struct charseq negative
1357 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1358 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
,
1360 *newp
= now
->val
.ucs4
;
1362 insert_entry (&repertoire
->seq_table
, newp
,
1363 sizeof (uint32_t), (void *) &negative
);
1367 (*seqp
)->ucs4
= now
->val
.ucs4
;
1369 else if ((*seqp
)->ucs4
!= now
->val
.ucs4
)
1372 *wchp
= now
->val
.ucs4
;
1374 else if (now
->tok
== tok_charcode
)
1376 /* We must map from the byte code to UCS4. */
1377 *seqp
= charmap_find_symbol (charmap
, now
->val
.str
.startmb
,
1378 now
->val
.str
.lenmb
);
1381 *wchp
= ILLEGAL_CHAR_VALUE
;
/* The UCS4 value of a charmap entry is resolved lazily from its symbolic
   name the first time it is needed.  */
1384 if ((*seqp
)->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1385 (*seqp
)->ucs4
= repertoire_find_value (repertoire
, (*seqp
)->name
,
1386 strlen ((*seqp
)->name
));
1387 *wchp
= (*seqp
)->ucs4
;
/* Expand a range given by two symbolic names that share a prefix and
   differ in a trailing number, and apply the class bits to every name in
   between.

   LAST_STR is the first name of the range; the second name is taken from
   the token NOW (now->val.str).  BASE is the radix handed to strtoul and
   also selects the decimal or hexadecimal output format.  STEP is the
   increment between generated names.  CLASS256_BIT is or-ed into
   ctype->class256_collection for single-byte sequences, CLASS_BIT into
   the wide table reached through find_idx.  HANDLE_DIGITS == 1 appends to
   mbdigits/wcdigits, HANDLE_DIGITS == 2 appends to
   mboutdigits/wcoutdigits (at most ten entries).  With IGNORE_CONTENT set
   nothing is generated.

   Visible steps: both names must have the same length and must not be
   identical; the common prefix is skipped; the numeric tails are parsed
   into FROM and TO; FROM must be below TO.  Then, starting at FROM + STEP,
   each name is rebuilt in TMP, resolved with get_character and recorded.
   now->val.str.startmb is redirected to TMP for that purpose.

   NOTE(review): the NOW parameter, several declarations (cp, endp, wch),
   braces and the error exits are on lines missing from this listing.  */
1397 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1398 the .(2). counterparts. */
1400 charclass_symbolic_ellipsis (struct linereader
*ldfile
,
1401 struct locale_ctype_t
*ctype
,
1402 struct charmap_t
*charmap
,
1403 struct repertoire_t
*repertoire
,
1405 const char *last_str
,
1406 unsigned long int class256_bit
,
1407 unsigned long int class_bit
, int base
,
1408 int ignore_content
, int handle_digits
, int step
)
1410 const char *nowstr
= now
->val
.str
.startmb
;
1411 char tmp
[now
->val
.str
.lenmb
+ 1];
1414 unsigned long int from
;
1415 unsigned long int to
;
1417 /* We have to compute the ellipsis values using the symbolic names. */
1418 assert (last_str
!= NULL
);
1420 if (strlen (last_str
) != now
->val
.str
.lenmb
)
1424 _("`%s' and `%.*s' are no valid names for symbolic range"),
1425 last_str
, (int) now
->val
.str
.lenmb
, nowstr
);
1429 if (memcmp (last_str
, nowstr
, now
->val
.str
.lenmb
) == 0)
1430 /* Nothing to do, the names are the same. */
/* Advance CP over the leading characters both names have in common.  */
1433 for (cp
= last_str
; *cp
== *(nowstr
+ (cp
- last_str
)); ++cp
)
/* NOTE(review): FROM and TO are unsigned long int and strtoul signals
   overflow by returning ULONG_MAX, yet the checks below compare with
   UINT_MAX.  Where unsigned long is wider than unsigned int the overflow
   test cannot match -- confirm and consider ULONG_MAX.  */
1437 from
= strtoul (cp
, &endp
, base
);
1438 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *endp
!= '\0')
1441 to
= strtoul (nowstr
+ (cp
- last_str
), &endp
, base
);
1442 if ((to
== UINT_MAX
&& errno
== ERANGE
)
1443 || (endp
- nowstr
) != now
->val
.str
.lenmb
|| from
>= to
)
1446 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1447 if (!ignore_content
)
1449 now
->val
.str
.startmb
= tmp
;
1450 while ((from
+= step
) <= to
)
1452 struct charseq
*seq
;
/* Rebuild the name: the shared prefix followed by the counter, zero
   padded to the width of the original numeric tail.
   NOTE(review): the precision and width arguments are a pointer
   difference and an expression on lenmb, and FROM is unsigned long int,
   while the conversions used expect int arguments -- confirm the types
   and add casts or length modifiers if they differ.  */
1455 sprintf (tmp
, (base
== 10 ? "%.*s%0*d" : "%.*s%0*X"), cp
- last_str
,
1456 last_str
, now
->val
.str
.lenmb
- (cp
- last_str
), from
);
1458 get_character (now
, charmap
, repertoire
, &seq
, &wch
);
1460 if (seq
!= NULL
&& seq
->nbytes
== 1)
1461 /* Yep, we can store information about this byte sequence. */
1462 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
1464 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1465 /* We have the UCS4 position. */
1466 *find_idx (ctype
, &ctype
->class_collection
,
1467 &ctype
->class_collection_max
,
1468 &ctype
->class_collection_act
, wch
) |= class_bit
;
1470 if (handle_digits
== 1)
1472 /* We must store the digit values. */
1473 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1475 ctype
->mbdigits_max
*= 2;
1476 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1477 (ctype
->mbdigits_max
1478 * sizeof (char *)));
1479 ctype
->wcdigits_max
*= 2;
1480 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1481 (ctype
->wcdigits_max
1482 * sizeof (uint32_t)));
1485 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1486 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1488 else if (handle_digits
== 2)
1490 /* We must store the digit values. */
1491 if (ctype
->outdigits_act
>= 10)
1493 lr_error (ldfile
, _("\
1494 %s: field `%s' does not contain exactly ten entries"),
1495 "LC_CTYPE", "outdigit");
1499 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1500 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1501 ++ctype
->outdigits_act
;
/* Expand a range of UCS4 values and apply the class bits to every value
   in it.

   LAST_WCH is the first value of the range, now->val.ucs4 the last one and
   STEP the increment.  A first value larger than the last one is reported
   through lr_error.  With IGNORE_CONTENT set nothing is generated.

   For each value, starting at LAST_WCH + STEP, the visible code looks for
   a byte sequence: in the charmap under names formatted as U%08X (length
   9) and U%04X (length 5), then with repertoire_find_seq, then through the
   symbolic name from repertoire_find_symbol.  When nothing is found a
   static negative entry (ucs4 == ILLEGAL_CHAR_VALUE) stands in, and an
   entry keyed by the value is inserted into repertoire->seq_table.

   CLASS256_BIT is applied only when the sequence really belongs to the
   value (seq->ucs4 == last_wch) and is one byte long; CLASS_BIT is applied
   to the wide table through find_idx.  HANDLE_DIGITS == 1 appends to
   mbdigits/wcdigits, HANDLE_DIGITS == 2 to mboutdigits/wcoutdigits (at
   most ten entries).

   NOTE(review): many short lines (declarations, braces, else branches,
   the conditions joining the lookups) are missing from this listing, and
   the comment opened on the line numbered 1548 is not closed here, so it
   runs on to the end of the next comment.  Confirm against the pristine
   file.  */
1508 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
1510 charclass_ucs4_ellipsis (struct linereader
*ldfile
,
1511 struct locale_ctype_t
*ctype
,
1512 struct charmap_t
*charmap
,
1513 struct repertoire_t
*repertoire
,
1514 struct token
*now
, uint32_t last_wch
,
1515 unsigned long int class256_bit
,
1516 unsigned long int class_bit
, int ignore_content
,
1517 int handle_digits
, int step
)
1519 if (last_wch
> now
->val
.ucs4
)
1521 lr_error (ldfile
, _("\
1522 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1523 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, now
->val
.ucs4
,
1524 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, last_wch
);
1528 if (!ignore_content
)
1529 while ((last_wch
+= step
) <= now
->val
.ucs4
)
1531 /* We have to find out whether there is a byte sequence corresponding
1532 to this UCS4 value. */
1533 struct charseq
*seq
;
1536 snprintf (utmp
, sizeof (utmp
), "U%08X", last_wch
);
1537 seq
= charmap_find_value (charmap
, utmp
, 9);
1540 snprintf (utmp
, sizeof (utmp
), "U%04X", last_wch
);
1541 seq
= charmap_find_value (charmap
, utmp
, 5);
1545 /* Try looking in the repertoire map. */
1546 seq
= repertoire_find_seq (repertoire
, last_wch
);
1548 /* If this is the first time we look for this sequence create a new
1552 static const struct charseq negative
1553 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1555 /* Find the symbolic name for this UCS4 value. */
1556 if (repertoire
!= NULL
)
1558 const char *symbol
= repertoire_find_symbol (repertoire
,
1560 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
,
1565 /* We have a name, now search the multibyte value. */
1566 seq
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1569 /* We have to create a fake entry. */
1570 seq
= (struct charseq
*) &negative
;
1572 seq
->ucs4
= last_wch
;
1574 insert_entry (&repertoire
->seq_table
, newp
, sizeof (uint32_t),
1578 /* We have to create a fake entry. */
1579 seq
= (struct charseq
*) &negative
;
1582 /* We have a name, now search the multibyte value. */
/* The comparison with last_wch filters out the negative stand-in entry,
   whose ucs4 member is ILLEGAL_CHAR_VALUE.  */
1583 if (seq
->ucs4
== last_wch
&& seq
->nbytes
== 1)
1584 /* Yep, we can store information about this byte sequence. */
1585 ctype
->class256_collection
[(size_t) seq
->bytes
[0]]
1588 /* And of course we have the UCS4 position. */
1590 *find_idx (ctype
, &ctype
->class_collection
,
1591 &ctype
->class_collection_max
,
1592 &ctype
->class_collection_act
, last_wch
) |= class_bit
;
1594 if (handle_digits
== 1)
1596 /* We must store the digit values. */
1597 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1599 ctype
->mbdigits_max
*= 2;
1600 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1601 (ctype
->mbdigits_max
1602 * sizeof (char *)));
1603 ctype
->wcdigits_max
*= 2;
1604 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1605 (ctype
->wcdigits_max
1606 * sizeof (uint32_t)));
1609 ctype
->mbdigits
[ctype
->mbdigits_act
++] = (seq
->ucs4
== last_wch
1611 ctype
->wcdigits
[ctype
->wcdigits_act
++] = last_wch
;
1613 else if (handle_digits
== 2)
1615 /* We must store the digit values. */
1616 if (ctype
->outdigits_act
>= 10)
1618 lr_error (ldfile
, _("\
1619 %s: field `%s' does not contain exactly ten entries"),
1620 "LC_CTYPE", "outdigit");
1624 ctype
->mboutdigits
[ctype
->outdigits_act
] = (seq
->ucs4
== last_wch
1626 ctype
->wcoutdigits
[ctype
->outdigits_act
] = last_wch
;
1627 ++ctype
->outdigits_act
;
/* Expand a range given by two byte sequences and apply the class bits to
   every sequence in it.

   LAST_CHARCODE (LAST_CHARCODE_LEN bytes) is the first sequence of the
   range and is modified in place while counting; the last sequence is
   now->val.charcode.  Both must have the same length, and the first must
   not compare greater than the last under memcmp; either violation is
   reported through lr_error.  With IGNORE_CONTENT set nothing is
   generated.

   Visible steps per iteration: the byte sequence is incremented starting
   at its last byte, moving to the preceding byte whenever a byte wraps to
   zero; for one-byte sequences CLASS256_BIT is applied to
   class256_collection; the sequence is looked up with charmap_find_symbol
   and its UCS4 value, resolved lazily through repertoire_find_value,
   receives CLASS_BIT through find_idx.  The loop ends once LAST_CHARCODE
   equals the last sequence.

   For HANDLE_DIGITS == 1 and == 2 a fresh struct charseq with
   LAST_CHARCODE_LEN extra bytes is obtained from xmalloc, the bytes are
   copied directly behind the structure and nbytes is set; only nbytes is
   initialised in the visible code.  The HANDLE_DIGITS == 2 branch declares
   its own seq, which hides the outer variable of the same name.

   NOTE(review): the handle_digits parameter, the declarations of i and
   wch, braces and the exits after the error messages are on lines missing
   from this listing.  The byte loop tests i >= 0, which only terminates
   that way if i is a signed type -- confirm.  */
1633 /* Ellipsis as in `/xea/x12.../xea/x34'. */
1635 charclass_charcode_ellipsis (struct linereader
*ldfile
,
1636 struct locale_ctype_t
*ctype
,
1637 struct charmap_t
*charmap
,
1638 struct repertoire_t
*repertoire
,
1639 struct token
*now
, char *last_charcode
,
1640 uint32_t last_charcode_len
,
1641 unsigned long int class256_bit
,
1642 unsigned long int class_bit
, int ignore_content
,
1645 /* First check whether the to-value is larger. */
1646 if (now
->val
.charcode
.nbytes
!= last_charcode_len
)
1648 lr_error (ldfile
, _("\
1649 start end end character sequence of range must have the same length"));
1653 if (memcmp (last_charcode
, now
->val
.charcode
.bytes
, last_charcode_len
) > 0)
1655 lr_error (ldfile
, _("\
1656 to-value character sequence is smaller than from-value sequence"));
1660 if (!ignore_content
)
1664 /* Increment the byte sequence value. */
1665 struct charseq
*seq
;
1669 for (i
= last_charcode_len
- 1; i
>= 0; --i
)
1670 if (++last_charcode
[i
] != 0)
1673 if (last_charcode_len
== 1)
1674 /* Of course we have the charcode value. */
1675 ctype
->class256_collection
[(size_t) last_charcode
[0]]
1678 /* Find the symbolic name. */
1679 seq
= charmap_find_symbol (charmap
, last_charcode
,
1683 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1684 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1685 strlen (seq
->name
));
1686 wch
= seq
== NULL
? ILLEGAL_CHAR_VALUE
: seq
->ucs4
;
1688 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1689 *find_idx (ctype
, &ctype
->class_collection
,
1690 &ctype
->class_collection_max
,
1691 &ctype
->class_collection_act
, wch
) |= class_bit
;
1694 wch
= ILLEGAL_CHAR_VALUE
;
1696 if (handle_digits
== 1)
1698 /* We must store the digit values. */
1699 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1701 ctype
->mbdigits_max
*= 2;
1702 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1703 (ctype
->mbdigits_max
1704 * sizeof (char *)));
1705 ctype
->wcdigits_max
*= 2;
1706 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1707 (ctype
->wcdigits_max
1708 * sizeof (uint32_t)));
/* The digit entry gets a private copy of the current byte sequence,
   stored immediately after the structure itself.  */
1711 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1712 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1713 seq
->nbytes
= last_charcode_len
;
1715 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1716 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1718 else if (handle_digits
== 2)
1720 struct charseq
*seq
;
1721 /* We must store the digit values. */
1722 if (ctype
->outdigits_act
>= 10)
1724 lr_error (ldfile
, _("\
1725 %s: field `%s' does not contain exactly ten entries"),
1726 "LC_CTYPE", "outdigit");
1730 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1731 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1732 seq
->nbytes
= last_charcode_len
;
1734 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1735 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1736 ++ctype
->outdigits_act
;
1739 while (memcmp (last_charcode
, now
->val
.charcode
.bytes
,
1740 last_charcode_len
) != 0);
/* Convert the token NOW into a wide (uint32_t) string for use in a
   transliteration entry.

   Visible handling per token kind:
   - tok_default_missing: WSTR is set from a compound literal holding 0.
   - tok_bsymbol: a two-element array is obtained from xmalloc and its
     first element is the value found by repertoire_find_value; a result
     of ILLEGAL_CHAR_VALUE means the UCS4 value is unknown.
   - tok_ucs4: a two-element array whose first element is now->val.ucs4.
   - tok_charcode: the byte code is resolved with charmap_find_symbol; the
     ucs4 member of the entry is filled in lazily from the repertoire and
     then stored in a two-element array, unless it is ILLEGAL_CHAR_VALUE.
   - tok_string: the wide string now->val.str.startwc is used as is; a
     NULL or empty string is treated specially.
   - anything else: the rest of the line is skipped unless the token is
     tok_eol or tok_eof, SYNTAX_ERROR is raised and (uint32_t *) -1l is
     returned.

   NOTE(review): the return type, the other return statements and the
   store of the terminating element of the two-element arrays are on lines
   missing from this listing; presumably NULL is returned when no value
   can be determined -- confirm.  The comment opened on the line numbered
   1778 is not closed here, so it runs on to the end of the next comment.
   The array-typed result of the tok_default_missing compound literal and
   its lifetime should be checked against the pristine file as well.  */
1745 /* Read one transliteration entry. */
1747 read_widestring (struct linereader
*ldfile
, struct token
*now
,
1748 struct charmap_t
*charmap
, struct repertoire_t
*repertoire
)
1752 if (now
->tok
== tok_default_missing
)
1753 /* The special name "" will denote this case. */
1754 wstr
= ((uint32_t *) { 0 });
1755 else if (now
->tok
== tok_bsymbol
)
1757 /* Get the value from the repertoire. */
1758 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1759 wstr
[0] = repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1760 now
->val
.str
.lenmb
);
1761 if (wstr
[0] == ILLEGAL_CHAR_VALUE
)
1763 /* We cannot proceed, we don't know the UCS4 value. */
1770 else if (now
->tok
== tok_ucs4
)
1772 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1773 wstr
[0] = now
->val
.ucs4
;
1776 else if (now
->tok
== tok_charcode
)
1778 /* Argh, we have to convert to the symbol name first and then to the
1780 struct charseq
*seq
= charmap_find_symbol (charmap
,
1781 now
->val
.str
.startmb
,
1782 now
->val
.str
.lenmb
);
1784 /* Cannot find the UCS4 value. */
1787 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1788 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1789 strlen (seq
->name
));
1790 if (seq
->ucs4
== ILLEGAL_CHAR_VALUE
)
1791 /* We cannot proceed, we don't know the UCS4 value. */
1794 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1795 wstr
[0] = seq
->ucs4
;
1798 else if (now
->tok
== tok_string
)
1800 wstr
= now
->val
.str
.startwc
;
1801 if (wstr
== NULL
|| wstr
[0] == 0)
1806 if (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
1807 lr_ignore_rest (ldfile
, 0);
1808 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
/* The all-ones pointer is the in-band marker for a syntax error; callers
   have to test for it separately from NULL.  */
1809 return (uint32_t *) -1l;
/* Parse one transliteration rule: a from-string (taken from the token
   NOW) followed by one or more replacement strings separated by
   semicolons.

   Visible behaviour:
   - The from-string is produced by read_widestring; a NULL result means
     there is no valid from string.
   - A struct translit_t is allocated on the obstack ctype->mempool and
     records the from-string plus ldfile->fname and ldfile->lineno.
   - Tokens are then read with lr_token.  Each replacement is accumulated
     on the obstack with obstack_grow, using wcslen * 4 bytes per piece;
     on tok_semicolon or tok_eol after at least one piece it is terminated
     with a 4-byte zero, finished with obstack_finish and hung on the list
     of struct translit_to_t nodes addressed through TOP.
   - On tok_eol the finished rule is pushed onto the front of
     ctype->translit.
   - When read_widestring returns (uint32_t *) -1l for a replacement, the
     whole rule is released with obstack_free (ob, result).

   NOTE(review): read_widestring also reports failure as (uint32_t *) -1l,
   but the visible check on the from-string compares with NULL only --
   confirm whether the error value is handled on a line missing here.
   The loop construct, the declarations of first and to_wstr, the
   initialisation of TOP and all return statements are likewise not
   visible in this listing.  */
1817 read_translit_entry (struct linereader
*ldfile
, struct locale_ctype_t
*ctype
,
1818 struct token
*now
, struct charmap_t
*charmap
,
1819 struct repertoire_t
*repertoire
)
1821 uint32_t *from_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
1822 struct translit_t
*result
;
1823 struct translit_to_t
**top
;
1824 struct obstack
*ob
= &ctype
->mempool
;
1828 if (from_wstr
== NULL
)
1829 /* There is no valid from string. */
1832 result
= (struct translit_t
*) obstack_alloc (ob
,
1833 sizeof (struct translit_t
));
1834 result
->from
= from_wstr
;
1835 result
->fname
= ldfile
->fname
;
1836 result
->lineno
= ldfile
->lineno
;
1837 result
->next
= NULL
;
1847 /* Next we have one or more transliterations. They are
1848 separated by semicolons. */
1849 now
= lr_token (ldfile
, charmap
, repertoire
);
1851 if (!first
&& (now
->tok
== tok_semicolon
|| now
->tok
== tok_eol
))
1853 /* One string read. */
1854 const uint32_t zero
= 0;
/* Terminate the accumulated replacement with one zero element (4 bytes)
   before closing the growing obstack object.  */
1858 obstack_grow (ob
, &zero
, 4);
1859 to_wstr
= obstack_finish (ob
);
1861 *top
= obstack_alloc (ob
, sizeof (struct translit_to_t
));
1862 (*top
)->str
= to_wstr
;
1863 (*top
)->next
= NULL
;
1866 if (now
->tok
== tok_eol
)
1868 result
->next
= ctype
->translit
;
1869 ctype
->translit
= result
;
1874 top
= &(*top
)->next
;
1879 to_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
1880 if (to_wstr
== (uint32_t *) -1l)
1882 /* An error occurred. */
1883 obstack_free (ob
, result
);
1887 if (to_wstr
== NULL
)
1890 /* This value is usable. */
1891 obstack_grow (ob
, to_wstr
, wcslen ((wchar_t *) to_wstr
) * 4);
/* Parse the operand list of a translit_ignore line and record each
   character or character range in ctype->translit_ignore.

   Visible behaviour:
   - Each item must be a tok_bsymbol or tok_ucs4 token; anything else is a
     syntax error and the rest of the line is skipped.  An end of line or
     end of file where an item is expected yields the premature-end
     message.
   - The start value is now->val.ucs4 for tok_ucs4, otherwise it is looked
     up with repertoire_find_value; ILLEGAL_CHAR_VALUE is reported as an
     invalid character name.
   - A struct translit_ignore_t is allocated on ctype->mempool and pushed
     onto the front of ctype->translit_ignore.
   - After an item, tok_ellipsis2 or tok_ellipsis2_2 introduces the end
     value of a range (step 2 for tok_ellipsis2_2, otherwise 1), which is
     read the same way; an end value that is too small is reported.
   - tok_eol or tok_eof ends the list, tok_semicolon continues it, and any
     other token is a syntax error.

   NOTE(review): the enclosing loop, the declarations of from and to, the
   stores into the new list node and the range comparison itself are on
   lines missing from this listing.  The comment opened on the line
   numbered 1952 is not closed here, so it runs on to the end of the next
   comment.  The two invalid-character-name messages are not wrapped in
   the translation macro, unlike the other messages here.  */
1900 read_translit_ignore_entry (struct linereader
*ldfile
,
1901 struct locale_ctype_t
*ctype
,
1902 struct charmap_t
*charmap
,
1903 struct repertoire_t
*repertoire
)
1905 /* We expect a semicolon-separated list of characters we ignore. We are
1906 only interested in the wide character definitions. These must be
1907 single characters, possibly defining a range when an ellipsis is used. */
1910 struct token
*now
= lr_token (ldfile
, charmap
, repertoire
);
1911 struct translit_ignore_t
*newp
;
1914 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
1917 _("premature end of `translit_ignore' definition"));
1921 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
1923 lr_error (ldfile
, _("syntax error"));
1924 lr_ignore_rest (ldfile
, 0);
1928 if (now
->tok
== tok_ucs4
)
1929 from
= now
->val
.ucs4
;
1931 /* Try to get the value. */
1932 from
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1933 now
->val
.str
.lenmb
);
1935 if (from
== ILLEGAL_CHAR_VALUE
)
1937 lr_error (ldfile
, "invalid character name");
1942 newp
= (struct translit_ignore_t
*)
1943 obstack_alloc (&ctype
->mempool
, sizeof (struct translit_ignore_t
));
1948 newp
->next
= ctype
->translit_ignore
;
1949 ctype
->translit_ignore
= newp
;
1952 /* Now we expect either a semicolon, an ellipsis, or the end of the
1954 now
= lr_token (ldfile
, charmap
, repertoire
);
1956 if (now
->tok
== tok_ellipsis2
|| now
->tok
== tok_ellipsis2_2
)
1958 /* XXX Should we bother implementing `....'? `...' certainly
1959 will not be implemented. */
1961 int step
= now
->tok
== tok_ellipsis2_2
? 2 : 1;
1963 now
= lr_token (ldfile
, charmap
, repertoire
);
1965 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
1968 _("premature end of `translit_ignore' definition"));
1972 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
1974 lr_error (ldfile
, _("syntax error"));
1975 lr_ignore_rest (ldfile
, 0);
1979 if (now
->tok
== tok_ucs4
)
1982 /* Try to get the value. */
1983 to
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1984 now
->val
.str
.lenmb
);
1986 if (to
== ILLEGAL_CHAR_VALUE
)
1987 lr_error (ldfile
, "invalid character name");
1990 /* Make sure the `to'-value is larger. */
1997 lr_error (ldfile
, _("\
1998 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1999 (to
| from
) < 65536 ? 4 : 8, to
,
2000 (to
| from
) < 65536 ? 4 : 8, from
);
2003 /* And the next token. */
2004 now
= lr_token (ldfile
, charmap
, repertoire
);
2007 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2011 if (now
->tok
== tok_semicolon
)
2015 /* If we come here something is wrong. */
2016 lr_error (ldfile
, _("syntax error"));
2017 lr_ignore_rest (ldfile
, 0);
2023 /* The parser for the LC_CTYPE section of the locale definition. */
2025 ctype_read (struct linereader
*ldfile
, struct localedef_t
*result
,
2026 struct charmap_t
*charmap
, const char *repertoire_name
,
2029 struct repertoire_t
*repertoire
= NULL
;
2030 struct locale_ctype_t
*ctype
;
2032 enum token_t nowtok
;
2034 struct charseq
*last_seq
;
2035 uint32_t last_wch
= 0;
2036 enum token_t last_token
;
2037 enum token_t ellipsis_token
;
2039 char last_charcode
[16];
2040 size_t last_charcode_len
= 0;
2041 const char *last_str
= NULL
;
2044 /* Get the repertoire we have to use. */
2045 if (repertoire_name
!= NULL
)
2046 repertoire
= repertoire_read (repertoire_name
);
2048 /* The rest of the line containing `LC_CTYPE' must be free. */
2049 lr_ignore_rest (ldfile
, 1);
2054 now
= lr_token (ldfile
, charmap
, NULL
);
2057 while (nowtok
== tok_eol
);
2059 /* If we see `copy' now we are almost done. */
2060 if (nowtok
== tok_copy
)
2062 handle_copy (ldfile
, charmap
, repertoire_name
, result
, tok_lc_ctype
,
2063 LC_CTYPE
, "LC_CTYPE", ignore_content
);
2067 /* Prepare the data structures. */
2068 ctype_startup (ldfile
, result
, charmap
, ignore_content
);
2069 ctype
= result
->categories
[LC_CTYPE
].ctype
;
2071 /* Remember the repertoire we use. */
2072 if (!ignore_content
)
2073 ctype
->repertoire
= repertoire
;
2077 unsigned long int class_bit
= 0;
2078 unsigned long int class256_bit
= 0;
2079 int handle_digits
= 0;
2081 /* Of course we don't proceed beyond the end of file. */
2082 if (nowtok
== tok_eof
)
2085 /* Ignore empty lines. */
2086 if (nowtok
== tok_eol
)
2088 now
= lr_token (ldfile
, charmap
, NULL
);
2096 now
= lr_token (ldfile
, charmap
, NULL
);
2097 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2099 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
2100 now
= lr_token (ldfile
, charmap
, NULL
);
2101 if (now
->tok
!= tok_semicolon
)
2103 now
= lr_token (ldfile
, charmap
, NULL
);
2105 if (now
->tok
!= tok_eol
)
2107 %s: syntax error in definition of new character class"), "LC_CTYPE");
2111 now
= lr_token (ldfile
, charmap
, NULL
);
2112 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2114 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2115 now
= lr_token (ldfile
, charmap
, NULL
);
2116 if (now
->tok
!= tok_semicolon
)
2118 now
= lr_token (ldfile
, charmap
, NULL
);
2120 if (now
->tok
!= tok_eol
)
2122 %s: syntax error in definition of new character map"), "LC_CTYPE");
2126 /* Ignore the rest of the line if we don't need the input of this line. */
2130 lr_ignore_rest (ldfile
, 0);
2134 /* We simply forget the `class' keyword and use the following
2135 operand to determine the bit. */
2136 now
= lr_token (ldfile
, charmap
, NULL
);
2137 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2139 /* Must be one of the predefined class names. */
2140 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2141 if (strcmp (ctype
->classnames
[cnt
], now
->val
.str
.startmb
) == 0)
2143 if (cnt
>= ctype
->nr_charclass
)
2145 #ifdef PREDEFINED_CLASSES
2146 if (now
->val
.str
.lenmb
== 8
2147 && memcmp ("special1", now
->val
.str
.startmb
, 8) == 0)
2148 class_bit
= _ISwspecial1
;
2149 else if (now
->val
.str
.lenmb
== 8
2150 && memcmp ("special2", now
->val
.str
.startmb
, 8) == 0)
2151 class_bit
= _ISwspecial2
;
2152 else if (now
->val
.str
.lenmb
== 8
2153 && memcmp ("special3", now
->val
.str
.startmb
, 8) == 0)
2154 class_bit
= _ISwspecial3
;
2158 /* OK, it's a new class. */
2159 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
2161 class_bit
= _ISwbit (ctype
->nr_charclass
- 1);
2166 class_bit
= _ISwbit (cnt
);
2168 free (now
->val
.str
.startmb
);
2171 else if (now
->tok
== tok_digit
)
2172 goto handle_tok_digit
;
2173 else if (now
->tok
< tok_upper
|| now
->tok
> tok_blank
)
2177 class_bit
= BITw (now
->tok
);
2178 class256_bit
= BIT (now
->tok
);
2181 /* The next character must be a semicolon. */
2182 now
= lr_token (ldfile
, charmap
, NULL
);
2183 if (now
->tok
!= tok_semicolon
)
2185 goto read_charclass
;
2198 /* Ignore the rest of the line if we don't need the input of this line. */
2202 lr_ignore_rest (ldfile
, 0);
2206 class_bit
= BITw (now
->tok
);
2207 class256_bit
= BIT (now
->tok
);
2210 ctype
->class_done
|= class_bit
;
2211 last_token
= tok_none
;
2212 ellipsis_token
= tok_none
;
2214 now
= lr_token (ldfile
, charmap
, NULL
);
2215 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2218 struct charseq
*seq
;
2220 if (ellipsis_token
== tok_none
)
2222 if (get_character (now
, charmap
, repertoire
, &seq
, &wch
))
2225 if (!ignore_content
&& seq
!= NULL
&& seq
->nbytes
== 1)
2226 /* Yep, we can store information about this byte sequence. */
2228 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
2230 if (!ignore_content
&& wch
!= ILLEGAL_CHAR_VALUE
2232 /* We have the UCS4 position. */
2233 *find_idx (ctype
, &ctype
->class_collection
,
2234 &ctype
->class_collection_max
,
2235 &ctype
->class_collection_act
, wch
) |= class_bit
;
2237 last_token
= now
->tok
;
2238 /* Terminate the string. */
2239 if (last_token
== tok_bsymbol
)
2241 now
->val
.str
.startmb
[now
->val
.str
.lenmb
] = '\0';
2242 last_str
= now
->val
.str
.startmb
;
2248 memcpy (last_charcode
, now
->val
.charcode
.bytes
, 16);
2249 last_charcode_len
= now
->val
.charcode
.nbytes
;
2251 if (!ignore_content
&& handle_digits
== 1)
2253 /* We must store the digit values. */
2254 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
2256 ctype
->mbdigits_max
+= 10;
2257 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
2258 (ctype
->mbdigits_max
2259 * sizeof (char *)));
2260 ctype
->wcdigits_max
+= 10;
2261 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
2262 (ctype
->wcdigits_max
2263 * sizeof (uint32_t)));
2266 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
2267 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
2269 else if (!ignore_content
&& handle_digits
== 2)
2271 /* We must store the digit values. */
2272 if (ctype
->outdigits_act
>= 10)
2274 lr_error (ldfile
, _("\
2275 %s: field `%s' does not contain exactly ten entries"),
2276 "LC_CTYPE", "outdigit");
2280 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
2281 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
2282 ++ctype
->outdigits_act
;
2287 /* Now it gets complicated. We have to resolve the
2288 ellipsis problem. First we must distinguish between
2289 the different kind of ellipsis and this must match the
2290 tokens we have seen. */
2291 assert (last_token
!= tok_none
);
2293 if (last_token
!= now
->tok
)
2295 lr_error (ldfile
, _("\
2296 ellipsis range must be marked by two operands of same type"));
2297 lr_ignore_rest (ldfile
, 0);
2301 if (last_token
== tok_bsymbol
)
2303 if (ellipsis_token
== tok_ellipsis3
)
2304 lr_error (ldfile
, _("with symbolic name range values \
2305 the absolute ellipsis `...' must not be used"));
2307 charclass_symbolic_ellipsis (ldfile
, ctype
, charmap
,
2308 repertoire
, now
, last_str
,
2309 class256_bit
, class_bit
,
2314 handle_digits
, step
);
2316 else if (last_token
== tok_ucs4
)
2318 if (ellipsis_token
!= tok_ellipsis2
)
2319 lr_error (ldfile
, _("\
2320 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2322 charclass_ucs4_ellipsis (ldfile
, ctype
, charmap
,
2323 repertoire
, now
, last_wch
,
2324 class256_bit
, class_bit
,
2325 ignore_content
, handle_digits
,
2330 assert (last_token
== tok_charcode
);
2332 if (ellipsis_token
!= tok_ellipsis3
)
2333 lr_error (ldfile
, _("\
2334 with character code range values one must use the absolute ellipsis `...'"));
2336 charclass_charcode_ellipsis (ldfile
, ctype
, charmap
,
2340 class256_bit
, class_bit
,
2345 /* Now we have used the last value. */
2346 last_token
= tok_none
;
2349 /* Next we expect a semicolon or the end of the line. */
2350 now
= lr_token (ldfile
, charmap
, NULL
);
2351 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2354 if (last_token
!= tok_none
2355 && now
->tok
>= tok_ellipsis2
&& now
->tok
<= tok_ellipsis4_2
)
2357 if (now
->tok
== tok_ellipsis2_2
)
2359 now
->tok
= tok_ellipsis2
;
2362 else if (now
->tok
== tok_ellipsis4_2
)
2364 now
->tok
= tok_ellipsis4
;
2368 ellipsis_token
= now
->tok
;
2370 now
= lr_token (ldfile
, charmap
, NULL
);
2374 if (now
->tok
!= tok_semicolon
)
2377 /* And get the next character. */
2378 now
= lr_token (ldfile
, charmap
, NULL
);
2380 ellipsis_token
= tok_none
;
2386 /* Ignore the rest of the line if we don't need the input of this line. */
2390 lr_ignore_rest (ldfile
, 0);
2395 class_bit
= _ISwdigit
;
2396 class256_bit
= _ISdigit
;
2398 goto read_charclass
;
2401 /* Ignore the rest of the line if we don't need the input of this line. */
2405 lr_ignore_rest (ldfile
, 0);
2409 if (ctype
->outdigits_act
!= 0)
2410 lr_error (ldfile
, _("\
2411 %s: field `%s' declared more than once"),
2412 "LC_CTYPE", "outdigit");
2416 goto read_charclass
;
2419 /* Ignore the rest of the line if we don't need the input of this line. */
2423 lr_ignore_rest (ldfile
, 0);
2431 /* Ignore the rest of the line if we don't need the input of this line. */
2435 lr_ignore_rest (ldfile
, 0);
2443 /* Ignore the rest of the line if we don't need the input of this line. */
2447 lr_ignore_rest (ldfile
, 0);
2451 /* We simply forget the `map' keyword and use the following
2452 operand to determine the mapping. */
2453 now
= lr_token (ldfile
, charmap
, NULL
);
2454 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2458 for (cnt
= 2; cnt
< ctype
->map_collection_nr
; ++cnt
)
2459 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2462 if (cnt
< ctype
->map_collection_nr
)
2463 free (now
->val
.str
.startmb
);
2465 /* OK, it's a new map. */
2466 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2470 else if (now
->tok
< tok_toupper
|| now
->tok
> tok_tolower
)
2473 mapidx
= now
->tok
- tok_toupper
;
2475 now
= lr_token (ldfile
, charmap
, NULL
);
2476 /* This better should be a semicolon. */
2477 if (now
->tok
!= tok_semicolon
)
2481 /* Test whether this mapping was already defined. */
2482 if (ctype
->tomap_done
[mapidx
])
2484 lr_error (ldfile
, _("duplicated definition for mapping `%s'"),
2485 ctype
->mapnames
[mapidx
]);
2486 lr_ignore_rest (ldfile
, 0);
2489 ctype
->tomap_done
[mapidx
] = 1;
2491 now
= lr_token (ldfile
, charmap
, NULL
);
2492 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2494 struct charseq
*from_seq
;
2496 struct charseq
*to_seq
;
2499 /* Every pair starts with an opening brace. */
2500 if (now
->tok
!= tok_open_brace
)
2503 /* Next comes the from-value. */
2504 now
= lr_token (ldfile
, charmap
, NULL
);
2505 if (get_character (now
, charmap
, repertoire
, &from_seq
,
2509 /* The next is a comma. */
2510 now
= lr_token (ldfile
, charmap
, NULL
);
2511 if (now
->tok
!= tok_comma
)
2514 /* And the other value. */
2515 now
= lr_token (ldfile
, charmap
, NULL
);
2516 if (get_character (now
, charmap
, repertoire
, &to_seq
,
2520 /* And the last thing is the closing brace. */
2521 now
= lr_token (ldfile
, charmap
, NULL
);
2522 if (now
->tok
!= tok_close_brace
)
2525 if (!ignore_content
)
2527 if (mapidx
< 2 && from_seq
!= NULL
&& to_seq
!= NULL
2528 && from_seq
->nbytes
== 1 && to_seq
->nbytes
== 1)
2529 /* We can use this value. */
2530 ctype
->map256_collection
[mapidx
][from_seq
->bytes
[0]]
2533 if (from_wch
!= ILLEGAL_CHAR_VALUE
2534 && to_wch
!= ILLEGAL_CHAR_VALUE
)
2535 /* Both correct values. */
2536 *find_idx (ctype
, &ctype
->map_collection
[mapidx
],
2537 &ctype
->map_collection_max
[mapidx
],
2538 &ctype
->map_collection_act
[mapidx
],
2542 /* Now comes a semicolon or the end of the line/file. */
2543 now
= lr_token (ldfile
, charmap
, NULL
);
2544 if (now
->tok
== tok_semicolon
)
2545 now
= lr_token (ldfile
, charmap
, NULL
);
2549 case tok_translit_start
:
2550 /* Ignore the rest of the line if we don't need the input of
2554 lr_ignore_rest (ldfile
, 0);
2558 /* The rest of the line better should be empty. */
2559 lr_ignore_rest (ldfile
, 1);
2561 /* We count here the number of allocated entries in the `translit'
2565 /* We proceed until we see the `translit_end' token. */
2566 while (now
= lr_token (ldfile
, charmap
, repertoire
),
2567 now
->tok
!= tok_translit_end
&& now
->tok
!= tok_eof
)
2569 if (now
->tok
== tok_eol
)
2570 /* Ignore empty lines. */
2573 if (now
->tok
== tok_translit_end
)
2575 lr_ignore_rest (ldfile
, 0);
2579 if (now
->tok
== tok_include
)
2581 /* We have to include locale. */
2582 const char *locale_name
;
2583 const char *repertoire_name
;
2585 now
= lr_token (ldfile
, charmap
, NULL
);
2586 /* This should be a string or an identifier. In any
2587 case something to name a locale. */
2588 if (now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2591 lr_error (ldfile
, _("%s: syntax error"), "LC_CTYPE");
2592 lr_ignore_rest (ldfile
, 0);
2595 locale_name
= now
->val
.str
.startmb
;
2597 /* Next should be a semicolon. */
2598 now
= lr_token (ldfile
, charmap
, NULL
);
2599 if (now
->tok
!= tok_semicolon
)
2600 goto translit_syntax
;
2602 /* Now the repertoire name. */
2603 now
= lr_token (ldfile
, charmap
, NULL
);
2604 if ((now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2605 || now
->val
.str
.startmb
== NULL
)
2606 goto translit_syntax
;
2607 repertoire_name
= now
->val
.str
.startmb
;
2609 /* We must not have more than one `include'. */
2610 if (ctype
->translit_copy_locale
!= NULL
)
2612 lr_error (ldfile
, _("\
2613 %s: only one `include' instruction allowed"), "LC_CTYPE");
2614 lr_ignore_rest (ldfile
, 0);
2618 ctype
->translit_copy_locale
= locale_name
;
2619 ctype
->translit_copy_repertoire
= repertoire_name
;
2621 /* The rest of the line must be empty. */
2622 lr_ignore_rest (ldfile
, 1);
2624 /* Make sure the locale is read. */
2625 add_to_readlist (LC_CTYPE
, ctype
->translit_copy_locale
,
2626 repertoire_name
, 1, NULL
);
2629 else if (now
->tok
== tok_default_missing
)
2633 /* We expect a single character or string as the
2635 now
= lr_token (ldfile
, charmap
, NULL
);
2636 wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
2640 if (ctype
->default_missing
!= NULL
)
2642 lr_error (ldfile
, _("\
2643 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2644 error_at_line (0, 0, ctype
->default_missing_file
,
2645 ctype
->default_missing_lineno
,
2646 _("previous definition was here"));
2650 ctype
->default_missing
= wstr
;
2651 ctype
->default_missing_file
= ldfile
->fname
;
2652 ctype
->default_missing_lineno
= ldfile
->lineno
;
2655 lr_ignore_rest (ldfile
, 1);
2658 else if (now
->tok
== tok_translit_ignore
)
2660 read_translit_ignore_entry (ldfile
, ctype
, charmap
,
2665 read_translit_entry (ldfile
, ctype
, now
, charmap
, repertoire
);
2670 /* Ignore the rest of the line if we don't need the input of
2674 lr_ignore_rest (ldfile
, 0);
2678 /* This could mean one of several things. First test whether
2679 it's a character class name. */
2680 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2681 if (strcmp (now
->val
.str
.startmb
, ctype
->classnames
[cnt
]) == 0)
2683 if (cnt
< ctype
->nr_charclass
)
2685 class_bit
= _ISwbit (cnt
);
2686 class256_bit
= cnt
<= 11 ? _ISbit (cnt
) : 0;
2687 free (now
->val
.str
.startmb
);
2688 goto read_charclass
;
2690 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
2691 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2693 if (cnt
< ctype
->map_collection_nr
)
2696 free (now
->val
.str
.startmb
);
2699 #ifdef PREDEFINED_CLASSES
2700 if (strcmp (now
->val
.str
.startmb
, "special1") == 0)
2702 class_bit
= _ISwspecial1
;
2703 free (now
->val
.str
.startmb
);
2704 goto read_charclass
;
2706 if (strcmp (now
->val
.str
.startmb
, "special2") == 0)
2708 class_bit
= _ISwspecial2
;
2709 free (now
->val
.str
.startmb
);
2710 goto read_charclass
;
2712 if (strcmp (now
->val
.str
.startmb
, "special3") == 0)
2714 class_bit
= _ISwspecial3
;
2715 free (now
->val
.str
.startmb
);
2716 goto read_charclass
;
2718 if (strcmp (now
->val
.str
.startmb
, "tosymmetric") == 0)
2727 /* Next we assume `LC_CTYPE'. */
2728 now
= lr_token (ldfile
, charmap
, NULL
);
2729 if (now
->tok
== tok_eof
)
2731 if (now
->tok
== tok_eol
)
2732 lr_error (ldfile
, _("%s: incomplete `END' line"),
2734 else if (now
->tok
!= tok_lc_ctype
)
2735 lr_error (ldfile
, _("\
2736 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2737 lr_ignore_rest (ldfile
, now
->tok
== tok_lc_ctype
);
2742 if (now
->tok
!= tok_eof
)
2743 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2746 /* Prepare for the next round. */
2747 now
= lr_token (ldfile
, charmap
, NULL
);
2751 /* When we come here we reached the end of the file. */
2752 lr_error (ldfile
, _("%s: premature end of file"), "LC_CTYPE");
/* Fill in defaults for LC_CTYPE keywords the locale source did not define.
   A class counts as undefined when its bit is clear in ctype->class_done,
   toupper/tolower when ctype->tomap_done[0]/[1] is 0, and outdigits when
   ctype->outdigits_act is 0.  Defaults are entered both in the byte-indexed
   tables (class256_collection, map256_collection) and in the
   wide-character tables reached through ELEM.
   NOTE(review): this listing does not show every line of the function
   (braces, some declarations and the error-reporting calls around the
   message strings are absent); treat the visible statements as an outline
   and confirm details against the complete source.  */
2757 set_class_defaults (struct locale_ctype_t
*ctype
, struct charmap_t
*charmap
,
2758 struct repertoire_t
*repertoire
)
2762 /* This function defines the default values for the classes and conversions
2763 according to POSIX.2 2.5.2.1.
2764 It may seem that the order of these if-blocks is arbitrary but it is NOT.
2765 Don't move them unless you know what you do! */
/* Nested helper: for every character code from FROM to TO, look its name
   (TMP, length 1) up in CHARMAP and add the class bit for BITPOS to the
   byte table and to the wide-character table.  */
2767 void set_default (int bitpos
, int from
, int to
)
2771 int bit
= _ISbit (bitpos
);
2772 int bitw
= _ISwbit (bitpos
);
2773 /* Define string. */
2776 for (ch
= from
; ch
<= to
; ++ch
)
2778 struct charseq
*seq
;
2781 seq
= charmap_find_value (charmap
, tmp
, 1);
2786 %s: character `%s' not defined in charmap while needed as default value"),
2789 else if (seq
->nbytes
!= 1)
2791 %s: character `%s' in charmap not representable with one byte"),
2794 ctype
->class256_collection
[seq
->bytes
[0]] |= bit
;
2796 /* No need to search here, the ASCII value is also the Unicode
2798 ELEM (ctype
, class_collection
, , ch
) |= bitw
;
2802 /* Set default values if keyword was not present. */
2803 if ((ctype
->class_done
& BITw (tok_upper
)) == 0)
2804 /* "If this keyword [upper] is not specified, the uppercase letters
2805 `A' through `Z', ..., shall automatically belong to this class,
2806 with implementation defined character values." [P1003.2, 2.5.2.1] */
2807 set_default (BITPOS (tok_upper
), 'A', 'Z');
2809 if ((ctype
->class_done
& BITw (tok_lower
)) == 0)
2810 /* "If this keyword [lower] is not specified, the lowercase letters
2811 `a' through `z', ..., shall automatically belong to this class,
2812 with implementation defined character values." [P1003.2, 2.5.2.1] */
2813 set_default (BITPOS (tok_lower
), 'a', 'z');
2815 if ((ctype
->class_done
& BITw (tok_alpha
)) == 0)
2817 /* Table 2-6 in P1003.2 says that characters in class `upper' or
2818 class `lower' *must* be in class `alpha'. */
2819 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
);
2820 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
);
2822 for (cnt
= 0; cnt
< 256; ++cnt
)
2823 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
2824 ctype
->class256_collection
[cnt
] |= BIT (tok_alpha
);
2826 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
2827 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
2828 ctype
->class_collection
[cnt
] |= BITw (tok_alpha
);
2831 if ((ctype
->class_done
& BITw (tok_digit
)) == 0)
2832 /* "If this keyword [digit] is not specified, the digits `0' through
2833 `9', ..., shall automatically belong to this class, with
2834 implementation-defined character values." [P1003.2, 2.5.2.1] */
2835 set_default (BITPOS (tok_digit
), '0', '9');
2837 /* "Only characters specified for the `alpha' and `digit' keyword
2838 shall be specified. Characters specified for the keyword `alpha'
2839 and `digit' are automatically included in this class. */
2841 unsigned long int mask
= BIT (tok_alpha
) | BIT (tok_digit
);
2842 unsigned long int maskw
= BITw (tok_alpha
) | BITw (tok_digit
);
2844 for (cnt
= 0; cnt
< 256; ++cnt
)
2845 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
2846 ctype
->class256_collection
[cnt
] |= BIT (tok_alnum
);
2848 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
2849 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
2850 ctype
->class_collection
[cnt
] |= BITw (tok_alnum
);
2853 if ((ctype
->class_done
& BITw (tok_space
)) == 0)
2854 /* "If this keyword [space] is not specified, the characters <space>,
2855 <form-feed>, <newline>, <carriage-return>, <tab>, and
2856 <vertical-tab>, ..., shall automatically belong to this class,
2857 with implementation-defined character values." [P1003.2, 2.5.2.1] */
2859 struct charseq
*seq
;
2861 seq
= charmap_find_value (charmap
, "space", 5);
2863 seq
= charmap_find_value (charmap
, "SP", 2);
2865 seq
= charmap_find_value (charmap
, "U00000020", 9);
2870 %s: character `%s' not defined while needed as default value"),
2871 "LC_CTYPE", "<space>");
2873 else if (seq
->nbytes
!= 1)
2875 %s: character `%s' in charmap not representable with one byte"),
2876 "LC_CTYPE", "<space>");
2878 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
2880 /* No need to search. */
2881 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_space
);
2883 seq
= charmap_find_value (charmap
, "form-feed", 9);
2885 seq
= charmap_find_value (charmap
, "U0000000C", 9);
2890 %s: character `%s' not defined while needed as default value"),
2891 "LC_CTYPE", "<form-feed>");
2893 else if (seq
->nbytes
!= 1)
2895 %s: character `%s' in charmap not representable with one byte"),
2896 "LC_CTYPE", "<form-feed>");
2898 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
2900 /* No need to search. */
2901 ELEM (ctype
, class_collection
, , L
'\f') |= BITw (tok_space
);
2904 seq
= charmap_find_value (charmap
, "newline", 7);
2906 seq
= charmap_find_value (charmap
, "U0000000A", 9);
2911 character `%s' not defined while needed as default value"),
2914 else if (seq
->nbytes
!= 1)
2916 %s: character `%s' in charmap not representable with one byte"),
2917 "LC_CTYPE", "<newline>");
2919 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
2921 /* No need to search. */
2922 ELEM (ctype
, class_collection
, , L
'\n') |= BITw (tok_space
);
2925 seq
= charmap_find_value (charmap
, "carriage-return", 15);
2927 seq
= charmap_find_value (charmap
, "U0000000D", 9);
2932 %s: character `%s' not defined while needed as default value"),
2933 "LC_CTYPE", "<carriage-return>");
2935 else if (seq
->nbytes
!= 1)
2937 %s: character `%s' in charmap not representable with one byte"),
2938 "LC_CTYPE", "<carriage-return>");
2940 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
2942 /* No need to search. */
2943 ELEM (ctype
, class_collection
, , L
'\r') |= BITw (tok_space
);
2946 seq
= charmap_find_value (charmap
, "tab", 3);
2948 seq
= charmap_find_value (charmap
, "U00000009", 9);
2953 %s: character `%s' not defined while needed as default value"),
2954 "LC_CTYPE", "<tab>");
2956 else if (seq
->nbytes
!= 1)
2958 %s: character `%s' in charmap not representable with one byte"),
2959 "LC_CTYPE", "<tab>");
2961 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
2963 /* No need to search. */
2964 ELEM (ctype
, class_collection
, , L
'\t') |= BITw (tok_space
);
2967 seq
= charmap_find_value (charmap
, "vertical-tab", 12);
2969 seq
= charmap_find_value (charmap
, "U0000000B", 9);
2974 %s: character `%s' not defined while needed as default value"),
2975 "LC_CTYPE", "<vertical-tab>");
2977 else if (seq
->nbytes
!= 1)
2979 %s: character `%s' in charmap not representable with one byte"),
2980 "LC_CTYPE", "<vertical-tab>");
2982 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
2984 /* No need to search. */
2985 ELEM (ctype
, class_collection
, , L
'\v') |= BITw (tok_space
);
2988 if ((ctype
->class_done
& BITw (tok_xdigit
)) == 0)
2989 /* "If this keyword is not specified, the digits `0' to `9', the
2990 uppercase letters `A' through `F', and the lowercase letters `a'
2991 through `f', ..., shall automatically belong to this class, with
2992 implementation defined character values." [P1003.2, 2.5.2.1] */
2994 set_default (BITPOS (tok_xdigit
), '0', '9');
2995 set_default (BITPOS (tok_xdigit
), 'A', 'F');
2996 set_default (BITPOS (tok_xdigit
), 'a', 'f');
2999 if ((ctype
->class_done
& BITw (tok_blank
)) == 0)
3000 /* "If this keyword [blank] is unspecified, the characters <space> and
3001 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3003 struct charseq
*seq
;
3005 seq
= charmap_find_value (charmap
, "space", 5);
3007 seq
= charmap_find_value (charmap
, "SP", 2);
3009 seq
= charmap_find_value (charmap
, "U00000020", 9);
3014 %s: character `%s' not defined while needed as default value"),
3015 "LC_CTYPE", "<space>");
3017 else if (seq
->nbytes
!= 1)
3019 %s: character `%s' in charmap not representable with one byte"),
3020 "LC_CTYPE", "<space>");
3022 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
3024 /* No need to search. */
3025 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_blank
);
3028 seq
= charmap_find_value (charmap
, "tab", 3);
3030 seq
= charmap_find_value (charmap
, "U00000009", 9);
3035 %s: character `%s' not defined while needed as default value"),
3036 "LC_CTYPE", "<tab>");
3038 else if (seq
->nbytes
!= 1)
3040 %s: character `%s' in charmap not representable with one byte"),
3041 "LC_CTYPE", "<tab>");
3043 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
3045 /* No need to search. */
3046 ELEM (ctype
, class_collection
, , L
'\t') |= BITw (tok_blank
);
3049 if ((ctype
->class_done
& BITw (tok_graph
)) == 0)
3050 /* "If this keyword [graph] is not specified, characters specified for
3051 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3052 shall belong to this character class." [P1003.2, 2.5.2.1] */
3054 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
3055 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
3056 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
) |
3057 BITw (tok_alpha
) | BITw (tok_digit
) | BITw (tok_xdigit
) |
3061 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3062 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3063 ctype
->class_collection
[cnt
] |= BITw (tok_graph
);
3065 for (cnt
= 0; cnt
< 256; ++cnt
)
3066 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3067 ctype
->class256_collection
[cnt
] |= BIT (tok_graph
);
3070 if ((ctype
->class_done
& BITw (tok_print
)) == 0)
3071 /* "If this keyword [print] is not provided, characters specified for
3072 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3073 and the <space> character shall belong to this character class."
3074 [P1003.2, 2.5.2.1] */
3076 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
3077 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
3078 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
) |
3079 BITw (tok_alpha
) | BITw (tok_digit
) | BITw (tok_xdigit
) |
3082 struct charseq
*seq
;
3084 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3085 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3086 ctype
->class_collection
[cnt
] |= BITw (tok_print
);
3088 for (cnt
= 0; cnt
< 256; ++cnt
)
3089 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3090 ctype
->class256_collection
[cnt
] |= BIT (tok_print
);
3093 seq
= charmap_find_value (charmap
, "space", 5);
3095 seq
= charmap_find_value (charmap
, "SP", 2);
3097 seq
= charmap_find_value (charmap
, "U00000020", 9);
3102 %s: character `%s' not defined while needed as default value"),
3103 "LC_CTYPE", "<space>");
3105 else if (seq
->nbytes
!= 1)
3107 %s: character `%s' in charmap not representable with one byte"),
3108 "LC_CTYPE", "<space>");
3110 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_print
);
3112 /* No need to search. */
3113 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_print
);
3116 if (ctype
->tomap_done
[0] == 0)
3117 /* "If this keyword [toupper] is not specified, the lowercase letters
3118 `a' through `z', and their corresponding uppercase letters `A' to
3119 `Z', ..., shall automatically be included, with implementation-
3120 defined character values." [P1003.2, 2.5.2.1] */
3125 strcpy (tmp
, "<?>");
3127 for (ch
= 'a'; ch
<= 'z'; ++ch
)
3129 struct charseq
*seq_from
, *seq_to
;
3133 seq_from
= charmap_find_value (charmap
, &tmp
[1], 1);
3134 if (seq_from
== NULL
)
3138 %s: character `%s' not defined while needed as default value"),
3141 else if (seq_from
->nbytes
!= 1)
3145 %s: character `%s' needed as default value not representable with one byte"),
3150 /* This conversion is implementation defined. */
3151 tmp
[1] = (char) (ch
+ ('A' - 'a'));
3152 seq_to
= charmap_find_value (charmap
, &tmp
[1], 1);
3157 %s: character `%s' not defined while needed as default value"),
3160 else if (seq_to
->nbytes
!= 1)
3164 %s: character `%s' needed as default value not representable with one byte"),
3168 /* The index [0] is determined by the order of the
3169 `ctype_map_newP' calls in `ctype_startup'. */
3170 ctype
->map256_collection
[0][seq_from
->bytes
[0]]
3174 /* No need to search. */
3175 ELEM (ctype
, map_collection
, [0], ch
) = ch
+ ('A' - 'a');
3179 if (ctype
->tomap_done
[1] == 0)
3180 /* "If this keyword [tolower] is not specified, the mapping shall be
3181 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3183 for (cnt
= 0; cnt
< ctype
->map_collection_act
[0]; ++cnt
)
3184 if (ctype
->map_collection
[0][cnt
] != 0)
3185 ELEM (ctype
, map_collection
, [1],
3186 ctype
->map_collection
[0][cnt
])
3187 = ctype
->charnames
[cnt
];
3189 for (cnt
= 0; cnt
< 256; ++cnt
)
3190 if (ctype
->map256_collection
[0][cnt
] != 0)
3191 ctype
->map256_collection
[1][ctype
->map256_collection
[0][cnt
]] = cnt
;
3194 if (ctype
->outdigits_act
== 0)
3196 for (cnt
= 0; cnt
< 10; ++cnt
)
3198 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3201 if (ctype
->mboutdigits
[cnt
] == NULL
)
3202 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3204 strlen (longnames
[cnt
]));
3206 if (ctype
->mboutdigits
[cnt
] == NULL
)
3207 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3210 if (ctype
->mboutdigits
[cnt
] == NULL
)
3212 /* Provide a replacement. */
3214 no output digits defined and none of the standard names in the charmap"));
3216 ctype
->mboutdigits
[cnt
] = obstack_alloc (&charmap
->mem_pool
,
3217 sizeof (struct charseq
)
3220 /* This is better than nothing. */
3221 ctype
->mboutdigits
[cnt
]->bytes
[0] = digits
[cnt
];
3222 ctype
->mboutdigits
[cnt
]->nbytes
= 1;
3225 ctype
->wcoutdigits
[cnt
] = L
'0' + cnt
;
3228 ctype
->outdigits_act
= 10;
3233 /* Construction of sparse 3-level tables.
3234 See wchar-lookup.h for their structure and the meaning of p and q. */
3241 /* Working representation. */
3242 size_t level1_alloc
;
3245 size_t level2_alloc
;
3248 size_t level3_alloc
;
3251 /* Compressed representation. */
3256 /* Initialize. Assumes t->p and t->q have already been set. */
/* Put the working representation of T into the empty state: the
   allocated and used counts of all three levels become zero.  The level
   pointers themselves are not assigned here, which is why
   wctype_table_add tests levelN_alloc > 0 before choosing between
   xrealloc and xmalloc.  */
3258 wctype_table_init (struct wctype_table
*t
)
3260 t
->level1_alloc
= t
->level1_size
= 0;
3261 t
->level2_alloc
= t
->level2_size
= 0;
3262 t
->level3_alloc
= t
->level3_size
= 0;
3265 /* Add one entry. */
/* Set the bit for wide character WC in the three-level working table T.
   WC is cut into four fields, from the top:
     index1 = WC >> (q + p + 5)   slot in level1,
     index2 = the next q bits     entry inside a level2 block,
     index3 = the next p bits     32-bit word inside a level3 block,
     index4 = the low 5 bits      bit inside that word.
   level1 grows on demand; unused level1/level2 entries hold ~0.  Missing
   level2/level3 blocks are appended at the end of their arrays.
   NOTE(review): some original lines are absent from this listing (for
   instance the declarations of i, i1, i2 and the body of the loop that
   initialises a new level3 block); confirm against the complete source.  */
3267 wctype_table_add (struct wctype_table
*t
, uint32_t wc
)
3269 uint32_t index1
= wc
>> (t
->q
+ t
->p
+ 5);
3270 uint32_t index2
= (wc
>> (t
->p
+ 5)) & ((1 << t
->q
) - 1);
3271 uint32_t index3
= (wc
>> 5) & ((1 << t
->p
) - 1);
3272 uint32_t index4
= wc
& 0x1f;
/* Make sure level1 has a slot for index1; the capacity is at least
   doubled and freshly exposed slots are marked ~0 (no level2 block).  */
3275 if (index1
>= t
->level1_size
)
3277 if (index1
>= t
->level1_alloc
)
3279 size_t alloc
= 2 * t
->level1_alloc
;
3280 if (alloc
<= index1
)
3282 t
->level1
= (t
->level1_alloc
> 0
3283 ? (uint32_t *) xrealloc ((char *) t
->level1
,
3284 alloc
* sizeof (uint32_t))
3285 : (uint32_t *) xmalloc (alloc
* sizeof (uint32_t)));
3286 t
->level1_alloc
= alloc
;
3288 while (index1
>= t
->level1_size
)
3289 t
->level1
[t
->level1_size
++] = ~((uint32_t) 0);
/* No level2 block for this slot yet: append one (1 << q entries, all ~0)
   and record its block number in level1.  */
3292 if (t
->level1
[index1
] == ~((uint32_t) 0))
3294 if (t
->level2_size
== t
->level2_alloc
)
3296 size_t alloc
= 2 * t
->level2_alloc
+ 1;
3297 t
->level2
= (t
->level2_alloc
> 0
3298 ? (uint32_t *) xrealloc ((char *) t
->level2
,
3299 (alloc
<< t
->q
) * sizeof (uint32_t))
3300 : (uint32_t *) xmalloc ((alloc
<< t
->q
) * sizeof (uint32_t)));
3301 t
->level2_alloc
= alloc
;
3303 i1
= t
->level2_size
<< t
->q
;
3304 i2
= (t
->level2_size
+ 1) << t
->q
;
3305 for (i
= i1
; i
< i2
; i
++)
3306 t
->level2
[i
] = ~((uint32_t) 0);
3307 t
->level1
[index1
] = t
->level2_size
++;
/* Turn index2 into an absolute position in the level2 array.  */
3310 index2
+= t
->level1
[index1
] << t
->q
;
/* No level3 block for this entry yet: append one of 1 << p words and
   record its block number in level2.  */
3312 if (t
->level2
[index2
] == ~((uint32_t) 0))
3314 if (t
->level3_size
== t
->level3_alloc
)
3316 size_t alloc
= 2 * t
->level3_alloc
+ 1;
3317 t
->level3
= (t
->level3_alloc
> 0
3318 ? (uint32_t *) xrealloc ((char *) t
->level3
,
3319 (alloc
<< t
->p
) * sizeof (uint32_t))
3320 : (uint32_t *) xmalloc ((alloc
<< t
->p
) * sizeof (uint32_t)));
3321 t
->level3_alloc
= alloc
;
3323 i1
= t
->level3_size
<< t
->p
;
3324 i2
= (t
->level3_size
+ 1) << t
->p
;
3325 for (i
= i1
; i
< i2
; i
++)
3327 t
->level2
[index2
] = t
->level3_size
++;
/* Turn index3 into an absolute position in level3 and set WC's bit.  */
3330 index3
+= t
->level2
[index2
] << t
->p
;
3332 t
->level3
[index3
] |= (uint32_t)1 << index4
;
3335 /* Finalize and shrink. */
/* Compress the working table T and build its final image in t->result.
   Steps visible below:
     1. identical level3 blocks are merged and level2 entries are
        renumbered through reorder3[];
     2. identical level2 blocks are merged and level1 entries are
        renumbered through reorder2[];
     3. a buffer of t->result_size bytes is obtained with xmalloc and
        filled with a five-word header followed by the level1, level2 and
        level3 arrays, where level1/level2 entries become byte offsets
        into the buffer.
   reorder3[] and reorder2[] are variable-length arrays sized by the
   current block counts.
   NOTE(review): several original lines are absent from this listing (the
   bookkeeping inside the uniquify loops, the left-hand sides of the
   size/offset computations, the value stored for unused entries and the
   statements guarded by the final three `if's); confirm against the
   complete source.  */
3337 wctype_table_finalize (struct wctype_table
*t
)
3340 uint32_t reorder3
[t
->level3_size
];
3341 uint32_t reorder2
[t
->level2_size
];
3342 uint32_t level1_offset
, level2_offset
, level3_offset
;
3344 /* Uniquify level3 blocks. */
3346 for (j
= 0; j
< t
->level3_size
; j
++)
3348 for (i
= 0; i
< k
; i
++)
3349 if (memcmp (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3350 (1 << t
->p
) * sizeof (uint32_t)) == 0)
3352 /* Relocate block j to block i. */
3357 memcpy (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3358 (1 << t
->p
) * sizeof (uint32_t));
/* Redirect every used level2 entry to the surviving level3 block.  */
3364 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3365 if (t
->level2
[i
] != ~((uint32_t) 0))
3366 t
->level2
[i
] = reorder3
[t
->level2
[i
]];
3368 /* Uniquify level2 blocks. */
3370 for (j
= 0; j
< t
->level2_size
; j
++)
3372 for (i
= 0; i
< k
; i
++)
3373 if (memcmp (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3374 (1 << t
->q
) * sizeof (uint32_t)) == 0)
3376 /* Relocate block j to block i. */
3381 memcpy (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3382 (1 << t
->q
) * sizeof (uint32_t));
/* Redirect every used level1 entry to the surviving level2 block.  */
3388 for (i
= 0; i
< t
->level1_size
; i
++)
3389 if (t
->level1
[i
] != ~((uint32_t) 0))
3390 t
->level1
[i
] = reorder2
[t
->level1
[i
]];
3392 /* Create and fill the resulting compressed representation. */
3394 5 * sizeof (uint32_t)
3395 + t
->level1_size
* sizeof (uint32_t)
3396 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t)
3397 + (t
->level3_size
<< t
->p
) * sizeof (uint32_t);
3398 t
->result
= (char *) xmalloc (t
->result_size
);
3401 5 * sizeof (uint32_t);
3403 5 * sizeof (uint32_t)
3404 + t
->level1_size
* sizeof (uint32_t);
3406 5 * sizeof (uint32_t)
3407 + t
->level1_size
* sizeof (uint32_t)
3408 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t);
/* Header words: shift that yields the level1 index, number of level1
   entries, shift that yields the level2 index, mask for the level2
   index, mask for the level3 index.  */
3410 ((uint32_t *) t
->result
)[0] = t
->q
+ t
->p
+ 5;
3411 ((uint32_t *) t
->result
)[1] = t
->level1_size
;
3412 ((uint32_t *) t
->result
)[2] = t
->p
+ 5;
3413 ((uint32_t *) t
->result
)[3] = (1 << t
->q
) - 1;
3414 ((uint32_t *) t
->result
)[4] = (1 << t
->p
) - 1;
/* Used level1 and level2 entries are stored as byte offsets of the block
   they refer to, counted from the start of t->result.  */
3416 for (i
= 0; i
< t
->level1_size
; i
++)
3417 ((uint32_t *) (t
->result
+ level1_offset
))[i
] =
3418 (t
->level1
[i
] == ~((uint32_t) 0)
3420 : (t
->level1
[i
] << t
->q
) * sizeof (uint32_t) + level2_offset
);
3422 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3423 ((uint32_t *) (t
->result
+ level2_offset
))[i
] =
3424 (t
->level2
[i
] == ~((uint32_t) 0)
3426 : (t
->level2
[i
] << t
->p
) * sizeof (uint32_t) + level3_offset
);
/* The level3 bit words are copied unchanged.  */
3428 for (i
= 0; i
< (t
->level3_size
<< t
->p
); i
++)
3429 ((uint32_t *) (t
->result
+ level3_offset
))[i
] = t
->level3
[i
];
/* NOTE(review): the statements guarded by these tests are not shown
   here; presumably they release the working arrays -- confirm.  */
3431 if (t
->level1_alloc
> 0)
3433 if (t
->level2_alloc
> 0)
3435 if (t
->level3_alloc
> 0)
/* State used while building the three-level wcwidth table.
   NOTE(review): only part of the member list appears in this listing.  */
3439 struct wcwidth_table
3444 /* Working representation. */
/* Number of uint32_t slots allocated for level1.  */
3445 size_t level1_alloc
;
/* Number of level2 blocks (1 << q uint32_t entries each) allocated.  */
3448 size_t level2_alloc
;
/* Number of level3 blocks (1 << p uint8_t entries each) allocated.  */
3451 size_t level3_alloc
;
3454 /* Compressed representation. */
3459 /* Initialize. Assumes t->p and t->q have already been set. */
/* Put the working representation of T into the empty state: the
   allocated and used counts of all three levels become zero.  The level
   pointers are not assigned here; wcwidth_table_add picks xmalloc
   instead of xrealloc while the matching levelN_alloc is still 0.  */
3461 wcwidth_table_init (struct wcwidth_table
*t
)
3463 t
->level1_alloc
= t
->level1_size
= 0;
3464 t
->level2_alloc
= t
->level2_size
= 0;
3465 t
->level3_alloc
= t
->level3_size
= 0;
3468 /* Add one entry. */
/* Record WIDTH as the width of wide character WC in the working table T.
   WC is cut into three fields, from the top:
     index1 = WC >> (q + p)    slot in level1,
     index2 = the next q bits  entry inside a level2 block,
     index3 = the low p bits   byte inside a level3 block.
   level1 grows on demand; unused level1/level2 entries hold ~0 and a new
   level3 block is filled with 0xff.
   NOTE(review): some original lines between the declarations and the
   first `if' are absent from this listing (among them the declarations
   of i, i1, i2); confirm against the complete source.  */
3470 wcwidth_table_add (struct wcwidth_table
*t
, uint32_t wc
, uint8_t width
)
3472 uint32_t index1
= wc
>> (t
->q
+ t
->p
);
3473 uint32_t index2
= (wc
>> t
->p
) & ((1 << t
->q
) - 1);
3474 uint32_t index3
= wc
& ((1 << t
->p
) - 1);
/* Make sure level1 has a slot for index1; the capacity is at least
   doubled and freshly exposed slots are marked ~0 (no level2 block).  */
3480 if (index1
>= t
->level1_size
)
3482 if (index1
>= t
->level1_alloc
)
3484 size_t alloc
= 2 * t
->level1_alloc
;
3485 if (alloc
<= index1
)
3487 t
->level1
= (t
->level1_alloc
> 0
3488 ? (uint32_t *) xrealloc ((char *) t
->level1
,
3489 alloc
* sizeof (uint32_t))
3490 : (uint32_t *) xmalloc (alloc
* sizeof (uint32_t)));
3491 t
->level1_alloc
= alloc
;
3493 while (index1
>= t
->level1_size
)
3494 t
->level1
[t
->level1_size
++] = ~((uint32_t) 0);
/* No level2 block for this slot yet: append one (1 << q entries, all ~0)
   and record its block number in level1.  */
3497 if (t
->level1
[index1
] == ~((uint32_t) 0))
3499 if (t
->level2_size
== t
->level2_alloc
)
3501 size_t alloc
= 2 * t
->level2_alloc
+ 1;
3502 t
->level2
= (t
->level2_alloc
> 0
3503 ? (uint32_t *) xrealloc ((char *) t
->level2
,
3504 (alloc
<< t
->q
) * sizeof (uint32_t))
3505 : (uint32_t *) xmalloc ((alloc
<< t
->q
) * sizeof (uint32_t)));
3506 t
->level2_alloc
= alloc
;
3508 i1
= t
->level2_size
<< t
->q
;
3509 i2
= (t
->level2_size
+ 1) << t
->q
;
3510 for (i
= i1
; i
< i2
; i
++)
3511 t
->level2
[i
] = ~((uint32_t) 0);
3512 t
->level1
[index1
] = t
->level2_size
++;
/* Turn index2 into an absolute position in the level2 array.  */
3515 index2
+= t
->level1
[index1
] << t
->q
;
/* No level3 block for this entry yet: append one of 1 << p bytes, all
   0xff, and record its block number in level2.  */
3517 if (t
->level2
[index2
] == ~((uint32_t) 0))
3519 if (t
->level3_size
== t
->level3_alloc
)
3521 size_t alloc
= 2 * t
->level3_alloc
+ 1;
3522 t
->level3
= (t
->level3_alloc
> 0
3523 ? (uint8_t *) xrealloc ((char *) t
->level3
,
3524 (alloc
<< t
->p
) * sizeof (uint8_t))
3525 : (uint8_t *) xmalloc ((alloc
<< t
->p
) * sizeof (uint8_t)));
3526 t
->level3_alloc
= alloc
;
3528 i1
= t
->level3_size
<< t
->p
;
3529 i2
= (t
->level3_size
+ 1) << t
->p
;
3530 for (i
= i1
; i
< i2
; i
++)
3531 t
->level3
[i
] = 0xff;
3532 t
->level2
[index2
] = t
->level3_size
++;
/* Turn index3 into an absolute position in level3 and store the width.  */
3535 index3
+= t
->level2
[index2
] << t
->p
;
3537 t
->level3
[index3
] = width
;
3540 /* Finalize and shrink. */
/* Compress the working table T and build its final image in t->result.
   Steps visible below:
     1. identical level3 blocks (1 << p bytes each) are merged and level2
        entries are renumbered through reorder3[];
     2. identical level2 blocks are merged and level1 entries are
        renumbered through reorder2[];
     3. t->result_size is last_offset rounded up to a multiple of 4; the
        buffer is obtained with xmalloc and filled with a five-word
        header followed by the level1, level2 and level3 arrays, and the
        bytes past last_offset are cleared.
   reorder3[] and reorder2[] are variable-length arrays sized by the
   current block counts.
   NOTE(review): several original lines are absent from this listing (the
   bookkeeping inside the uniquify loops, the left-hand sides of the
   size/offset computations, the value stored for unused entries and the
   statements guarded by the final three `if's); confirm against the
   complete source.  */
3542 wcwidth_table_finalize (struct wcwidth_table
*t
)
3545 uint32_t reorder3
[t
->level3_size
];
3546 uint32_t reorder2
[t
->level2_size
];
3547 uint32_t level1_offset
, level2_offset
, level3_offset
, last_offset
;
3549 /* Uniquify level3 blocks. */
3551 for (j
= 0; j
< t
->level3_size
; j
++)
3553 for (i
= 0; i
< k
; i
++)
3554 if (memcmp (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3555 (1 << t
->p
) * sizeof (uint8_t)) == 0)
3557 /* Relocate block j to block i. */
3562 memcpy (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3563 (1 << t
->p
) * sizeof (uint8_t));
/* Redirect every used level2 entry to the surviving level3 block.  */
3569 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3570 if (t
->level2
[i
] != ~((uint32_t) 0))
3571 t
->level2
[i
] = reorder3
[t
->level2
[i
]];
3573 /* Uniquify level2 blocks. */
3575 for (j
= 0; j
< t
->level2_size
; j
++)
3577 for (i
= 0; i
< k
; i
++)
3578 if (memcmp (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3579 (1 << t
->q
) * sizeof (uint32_t)) == 0)
3581 /* Relocate block j to block i. */
3586 memcpy (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3587 (1 << t
->q
) * sizeof (uint32_t));
/* Redirect every used level1 entry to the surviving level2 block.  */
3593 for (i
= 0; i
< t
->level1_size
; i
++)
3594 if (t
->level1
[i
] != ~((uint32_t) 0))
3595 t
->level1
[i
] = reorder2
[t
->level1
[i
]];
3597 /* Create and fill the resulting compressed representation. */
3599 5 * sizeof (uint32_t)
3600 + t
->level1_size
* sizeof (uint32_t)
3601 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t)
3602 + (t
->level3_size
<< t
->p
) * sizeof (uint8_t);
/* Round the size up to a multiple of four bytes.  */
3603 t
->result_size
= (last_offset
+ 3) & ~3ul;
3604 t
->result
= (char *) xmalloc (t
->result_size
);
3607 5 * sizeof (uint32_t);
3609 5 * sizeof (uint32_t)
3610 + t
->level1_size
* sizeof (uint32_t);
3612 5 * sizeof (uint32_t)
3613 + t
->level1_size
* sizeof (uint32_t)
3614 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t);
/* Header words: shift that yields the level1 index, number of level1
   entries, shift that yields the level2 index, mask for the level2
   index, mask for the level3 index.  */
3616 ((uint32_t *) t
->result
)[0] = t
->q
+ t
->p
;
3617 ((uint32_t *) t
->result
)[1] = t
->level1_size
;
3618 ((uint32_t *) t
->result
)[2] = t
->p
;
3619 ((uint32_t *) t
->result
)[3] = (1 << t
->q
) - 1;
3620 ((uint32_t *) t
->result
)[4] = (1 << t
->p
) - 1;
/* Used level1 and level2 entries are stored as byte offsets of the block
   they refer to, counted from the start of t->result.  */
3622 for (i
= 0; i
< t
->level1_size
; i
++)
3623 ((uint32_t *) (t
->result
+ level1_offset
))[i
] =
3624 (t
->level1
[i
] == ~((uint32_t) 0)
3626 : (t
->level1
[i
] << t
->q
) * sizeof (uint32_t) + level2_offset
);
3628 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3629 ((uint32_t *) (t
->result
+ level2_offset
))[i
] =
3630 (t
->level2
[i
] == ~((uint32_t) 0)
3632 : (t
->level2
[i
] << t
->p
) * sizeof (uint8_t) + level3_offset
);
/* The level3 width bytes are copied unchanged.  */
3634 for (i
= 0; i
< (t
->level3_size
<< t
->p
); i
++)
3635 ((uint8_t *) (t
->result
+ level3_offset
))[i
] = t
->level3
[i
];
/* Clear the alignment padding after the last level3 byte.  */
3637 if (last_offset
< t
->result_size
)
3638 memset (t
->result
+ last_offset
, 0, t
->result_size
- last_offset
);
/* NOTE(review): the statements guarded by these tests are not shown
   here; presumably they release the working arrays -- confirm.  */
3640 if (t
->level1_alloc
> 0)
3642 if (t
->level2_alloc
> 0)
3644 if (t
->level3_alloc
> 0)
/* State used while building the three-level wctrans (character mapping)
   table.
   NOTE(review): only part of the member list appears in this listing.  */
3648 struct wctrans_table
3653 /* Working representation. */
/* Number of uint32_t slots allocated for level1.  */
3654 size_t level1_alloc
;
/* Number of level2 blocks (1 << q uint32_t entries each) allocated.  */
3657 size_t level2_alloc
;
/* Number of level3 blocks (1 << p int32_t entries each) allocated.  */
3660 size_t level3_alloc
;
3663 /* Compressed representation. */
3668 /* Initialize. Assumes t->p and t->q have already been set. */
/* Put the working representation of T into the empty state: the
   allocated and used counts of all three levels become zero.  The level
   pointers are not assigned here; wctrans_table_add picks xmalloc
   instead of xrealloc while the matching levelN_alloc is still 0.  */
3670 wctrans_table_init (struct wctrans_table
*t
)
3672 t
->level1_alloc
= t
->level1_size
= 0;
3673 t
->level2_alloc
= t
->level2_size
= 0;
3674 t
->level3_alloc
= t
->level3_size
= 0;
3677 /* Add one entry. */
/* Record in the working table T that wide character WC maps to
   MAPPED_WC.  What is stored is the signed difference MAPPED_WC - WC.
   WC is cut into three fields, from the top:
     index1 = WC >> (q + p)    slot in level1,
     index2 = the next q bits  entry inside a level2 block,
     index3 = the low p bits   int32_t inside a level3 block.
   level1 grows on demand; unused level1/level2 entries hold ~0.
   NOTE(review): some original lines are absent from this listing (the
   lines between the declarations and the first `if', including the
   declarations of i, i1, i2, and the body of the loop that initialises a
   new level3 block); confirm against the complete source.  */
3679 wctrans_table_add (struct wctrans_table
*t
, uint32_t wc
, uint32_t mapped_wc
)
3681 uint32_t index1
= wc
>> (t
->q
+ t
->p
);
3682 uint32_t index2
= (wc
>> t
->p
) & ((1 << t
->q
) - 1);
3683 uint32_t index3
= wc
& ((1 << t
->p
) - 1);
3684 int32_t value
= (int32_t) mapped_wc
- (int32_t) wc
;
/* Make sure level1 has a slot for index1; the capacity is at least
   doubled and freshly exposed slots are marked ~0 (no level2 block).  */
3690 if (index1
>= t
->level1_size
)
3692 if (index1
>= t
->level1_alloc
)
3694 size_t alloc
= 2 * t
->level1_alloc
;
3695 if (alloc
<= index1
)
3697 t
->level1
= (t
->level1_alloc
> 0
3698 ? (uint32_t *) xrealloc ((char *) t
->level1
,
3699 alloc
* sizeof (uint32_t))
3700 : (uint32_t *) xmalloc (alloc
* sizeof (uint32_t)));
3701 t
->level1_alloc
= alloc
;
3703 while (index1
>= t
->level1_size
)
3704 t
->level1
[t
->level1_size
++] = ~((uint32_t) 0);
/* No level2 block for this slot yet: append one (1 << q entries, all ~0)
   and record its block number in level1.  */
3707 if (t
->level1
[index1
] == ~((uint32_t) 0))
3709 if (t
->level2_size
== t
->level2_alloc
)
3711 size_t alloc
= 2 * t
->level2_alloc
+ 1;
3712 t
->level2
= (t
->level2_alloc
> 0
3713 ? (uint32_t *) xrealloc ((char *) t
->level2
,
3714 (alloc
<< t
->q
) * sizeof (uint32_t))
3715 : (uint32_t *) xmalloc ((alloc
<< t
->q
) * sizeof (uint32_t)));
3716 t
->level2_alloc
= alloc
;
3718 i1
= t
->level2_size
<< t
->q
;
3719 i2
= (t
->level2_size
+ 1) << t
->q
;
3720 for (i
= i1
; i
< i2
; i
++)
3721 t
->level2
[i
] = ~((uint32_t) 0);
3722 t
->level1
[index1
] = t
->level2_size
++;
/* Turn index2 into an absolute position in the level2 array.  */
3725 index2
+= t
->level1
[index1
] << t
->q
;
/* No level3 block for this entry yet: append one of 1 << p int32_t
   values and record its block number in level2.  */
3727 if (t
->level2
[index2
] == ~((uint32_t) 0))
3729 if (t
->level3_size
== t
->level3_alloc
)
3731 size_t alloc
= 2 * t
->level3_alloc
+ 1;
3732 t
->level3
= (t
->level3_alloc
> 0
3733 ? (int32_t *) xrealloc ((char *) t
->level3
,
3734 (alloc
<< t
->p
) * sizeof (int32_t))
3735 : (int32_t *) xmalloc ((alloc
<< t
->p
) * sizeof (int32_t)));
3736 t
->level3_alloc
= alloc
;
3738 i1
= t
->level3_size
<< t
->p
;
3739 i2
= (t
->level3_size
+ 1) << t
->p
;
3740 for (i
= i1
; i
< i2
; i
++)
3742 t
->level2
[index2
] = t
->level3_size
++;
/* Turn index3 into an absolute position in level3 and store the
   difference.  */
3745 index3
+= t
->level2
[index2
] << t
->p
;
3747 t
->level3
[index3
] = value
;
/* wctrans_table_finalize: merge identical level3 and level2 blocks of T,
   renumber the references to them, and build a single memory image in
   T->result (allocated with xmalloc, T->result_size bytes).  The image
   starts with five uint32_t header words, followed by the level1, level2
   and level3 arrays; level1 and level2 entries are converted from block
   numbers into byte offsets inside the image.
   NOTE(review): this listing carries the original line numbers inside
   the text and skips several original lines; the return type, braces,
   the declarations of i, j and k and a number of statements are not
   visible here.  */
3750 /* Finalize and shrink. */
3752 wctrans_table_finalize (struct wctrans_table
*t
)
/* reorder3[n] / reorder2[n] are indexed by old block number and read
   back below to renumber level2 / level1 entries.
   NOTE(review): both are variable length arrays; ISO C requires a VLA
   size greater than zero, so confirm that no caller finalizes a table
   whose level3_size or level2_size is 0.  */
3755 uint32_t reorder3
[t
->level3_size
];
3756 uint32_t reorder2
[t
->level2_size
];
3757 uint32_t level1_offset
, level2_offset
, level3_offset
;
3759 /* Uniquify level3 blocks. */
/* J walks over all level3 blocks; each is compared with memcmp against
   the blocks 0..K-1.
   NOTE(review): the initialisation and update of K and the stores into
   reorder3 are on lines missing from this listing; K is presumably the
   number of distinct blocks kept so far -- confirm.  */
3761 for (j
= 0; j
< t
->level3_size
; j
++)
3763 for (i
= 0; i
< k
; i
++)
3764 if (memcmp (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3765 (1 << t
->p
) * sizeof (int32_t)) == 0)
3767 /* Relocate block j to block i. */
/* Copies the (1 << p) entries of block J into position I.  */
3772 memcpy (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3773 (1 << t
->p
) * sizeof (int32_t));
/* Renumber every non-empty level2 entry through reorder3.  */
3779 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3780 if (t
->level2
[i
] != ~((uint32_t) 0))
3781 t
->level2
[i
] = reorder3
[t
->level2
[i
]];
3783 /* Uniquify level2 blocks. */
/* Same scheme as for level3, now with blocks of (1 << q) uint32_t
   entries.  NOTE(review): the K and reorder2 bookkeeping is again on
   lines missing from this listing.  */
3785 for (j
= 0; j
< t
->level2_size
; j
++)
3787 for (i
= 0; i
< k
; i
++)
3788 if (memcmp (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3789 (1 << t
->q
) * sizeof (uint32_t)) == 0)
3791 /* Relocate block j to block i. */
3796 memcpy (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3797 (1 << t
->q
) * sizeof (uint32_t));
/* Renumber every non-empty level1 entry through reorder2.  */
3803 for (i
= 0; i
< t
->level1_size
; i
++)
3804 if (t
->level1
[i
] != ~((uint32_t) 0))
3805 t
->level1
[i
] = reorder2
[t
->level1
[i
]];
3807 /* Create and fill the resulting compressed representation. */
/* Total byte size: five header words plus the three arrays.
   NOTE(review): the left-hand side of this assignment (original line
   3808) is missing; presumably it is t->result_size, which is what the
   xmalloc call below reads.  */
3809 5 * sizeof (uint32_t)
3810 + t
->level1_size
* sizeof (uint32_t)
3811 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t)
3812 + (t
->level3_size
<< t
->p
) * sizeof (int32_t);
3813 t
->result
= (char *) xmalloc (t
->result_size
);
/* Byte offsets of the three arrays inside the image.
   NOTE(review): the assignment targets are on missing lines; presumably
   level1_offset, level2_offset and level3_offset in that order.  */
3816 5 * sizeof (uint32_t);
3818 5 * sizeof (uint32_t)
3819 + t
->level1_size
* sizeof (uint32_t);
3821 5 * sizeof (uint32_t)
3822 + t
->level1_size
* sizeof (uint32_t)
3823 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t);
/* Header: [0] shift that yields the level1 index (q + p), [1] number
   of level1 entries, [2] shift that yields the level2 index (p),
   [3] mask for the level2 index, [4] mask for the level3 index.  */
3825 ((uint32_t *) t
->result
)[0] = t
->q
+ t
->p
;
3826 ((uint32_t *) t
->result
)[1] = t
->level1_size
;
3827 ((uint32_t *) t
->result
)[2] = t
->p
;
3828 ((uint32_t *) t
->result
)[3] = (1 << t
->q
) - 1;
3829 ((uint32_t *) t
->result
)[4] = (1 << t
->p
) - 1;
/* Level1 entries become byte offsets of their level2 block inside the
   image.  NOTE(review): the value written for an empty entry (original
   line 3834) is missing from this listing.  */
3831 for (i
= 0; i
< t
->level1_size
; i
++)
3832 ((uint32_t *) (t
->result
+ level1_offset
))[i
] =
3833 (t
->level1
[i
] == ~((uint32_t) 0)
3835 : (t
->level1
[i
] << t
->q
) * sizeof (uint32_t) + level2_offset
);
/* Level2 entries become byte offsets of their level3 block inside the
   image.  NOTE(review): the value written for an empty entry (original
   line 3840) is missing from this listing.  */
3837 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3838 ((uint32_t *) (t
->result
+ level2_offset
))[i
] =
3839 (t
->level2
[i
] == ~((uint32_t) 0)
3841 : (t
->level2
[i
] << t
->p
) * sizeof (int32_t) + level3_offset
);
/* The level3 values are copied unchanged.  */
3843 for (i
= 0; i
< (t
->level3_size
<< t
->p
); i
++)
3844 ((int32_t *) (t
->result
+ level3_offset
))[i
] = t
->level3
[i
];
/* NOTE(review): the statements guarded by the three tests below are on
   missing lines; presumably they release the working arrays that were
   allocated -- confirm against the full file.  */
3846 if (t
->level1_alloc
> 0)
3848 if (t
->level2_alloc
> 0)
3850 if (t
->level3_alloc
> 0)
3856 allocate_arrays (struct locale_ctype_t
*ctype
, struct charmap_t
*charmap
,
3857 struct repertoire_t
*repertoire
)
3860 size_t width_table_size
;
3866 /* First we have to decide how we organize the arrays. It is easy
3867 for a one-byte character set. But multi-byte character set
3868 cannot be stored flat because the chars might be sparsely used.
3869 So we determine an optimal hashing function for the used
3872 We use a very trivial hashing function to store the sparse
3873 table. CH % TABSIZE is used as an index. To solve multiple hits
3874 we have N planes. This guarantees a fixed search time for a
3875 character [N / 2]. In the following code we determine the minimum
3876 value for TABSIZE * N, where TABSIZE >= 256.
3878 Some people complained that this algorithm takes too long. Well,
3879 go on, improve it. But changing the step size is *not* an
3880 option. Some people changed this to use only sizes of prime
3881 numbers. Think again, do some math. We are looking for the
3882 optimal solution, not something which works in general. Unless
3883 somebody can provide a dynamic programming solution I think this
3884 implementation is as good as it can get. */
3885 size_t min_total
= UINT_MAX
;
3886 size_t act_size
= 256;
3888 if (oldstyle_tables
)
3890 if (!be_quiet
&& ctype
->charnames_act
> 512)
3892 Computing table size for character classes might take a while..."),
3895 /* While we want to have a small total size we are willing to use a
3896 little bit larger table if this reduces the number of layers.
3897 Therefore we add a little penalty to the number of planes.
3898 Maybe this constant has to be adjusted a bit. */
3902 size_t cnt
[act_size
];
3903 size_t act_planes
= 1;
3905 memset (cnt
, '\0', sizeof cnt
);
3907 for (idx
= 0; idx
< 256; ++idx
)
3910 for (idx
= 0; idx
< ctype
->charnames_act
; ++idx
)
3911 if (ctype
->charnames
[idx
] >= 256)
3913 size_t nr
= ctype
->charnames
[idx
] % act_size
;
3915 if (++cnt
[nr
] > act_planes
)
3917 act_planes
= cnt
[nr
];
3918 if ((act_size
+ PENALTY
) * act_planes
>= min_total
)
3923 if ((act_size
+ PENALTY
) * act_planes
< min_total
)
3925 min_total
= (act_size
+ PENALTY
) * act_planes
;
3926 ctype
->plane_size
= act_size
;
3927 ctype
->plane_cnt
= act_planes
;
3932 while (act_size
< min_total
);
3934 if (!be_quiet
&& ctype
->charnames_act
> 512)
3935 fputs (_(" done\n"), stderr
);
3938 ctype
->names
= (uint32_t *) xcalloc (ctype
->plane_size
3942 for (idx
= 1; idx
< 256; ++idx
)
3943 ctype
->names
[idx
] = idx
;
3945 /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */
3946 ctype
->names
[0] = 1;
3948 for (idx
= 256; idx
< ctype
->charnames_act
; ++idx
)
3950 size_t nr
= (ctype
->charnames
[idx
] % ctype
->plane_size
);
3953 while (ctype
->names
[nr
+ depth
* ctype
->plane_size
])
3955 assert (depth
< ctype
->plane_cnt
);
3957 ctype
->names
[nr
+ depth
* ctype
->plane_size
] = ctype
->charnames
[idx
];
3959 /* Now for faster access remember the index in the NAMES_B array. */
3960 ctype
->charnames
[idx
] = nr
+ depth
* ctype
->plane_size
;
3962 ctype
->names
[0] = 0;
3966 ctype
->plane_size
= 0;
3967 ctype
->plane_cnt
= 0;
3968 ctype
->names
= NULL
;
3971 /* You wonder about this amount of memory? This is only because some
3972 users do not manage to address the array with unsigned values or
3973 data types with range >= 256. '\200' would result in the array
3974 index -128. To help these poor people we duplicate the entries for
3975 128 up to 255 below the entry for \0. */
3976 ctype
->ctype_b
= (char_class_t
*) xcalloc (256 + 128,
3977 sizeof (char_class_t
));
3978 ctype
->ctype32_b
= (char_class32_t
*)
3979 xcalloc ((oldstyle_tables
? ctype
->plane_size
* ctype
->plane_cnt
: 256),
3980 sizeof (char_class32_t
));
3981 if (!oldstyle_tables
)
3982 ctype
->class_3level
= (struct iovec
*)
3983 xmalloc (ctype
->nr_charclass
* sizeof (struct iovec
));
3985 /* This is the array accessed using the multibyte string elements. */
3986 for (idx
= 0; idx
< 256; ++idx
)
3987 ctype
->ctype_b
[128 + idx
] = ctype
->class256_collection
[idx
];
3989 /* Mirror first 127 entries. We must take care that entry -1 is not
3990 mirrored because EOF == -1. */
3991 for (idx
= 0; idx
< 127; ++idx
)
3992 ctype
->ctype_b
[idx
] = ctype
->ctype_b
[256 + idx
];
3994 if (oldstyle_tables
)
3996 /* The 32 bit array contains all characters. */
3997 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
3998 ctype
->ctype32_b
[ctype
->charnames
[idx
]] = ctype
->class_collection
[idx
];
4002 /* The 32 bit array contains all characters < 0x100. */
4003 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
4004 if (ctype
->charnames
[idx
] < 0x100)
4005 ctype
->ctype32_b
[ctype
->charnames
[idx
]] = ctype
->class_collection
[idx
];
4008 if (!oldstyle_tables
)
4012 for (nr
= 0; nr
< ctype
->nr_charclass
; nr
++)
4014 struct wctype_table t
;
4016 t
.p
= 4; /* or: 5 */
4017 t
.q
= 7; /* or: 6 */
4018 wctype_table_init (&t
);
4020 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
4021 if (ctype
->class_collection
[idx
] & _ISwbit (nr
))
4022 wctype_table_add (&t
, ctype
->charnames
[idx
]);
4024 wctype_table_finalize (&t
);
4027 fprintf (stderr
, _("%s: table for class \"%s\": %lu bytes\n"),
4028 "LC_CTYPE", ctype
->classnames
[nr
],
4029 (unsigned long int) t
.result_size
);
4031 ctype
->class_3level
[nr
].iov_base
= t
.result
;
4032 ctype
->class_3level
[nr
].iov_len
= t
.result_size
;
4036 /* Room for table of mappings. */
4037 ctype
->map
= (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
4038 ctype
->map32
= (uint32_t **) xmalloc (ctype
->map_collection_nr
4039 * sizeof (uint32_t *));
4040 if (!oldstyle_tables
)
4041 ctype
->map_3level
= (struct iovec
*)
4042 xmalloc (ctype
->map_collection_nr
* sizeof (struct iovec
));
4044 /* Fill in all mappings. */
4045 for (idx
= 0; idx
< 2; ++idx
)
4049 /* Allocate table. */
4050 ctype
->map
[idx
] = (uint32_t *) xmalloc ((256 + 128) * sizeof (uint32_t));
4052 /* Copy values from collection. */
4053 for (idx2
= 0; idx2
< 256; ++idx2
)
4054 ctype
->map
[idx
][128 + idx2
] = ctype
->map256_collection
[idx
][idx2
];
4056 /* Mirror first 127 entries. We must take care not to map entry
4057 -1 because EOF == -1. */
4058 for (idx2
= 0; idx2
< 127; ++idx2
)
4059 ctype
->map
[idx
][idx2
] = ctype
->map
[idx
][256 + idx2
];
4061 /* EOF must map to EOF. */
4062 ctype
->map
[idx
][127] = EOF
;
4065 for (idx
= 0; idx
< ctype
->map_collection_nr
; ++idx
)
4069 /* Allocate table. */
4070 ctype
->map32
[idx
] = (uint32_t *)
4071 xmalloc ((oldstyle_tables
? ctype
->plane_size
* ctype
->plane_cnt
: 256)
4072 * sizeof (uint32_t));
4074 /* Copy default value (identity mapping). */
4075 if (oldstyle_tables
)
4076 memcpy (ctype
->map32
[idx
], ctype
->names
,
4077 ctype
->plane_size
* ctype
->plane_cnt
* sizeof (uint32_t));
4079 for (idx2
= 0; idx2
< 256; ++idx2
)
4080 ctype
->map32
[idx
][idx2
] = idx2
;
4082 /* Copy values from collection. */
4083 for (idx2
= 0; idx2
< 256; ++idx2
)
4084 if (ctype
->map_collection
[idx
][idx2
] != 0)
4085 ctype
->map32
[idx
][idx2
] = ctype
->map_collection
[idx
][idx2
];
4087 if (oldstyle_tables
)
4088 while (idx2
< ctype
->map_collection_act
[idx
])
4090 if (ctype
->map_collection
[idx
][idx2
] != 0)
4091 ctype
->map32
[idx
][ctype
->charnames
[idx2
]] =
4092 ctype
->map_collection
[idx
][idx2
];
4097 if (!oldstyle_tables
)
4101 for (nr
= 0; nr
< ctype
->map_collection_nr
; nr
++)
4103 struct wctrans_table t
;
4107 wctrans_table_init (&t
);
4109 for (idx
= 0; idx
< ctype
->map_collection_act
[nr
]; ++idx
)
4110 if (ctype
->map_collection
[nr
][idx
] != 0)
4111 wctrans_table_add (&t
, ctype
->charnames
[idx
],
4112 ctype
->map_collection
[nr
][idx
]);
4114 wctrans_table_finalize (&t
);
4117 fprintf (stderr
, _("%s: table for map \"%s\": %lu bytes\n"),
4118 "LC_CTYPE", ctype
->mapnames
[nr
],
4119 (unsigned long int) t
.result_size
);
4121 ctype
->map_3level
[nr
].iov_base
= t
.result
;
4122 ctype
->map_3level
[nr
].iov_len
= t
.result_size
;
4126 /* Extra array for class and map names. */
4127 ctype
->class_name_ptr
= (uint32_t *) xmalloc (ctype
->nr_charclass
4128 * sizeof (uint32_t));
4129 ctype
->map_name_ptr
= (uint32_t *) xmalloc (ctype
->map_collection_nr
4130 * sizeof (uint32_t));
4132 if (oldstyle_tables
)
4134 ctype
->class_offset
= 0; /* not really used */
4135 ctype
->map_offset
= 0; /* not really used */
4139 ctype
->class_offset
= _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE
);
4140 ctype
->map_offset
= ctype
->class_offset
+ ctype
->nr_charclass
;
4143 /* Array for width information. Because the expected width are very
4144 small we use only one single byte. This saves space. */
4145 if (oldstyle_tables
)
4147 width_table_size
= (ctype
->plane_size
* ctype
->plane_cnt
+ 3) & ~3ul;
4148 ctype
->width
= (unsigned char *) xmalloc (width_table_size
);
4150 /* Initialize with -1. */
4151 memset (ctype
->width
, '\xff', width_table_size
);
4152 if (charmap
->width_rules
!= NULL
)
4156 for (cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
4158 unsigned char bytes
[charmap
->mb_cur_max
];
4159 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
4161 /* We have the range of character for which the width is
4162 specified described using byte sequences of the multibyte
4163 charset. We have to convert this to UCS4 now. And we
4164 cannot simply convert the beginning and the end of the
4165 sequence, we have to iterate over the byte sequence and
4166 convert it for every single character. */
4167 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
4169 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
4170 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
4173 /* Find the UCS value for `bytes'. */
4176 struct charseq
*seq
=
4177 charmap_find_symbol (charmap
, bytes
, nbytes
);
4180 wch
= ILLEGAL_CHAR_VALUE
;
4181 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
4184 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
4185 strlen (seq
->name
));
4187 if (wch
!= ILLEGAL_CHAR_VALUE
)
4189 /* Store the value. */
4190 size_t nr
= wch
% ctype
->plane_size
;
4193 while (ctype
->names
[nr
+ depth
* ctype
->plane_size
] != wch
)
4196 assert (depth
< ctype
->plane_cnt
);
4199 ctype
->width
[nr
+ depth
* ctype
->plane_size
]
4200 = charmap
->width_rules
[cnt
].width
;
4203 /* "Increment" the bytes sequence. */
4205 while (inner
>= 0 && bytes
[inner
] == 0xff)
4210 /* We have to extend the byte sequence. */
4211 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
4215 memset (&bytes
[1], 0, nbytes
);
4221 while (++inner
< nbytes
)
4228 /* Now set all the other characters of the character set to the
4231 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
4233 struct charseq
*data
= (struct charseq
*) vdata
;
4237 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
4238 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
4241 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
4243 nr
= data
->ucs4
% ctype
->plane_size
;
4246 while (ctype
->names
[nr
+ depth
* ctype
->plane_size
] != data
->ucs4
)
4249 assert (depth
< ctype
->plane_cnt
);
4252 if (ctype
->width
[nr
+ depth
* ctype
->plane_size
]
4253 == (unsigned char) '\xff')
4254 ctype
->width
[nr
+ depth
* ctype
->plane_size
] =
4255 charmap
->width_default
;
4261 struct wcwidth_table t
;
4265 wcwidth_table_init (&t
);
4267 /* First set all the characters of the character set to the default width. */
4269 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
4271 struct charseq
*data
= (struct charseq
*) vdata
;
4273 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
4274 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
4277 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
4278 wcwidth_table_add (&t
, data
->ucs4
, charmap
->width_default
);
4281 /* Now add the explicitly specified widths. */
4282 if (charmap
->width_rules
!= NULL
)
4286 for (cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
4288 unsigned char bytes
[charmap
->mb_cur_max
];
4289 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
4291 /* We have the range of character for which the width is
4292 specified described using byte sequences of the multibyte
4293 charset. We have to convert this to UCS4 now. And we
4294 cannot simply convert the beginning and the end of the
4295 sequence, we have to iterate over the byte sequence and
4296 convert it for every single character. */
4297 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
4299 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
4300 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
4303 /* Find the UCS value for `bytes'. */
4306 struct charseq
*seq
=
4307 charmap_find_symbol (charmap
, bytes
, nbytes
);
4310 wch
= ILLEGAL_CHAR_VALUE
;
4311 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
4314 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
4315 strlen (seq
->name
));
4317 if (wch
!= ILLEGAL_CHAR_VALUE
)
4318 /* Store the value. */
4319 wcwidth_table_add (&t
, wch
, charmap
->width_rules
[cnt
].width
);
4321 /* "Increment" the bytes sequence. */
4323 while (inner
>= 0 && bytes
[inner
] == 0xff)
4328 /* We have to extend the byte sequence. */
4329 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
4333 memset (&bytes
[1], 0, nbytes
);
4339 while (++inner
< nbytes
)
4346 wcwidth_table_finalize (&t
);
4349 fprintf (stderr
, _("%s: table for width: %lu bytes\n"),
4350 "LC_CTYPE", (unsigned long int) t
.result_size
);
4352 ctype
->width_3level
.iov_base
= t
.result
;
4353 ctype
->width_3level
.iov_len
= t
.result_size
;
4356 /* Set MB_CUR_MAX. */
4357 ctype
->mb_cur_max
= charmap
->mb_cur_max
;
4359 /* Now determine the table for the transliteration information.
4361 XXX It is not yet clear to me whether it is worth implementing a
4362 complicated algorithm which uses a hash table to locate the entries.
4363 For now I'll use a simple array which can be searching using binary
4365 if (ctype
->translit_copy_locale
!= NULL
)
4367 /* Fold in the transliteration information from the locale mentioned
4368 in the `include' statement. */
4369 struct locale_ctype_t
*here
= ctype
;
4373 struct localedef_t
*other
= find_locale (LC_CTYPE
,
4374 here
->translit_copy_locale
,
4375 repertoire
->name
, charmap
);
4380 %s: transliteration data from locale `%s' not available"),
4381 "LC_CTYPE", here
->translit_copy_locale
);
4385 here
= other
->categories
[LC_CTYPE
].ctype
;
4387 /* Enqueue the information if necessary. */
4388 if (here
->translit
!= NULL
)
4390 struct translit_t
*endp
= here
->translit
;
4391 while (endp
->next
!= NULL
)
4394 endp
->next
= ctype
->translit
;
4395 ctype
->translit
= here
->translit
;
4398 while (here
->translit_copy_locale
!= NULL
);
4401 if (ctype
->translit
!= NULL
)
4403 /* First count how many entries we have. This is the upper limit
4404 since some entries from the included files might be overwritten. */
4407 struct translit_t
*runp
= ctype
->translit
;
4408 struct translit_t
**sorted
;
4409 size_t from_len
, to_len
;
4411 while (runp
!= NULL
)
4417 /* Next we allocate an array large enough and fill in the values. */
4418 sorted
= (struct translit_t
**) alloca (number
4419 * sizeof (struct translit_t
**));
4420 runp
= ctype
->translit
;
4424 /* Search for the place where to insert this string.
4425 XXX Better use a real sorting algorithm later. */
4429 while (idx
< number
)
4431 int res
= wcscmp ((const wchar_t *) sorted
[idx
]->from
,
4432 (const wchar_t *) runp
->from
);
4447 memmove (&sorted
[idx
+ 1], &sorted
[idx
],
4448 (number
- idx
) * sizeof (struct translit_t
*));
4455 while (runp
!= NULL
);
4457 /* The next step is putting all the possible transliteration
4458 strings in one memory block so that we can write it out.
4459 We need several different blocks:
4460 - index to the from-string array
4462 - index to the to-string array
4465 from_len
= to_len
= 0;
4466 for (cnt
= 0; cnt
< number
; ++cnt
)
4468 struct translit_to_t
*srunp
;
4469 from_len
+= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
4470 srunp
= sorted
[cnt
]->to
;
4471 while (srunp
!= NULL
)
4473 to_len
+= wcslen ((const wchar_t *) srunp
->str
) + 1;
4474 srunp
= srunp
->next
;
4476 /* Plus one for the extra NUL character marking the end of
4477 the list for the current entry. */
4481 /* We can allocate the arrays for the results. */
4482 ctype
->translit_from_idx
= xmalloc (number
* sizeof (uint32_t));
4483 ctype
->translit_from_tbl
= xmalloc (from_len
* sizeof (uint32_t));
4484 ctype
->translit_to_idx
= xmalloc (number
* sizeof (uint32_t));
4485 ctype
->translit_to_tbl
= xmalloc (to_len
* sizeof (uint32_t));
4489 for (cnt
= 0; cnt
< number
; ++cnt
)
4492 struct translit_to_t
*srunp
;
4494 ctype
->translit_from_idx
[cnt
] = from_len
;
4495 ctype
->translit_to_idx
[cnt
] = to_len
;
4497 len
= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
4498 wmemcpy ((wchar_t *) &ctype
->translit_from_tbl
[from_len
],
4499 (const wchar_t *) sorted
[cnt
]->from
, len
);
4502 ctype
->translit_to_idx
[cnt
] = to_len
;
4503 srunp
= sorted
[cnt
]->to
;
4504 while (srunp
!= NULL
)
4506 len
= wcslen ((const wchar_t *) srunp
->str
) + 1;
4507 wmemcpy ((wchar_t *) &ctype
->translit_to_tbl
[to_len
],
4508 (const wchar_t *) srunp
->str
, len
);
4510 srunp
= srunp
->next
;
4512 ctype
->translit_to_tbl
[to_len
++] = L
'\0';
4515 /* Store the information about the length. */
4516 ctype
->translit_idx_size
= number
;
4517 ctype
->translit_from_tbl_size
= from_len
* sizeof (uint32_t);
4518 ctype
->translit_to_tbl_size
= to_len
* sizeof (uint32_t);
4522 /* Provide some dummy pointers since we have nothing to write out. */
4523 static uint32_t no_str
= { 0 };
4525 ctype
->translit_from_idx
= &no_str
;
4526 ctype
->translit_from_tbl
= &no_str
;
4527 ctype
->translit_to_tbl
= &no_str
;
4528 ctype
->translit_idx_size
= 0;
4529 ctype
->translit_from_tbl_size
= 0;
4530 ctype
->translit_to_tbl_size
= 0;