1 /* Copyright (C) 1995-1999, 2000 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
37 #include "localeinfo.h"
39 #include "linereader.h"
40 #include "locfile-token.h"
42 #include "localedef.h"
47 #ifdef PREDEFINED_CLASSES
48 /* These are the extra bits not in wctype.h since these are not preallocated classes. */
50 # define _ISwspecial1 (1 << 29)
51 # define _ISwspecial2 (1 << 30)
52 # define _ISwspecial3 (1 << 31)
56 /* The bit used for representing a special class.  BITPOS turns a class token into its offset from tok_upper; BIT passes that offset to _ISbit and BITw passes it to _ISwbit. */
57 #define BITPOS(class) ((class) - tok_upper)
58 #define BIT(class) (_ISbit (BITPOS (class)))
59 #define BITw(class) (_ISwbit (BITPOS (class)))
/* ELEM (ctype, collection, idx, value) looks VALUE up with find_idx in the
   table ctype->COLLECTION, handing find_idx the addresses of the table
   pointer and of its COLLECTION_max and COLLECTION_act counters.  The
   returned pointer is dereferenced, so the macro can be used both as a
   value and as the target of an assignment.  IDX is pasted verbatim after
   each member name: leave it empty for a plain member, or supply a
   subscript when the members are arrays.  */
61 #define ELEM(ctype, collection, idx, value) \
62 *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \
63 &ctype->collection##_act idx, value)
66 /* To be compatible with former implementations we for now restrict
67 the number of bits for character classes to 16. When compatibility
68 is not necessary anymore increase the number to 32. */
69 #define char_class_t uint16_t
70 #define char_class32_t uint32_t
73 /* Type to describe a transliteration action. We have a possibly
74 multiple character from-string and a set of multiple character
75 to-strings. All are 32bit values since this is what is used in
76 the gconv functions. */
81 struct translit_to_t
*next
;
91 struct translit_to_t
*to
;
93 struct translit_t
*next
;
96 struct translit_ignore_t
105 struct translit_ignore_t
*next
;
109 /* The real definition of the struct for the LC_CTYPE locale. */
110 struct locale_ctype_t
113 size_t charnames_max
;
114 size_t charnames_act
;
115 /* An index lookup table, to speedup find_idx. */
116 #define MAX_CHARNAMES_IDX 0x10000
117 uint32_t *charnames_idx
;
119 struct repertoire_t
*repertoire
;
121 /* We will allow up to 8 * sizeof (uint32_t) character classes. */
122 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
124 const char *classnames
[MAX_NR_CHARCLASS
];
125 uint32_t last_class_char
;
126 uint32_t class256_collection
[256];
127 uint32_t *class_collection
;
128 size_t class_collection_max
;
129 size_t class_collection_act
;
131 uint32_t class_offset
;
133 struct charseq
**mbdigits
;
140 struct charseq
*mboutdigits
[10];
141 uint32_t wcoutdigits
[10];
142 size_t outdigits_act
;
144 /* If the following number ever turns out to be too small simply
145 increase it. But I doubt it will. --drepper@gnu */
146 #define MAX_NR_CHARMAP 16
147 const char *mapnames
[MAX_NR_CHARMAP
];
148 uint32_t *map_collection
[MAX_NR_CHARMAP
];
149 uint32_t map256_collection
[2][256];
150 size_t map_collection_max
[MAX_NR_CHARMAP
];
151 size_t map_collection_act
[MAX_NR_CHARMAP
];
152 size_t map_collection_nr
;
154 int tomap_done
[MAX_NR_CHARMAP
];
157 /* Transliteration information. */
158 const char *translit_copy_locale
;
159 const char *translit_copy_repertoire
;
160 struct translit_t
*translit
;
161 struct translit_ignore_t
*translit_ignore
;
162 uint32_t ntranslit_ignore
;
164 uint32_t *default_missing
;
165 const char *default_missing_file
;
166 size_t default_missing_lineno
;
168 /* The arrays for the binary representation. */
169 char_class_t
*ctype_b
;
170 char_class32_t
*ctype32_b
;
174 struct iovec
*class_3level
;
175 struct iovec
*map_3level
;
176 uint32_t *class_name_ptr
;
177 uint32_t *map_name_ptr
;
180 const char *codeset_name
;
181 uint32_t *translit_from_idx
;
182 uint32_t *translit_from_tbl
;
183 uint32_t *translit_to_idx
;
184 uint32_t *translit_to_tbl
;
185 uint32_t translit_idx_size
;
186 size_t translit_from_tbl_size
;
187 size_t translit_to_tbl_size
;
189 struct obstack mempool
;
193 #define obstack_chunk_alloc xmalloc
194 #define obstack_chunk_free free
197 /* Prototypes for local functions. */
198 static void ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
199 struct charmap_t
*charmap
,
200 struct localedef_t
*copy_locale
,
202 static void ctype_class_new (struct linereader
*lr
,
203 struct locale_ctype_t
*ctype
, const char *name
);
204 static void ctype_map_new (struct linereader
*lr
,
205 struct locale_ctype_t
*ctype
,
206 const char *name
, struct charmap_t
*charmap
);
207 static uint32_t *find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
,
208 size_t *max
, size_t *act
, unsigned int idx
);
209 static void set_class_defaults (struct locale_ctype_t
*ctype
,
210 struct charmap_t
*charmap
,
211 struct repertoire_t
*repertoire
);
212 static void allocate_arrays (struct locale_ctype_t
*ctype
,
213 struct charmap_t
*charmap
,
214 struct repertoire_t
*repertoire
);
217 static const char *longnames
[] =
219 "zero", "one", "two", "three", "four",
220 "five", "six", "seven", "eight", "nine"
222 static const char *uninames
[] =
224 "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
225 "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
227 static const unsigned char digits
[] = "0123456789";
231 ctype_startup (struct linereader
*lr
, struct localedef_t
*locale
,
232 struct charmap_t
*charmap
, struct localedef_t
*copy_locale
,
236 struct locale_ctype_t
*ctype
;
238 if (!ignore_content
&& locale
->categories
[LC_CTYPE
].ctype
== NULL
)
240 if (copy_locale
== NULL
)
242 /* Allocate the needed room. */
243 locale
->categories
[LC_CTYPE
].ctype
= ctype
=
244 (struct locale_ctype_t
*) xcalloc (1,
245 sizeof (struct locale_ctype_t
));
247 /* We have seen no names yet. */
248 ctype
->charnames_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
250 (unsigned int *) xmalloc (ctype
->charnames_max
251 * sizeof (unsigned int));
252 for (cnt
= 0; cnt
< 256; ++cnt
)
253 ctype
->charnames
[cnt
] = cnt
;
254 ctype
->charnames_act
= 256;
255 ctype
->charnames_idx
=
256 (uint32_t *) xmalloc (MAX_CHARNAMES_IDX
* sizeof (uint32_t));
257 for (cnt
= 0; cnt
< MAX_CHARNAMES_IDX
; ++cnt
)
258 ctype
->charnames_idx
[cnt
] = ~((uint32_t) 0);
260 /* Fill character class information. */
261 ctype
->last_class_char
= ILLEGAL_CHAR_VALUE
;
262 /* The order of the following instructions determines the bit positions! */
264 ctype_class_new (lr
, ctype
, "upper");
265 ctype_class_new (lr
, ctype
, "lower");
266 ctype_class_new (lr
, ctype
, "alpha");
267 ctype_class_new (lr
, ctype
, "digit");
268 ctype_class_new (lr
, ctype
, "xdigit");
269 ctype_class_new (lr
, ctype
, "space");
270 ctype_class_new (lr
, ctype
, "print");
271 ctype_class_new (lr
, ctype
, "graph");
272 ctype_class_new (lr
, ctype
, "blank");
273 ctype_class_new (lr
, ctype
, "cntrl");
274 ctype_class_new (lr
, ctype
, "punct");
275 ctype_class_new (lr
, ctype
, "alnum");
276 #ifdef PREDEFINED_CLASSES
277 /* The following are extensions from ISO 14652. */
278 ctype_class_new (lr
, ctype
, "left_to_right");
279 ctype_class_new (lr
, ctype
, "right_to_left");
280 ctype_class_new (lr
, ctype
, "num_terminator");
281 ctype_class_new (lr
, ctype
, "num_separator");
282 ctype_class_new (lr
, ctype
, "segment_separator");
283 ctype_class_new (lr
, ctype
, "block_separator");
284 ctype_class_new (lr
, ctype
, "direction_control");
285 ctype_class_new (lr
, ctype
, "sym_swap_layout");
286 ctype_class_new (lr
, ctype
, "char_shape_selector");
287 ctype_class_new (lr
, ctype
, "num_shape_selector");
288 ctype_class_new (lr
, ctype
, "non_spacing");
289 ctype_class_new (lr
, ctype
, "non_spacing_level3");
290 ctype_class_new (lr
, ctype
, "normal_connect");
291 ctype_class_new (lr
, ctype
, "r_connect");
292 ctype_class_new (lr
, ctype
, "no_connect");
293 ctype_class_new (lr
, ctype
, "no_connect-space");
294 ctype_class_new (lr
, ctype
, "vowel_connect");
297 ctype
->class_collection_max
= charmap
->mb_cur_max
== 1 ? 256 : 512;
298 ctype
->class_collection
299 = (uint32_t *) xcalloc (sizeof (unsigned long int),
300 ctype
->class_collection_max
);
301 ctype
->class_collection_act
= 256;
303 /* Fill character map information. */
304 ctype
->last_map_idx
= MAX_NR_CHARMAP
;
305 ctype_map_new (lr
, ctype
, "toupper", charmap
);
306 ctype_map_new (lr
, ctype
, "tolower", charmap
);
307 #ifdef PREDEFINED_CLASSES
308 ctype_map_new (lr
, ctype
, "tosymmetric", charmap
);
311 /* Fill first 256 entries in `toXXX' arrays. */
312 for (cnt
= 0; cnt
< 256; ++cnt
)
314 ctype
->map_collection
[0][cnt
] = cnt
;
315 ctype
->map_collection
[1][cnt
] = cnt
;
316 #ifdef PREDEFINED_CLASSES
317 ctype
->map_collection
[2][cnt
] = cnt
;
319 ctype
->map256_collection
[0][cnt
] = cnt
;
320 ctype
->map256_collection
[1][cnt
] = cnt
;
323 obstack_init (&ctype
->mempool
);
326 ctype
= locale
->categories
[LC_CTYPE
].ctype
=
327 copy_locale
->categories
[LC_CTYPE
].ctype
;
333 ctype_finish (struct localedef_t
*locale
, struct charmap_t
*charmap
)
335 /* See POSIX.2, table 2-6 for the meaning of the following table. */
340 const char allow
[NCLASS
];
342 valid_table
[NCLASS
] =
344 /* The order is important. See token.h for more information.
345 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
346 { "upper", "--MX-XDDXXX-" },
347 { "lower", "--MX-XDDXXX-" },
348 { "alpha", "---X-XDDXXX-" },
349 { "digit", "XXX--XDDXXX-" },
350 { "xdigit", "-----XDDXXX-" },
351 { "space", "XXXXX------X" },
352 { "print", "---------X--" },
353 { "graph", "---------X--" },
354 { "blank", "XXXXXM-----X" },
355 { "cntrl", "XXXXX-XX--XX" },
356 { "punct", "XXXXX-DD-X-X" },
357 { "alnum", "-----XDDXXX-" }
361 uint32_t space_value
;
362 struct charseq
*space_seq
;
363 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
370 /* Now resolve copying and also handle completely missing definitions. */
373 const char *repertoire_name
;
375 /* First see whether we were supposed to copy. If yes, find the
376 actual definition. */
377 if (locale
->copy_name
[LC_CTYPE
] != NULL
)
379 /* Find the copying locale. This has to happen transitively since
380 the locale we are copying from might itself be copying from another one. */
381 struct localedef_t
*from
= locale
;
384 from
= find_locale (LC_CTYPE
, from
->copy_name
[LC_CTYPE
],
385 from
->repertoire_name
, charmap
);
386 while (from
->categories
[LC_CTYPE
].ctype
== NULL
387 && from
->copy_name
[LC_CTYPE
] != NULL
);
389 ctype
= locale
->categories
[LC_CTYPE
].ctype
390 = from
->categories
[LC_CTYPE
].ctype
;
393 /* If there is still no definition issue a warning and create a fresh one via ctype_startup. */
398 error (0, 0, _("No definition for %s category found"), "LC_CTYPE");
399 ctype_startup (NULL
, locale
, charmap
, NULL
, 0);
400 ctype
= locale
->categories
[LC_CTYPE
].ctype
;
403 /* Get the repertoire we have to use. */
404 repertoire_name
= locale
->repertoire_name
?: repertoire_global
;
405 if (repertoire_name
!= NULL
)
406 ctype
->repertoire
= repertoire_read (repertoire_name
);
409 /* We need the name of the currently used 8-bit character set to
410 make correct conversion between this 8-bit representation and the
411 ISO 10646 character set used internally for wide characters. */
412 ctype
->codeset_name
= charmap
->code_set_name
;
413 if (ctype
->codeset_name
== NULL
)
416 error (0, 0, _("No character set name specified in charmap"));
417 ctype
->codeset_name
= "//UNKNOWN//";
420 /* Set default value for classes not specified. */
421 set_class_defaults (ctype
, charmap
, ctype
->repertoire
);
423 /* Check according to table. */
424 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
426 uint32_t tmp
= ctype
->class_collection
[cnt
];
430 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
431 if ((tmp
& _ISwbit (cls1
)) != 0)
432 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
433 if (valid_table
[cls1
].allow
[cls2
] != '-')
435 int eq
= (tmp
& _ISwbit (cls2
)) != 0;
436 switch (valid_table
[cls1
].allow
[cls2
])
441 uint32_t value
= ctype
->charnames
[cnt
];
445 character L'\\u%0*x' in class `%s' must be in class `%s'"),
446 value
> 0xffff ? 8 : 4, value
,
447 valid_table
[cls1
].name
,
448 valid_table
[cls2
].name
);
455 uint32_t value
= ctype
->charnames
[cnt
];
459 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
460 value
> 0xffff ? 8 : 4, value
,
461 valid_table
[cls1
].name
,
462 valid_table
[cls2
].name
);
467 ctype
->class_collection
[cnt
] |= _ISwbit (cls2
);
471 error (5, 0, _("internal error in %s, line %u"),
472 __FUNCTION__
, __LINE__
);
478 for (cnt
= 0; cnt
< 256; ++cnt
)
480 uint32_t tmp
= ctype
->class256_collection
[cnt
];
484 for (cls1
= 0; cls1
< NCLASS
; ++cls1
)
485 if ((tmp
& _ISbit (cls1
)) != 0)
486 for (cls2
= 0; cls2
< NCLASS
; ++cls2
)
487 if (valid_table
[cls1
].allow
[cls2
] != '-')
489 int eq
= (tmp
& _ISbit (cls2
)) != 0;
490 switch (valid_table
[cls1
].allow
[cls2
])
497 snprintf (buf
, sizeof buf
, "\\%Zo", cnt
);
501 character '%s' in class `%s' must be in class `%s'"),
502 buf
, valid_table
[cls1
].name
,
503 valid_table
[cls2
].name
);
512 snprintf (buf
, sizeof buf
, "\\%Zo", cnt
);
516 character '%s' in class `%s' must not be in class `%s'"),
517 buf
, valid_table
[cls1
].name
,
518 valid_table
[cls2
].name
);
523 ctype
->class256_collection
[cnt
] |= _ISbit (cls2
);
527 error (5, 0, _("internal error in %s, line %u"),
528 __FUNCTION__
, __LINE__
);
534 /* ... and now test <SP> as a special case. */
536 if (((cnt
= BITPOS (tok_space
),
537 (ELEM (ctype
, class_collection
, , space_value
)
538 & BITw (tok_space
)) == 0)
539 || (cnt
= BITPOS (tok_blank
),
540 (ELEM (ctype
, class_collection
, , space_value
)
541 & BITw (tok_blank
)) == 0)))
544 error (0, 0, _("<SP> character not in class `%s'"),
545 valid_table
[cnt
].name
);
547 else if (((cnt
= BITPOS (tok_punct
),
548 (ELEM (ctype
, class_collection
, , space_value
)
549 & BITw (tok_punct
)) != 0)
550 || (cnt
= BITPOS (tok_graph
),
551 (ELEM (ctype
, class_collection
, , space_value
)
556 error (0, 0, _("<SP> character must not be in class `%s'"),
557 valid_table
[cnt
].name
);
560 ELEM (ctype
, class_collection
, , space_value
) |= BITw (tok_print
);
562 space_seq
= charmap_find_value (charmap
, "SP", 2);
563 if (space_seq
== NULL
)
564 space_seq
= charmap_find_value (charmap
, "space", 5);
565 if (space_seq
== NULL
)
566 space_seq
= charmap_find_value (charmap
, "U00000020", 9);
567 if (space_seq
== NULL
|| space_seq
->nbytes
!= 1)
570 error (0, 0, _("character <SP> not defined in character map"));
572 else if (((cnt
= BITPOS (tok_space
),
573 (ctype
->class256_collection
[space_seq
->bytes
[0]]
574 & BIT (tok_space
)) == 0)
575 || (cnt
= BITPOS (tok_blank
),
576 (ctype
->class256_collection
[space_seq
->bytes
[0]]
577 & BIT (tok_blank
)) == 0)))
580 error (0, 0, _("<SP> character not in class `%s'"),
581 valid_table
[cnt
].name
);
583 else if (((cnt
= BITPOS (tok_punct
),
584 (ctype
->class256_collection
[space_seq
->bytes
[0]]
585 & BIT (tok_punct
)) != 0)
586 || (cnt
= BITPOS (tok_graph
),
587 (ctype
->class256_collection
[space_seq
->bytes
[0]]
588 & BIT (tok_graph
)) != 0)))
591 error (0, 0, _("<SP> character must not be in class `%s'"),
592 valid_table
[cnt
].name
);
595 ctype
->class256_collection
[space_seq
->bytes
[0]] |= BIT (tok_print
);
597 /* Now that the tests are done make sure the name array contains all
598 characters which are handled in the WIDTH section of the
599 character set definition file. */
600 if (charmap
->width_rules
!= NULL
)
601 for (cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
603 unsigned char bytes
[charmap
->mb_cur_max
];
604 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
606 /* We have the range of character for which the width is
607 specified described using byte sequences of the multibyte
608 charset. We have to convert this to UCS4 now. And we
609 cannot simply convert the beginning and the end of the
610 sequence, we have to iterate over the byte sequence and
611 convert it for every single character. */
612 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
614 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
615 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
618 /* Find the UCS value for `bytes'. */
621 struct charseq
*seq
= charmap_find_symbol (charmap
, bytes
, nbytes
);
624 wch
= ILLEGAL_CHAR_VALUE
;
625 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
628 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
631 if (wch
!= ILLEGAL_CHAR_VALUE
)
632 /* We are only interested in the side-effects of the
633 `find_idx' call. It will add appropriate entries in
634 the name array if this is necessary. */
635 (void) find_idx (ctype
, NULL
, NULL
, NULL
, wch
);
637 /* "Increment" the bytes sequence. */
639 while (inner
>= 0 && bytes
[inner
] == 0xff)
644 /* We have to extend the byte sequence. */
645 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
649 memset (&bytes
[1], 0, nbytes
);
655 while (++inner
< nbytes
)
661 /* Now set all the other characters of the character set to the default width. */
664 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
666 struct charseq
*data
= (struct charseq
*) vdata
;
668 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
669 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
672 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
673 (void) find_idx (ctype
, NULL
, NULL
, NULL
, data
->ucs4
);
676 /* There must be a multiple of 10 digits. */
677 if (ctype
->mbdigits_act
% 10 != 0)
679 assert (ctype
->mbdigits_act
== ctype
->wcdigits_act
);
680 ctype
->wcdigits_act
-= ctype
->mbdigits_act
% 10;
681 ctype
->mbdigits_act
-= ctype
->mbdigits_act
% 10;
682 error (0, 0, _("`digit' category has not entries in groups of ten"));
685 /* Check the input digits. There must be a multiple of ten available.
686 In each group it could be that one or the other character is missing.
687 In this case the whole group must be removed. */
689 while (cnt
< ctype
->mbdigits_act
)
692 for (inner
= 0; inner
< 10; ++inner
)
693 if (ctype
->mbdigits
[cnt
+ inner
] == NULL
)
700 /* Remove the group. */
/* NOTE(review): the number of elements moved is computed from
   wcdigits_act although the array being compacted is mbdigits and the
   counter decremented below is mbdigits_act.  The two counters are only
   asserted equal in the multiple-of-ten fix-up earlier in this function;
   confirm whether mbdigits_act was intended here.  */
701 memmove (&ctype
->mbdigits
[cnt
], &ctype
->mbdigits
[cnt
+ 10],
702 ((ctype
->wcdigits_act
- cnt
- 10)
703 * sizeof (ctype
->mbdigits
[0])));
704 ctype
->mbdigits_act
-= 10;
708 /* If no input digits are given use the default. */
709 if (ctype
->mbdigits_act
== 0)
711 if (ctype
->mbdigits_max
== 0)
713 ctype
->mbdigits
= obstack_alloc (&charmap
->mem_pool
,
714 10 * sizeof (struct charseq
*));
715 ctype
->mbdigits_max
= 10;
718 for (cnt
= 0; cnt
< 10; ++cnt
)
720 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
722 if (ctype
->mbdigits
[cnt
] == NULL
)
724 ctype
->mbdigits
[cnt
] = charmap_find_symbol (charmap
,
726 strlen (longnames
[cnt
]));
727 if (ctype
->mbdigits
[cnt
] == NULL
)
729 /* Hum, this ain't good. */
731 no input digits defined and none of the standard names in the charmap"));
733 ctype
->mbdigits
[cnt
] = obstack_alloc (&charmap
->mem_pool
,
734 sizeof (struct charseq
) + 1);
736 /* This is better than nothing. */
737 ctype
->mbdigits
[cnt
]->bytes
[0] = digits
[cnt
];
738 ctype
->mbdigits
[cnt
]->nbytes
= 1;
743 ctype
->mbdigits_act
= 10;
746 /* Check the wide character input digits. There must be a multiple
747 of ten available. In each group it could be that one or the other
748 character is missing. In this case the whole group must be removed. */
751 while (cnt
< ctype
->wcdigits_act
)
754 for (inner
= 0; inner
< 10; ++inner
)
755 if (ctype
->wcdigits
[cnt
+ inner
] == ILLEGAL_CHAR_VALUE
)
762 /* Remove the group. */
763 memmove (&ctype
->wcdigits
[cnt
], &ctype
->wcdigits
[cnt
+ 10],
764 ((ctype
->wcdigits_act
- cnt
- 10)
765 * sizeof (ctype
->wcdigits
[0])));
766 ctype
->wcdigits_act
-= 10;
770 /* If no input digits are given use the default.  The default wide digits are L'0' through L'9'; the array is allocated from the charmap's obstack only when no room has been reserved yet. */
771 if (ctype
->wcdigits_act
== 0)
773 if (ctype
->wcdigits_max
== 0)
775 ctype
->wcdigits
= obstack_alloc (&charmap
->mem_pool
,
776 10 * sizeof (uint32_t));
777 ctype
->wcdigits_max
= 10;
780 for (cnt
= 0; cnt
< 10; ++cnt
)
781 ctype
->wcdigits
[cnt
] = L
'0' + cnt
;
/* NOTE(review): this branch fills in default *wide* digits (wcdigits,
   guarded by wcdigits_act == 0) yet records the new count in
   mbdigits_act, leaving wcdigits_act at 0.  This looks like a
   copy-and-paste slip from the multibyte branch; confirm whether
   wcdigits_act was intended.  */
783 ctype
->mbdigits_act
= 10;
786 /* Check the outdigits. */
788 for (cnt
= 0; cnt
< 10; ++cnt
)
789 if (ctype
->mboutdigits
[cnt
] == NULL
)
791 static struct charseq replace
[2];
796 not all characters used in `outdigit' are available in the charmap"));
800 replace
[0].nbytes
= 1;
801 replace
[0].bytes
[0] = '?';
802 replace
[0].bytes
[1] = '\0';
803 ctype
->mboutdigits
[cnt
] = &replace
[0];
807 for (cnt
= 0; cnt
< 10; ++cnt
)
808 if (ctype
->wcoutdigits
[cnt
] == 0)
813 not all characters used in `outdigit' are available in the repertoire"));
817 ctype
->wcoutdigits
[cnt
] = L
'?';
820 /* Sort the entries in the translit_ignore list. */
821 if (ctype
->translit_ignore
!= NULL
)
823 struct translit_ignore_t
*firstp
= ctype
->translit_ignore
;
824 struct translit_ignore_t
*runp
;
826 ctype
->ntranslit_ignore
= 1;
828 for (runp
= firstp
->next
; runp
!= NULL
; runp
= runp
->next
)
830 struct translit_ignore_t
*lastp
= NULL
;
831 struct translit_ignore_t
*cmpp
;
833 ++ctype
->ntranslit_ignore
;
835 for (cmpp
= firstp
; cmpp
!= NULL
; lastp
= cmpp
, cmpp
= cmpp
->next
)
836 if (runp
->from
< cmpp
->from
)
844 ctype
->translit_ignore
= firstp
;
850 ctype_output (struct localedef_t
*locale
, struct charmap_t
*charmap
,
851 const char *output_path
)
853 static const char nulbytes
[4] = { 0, 0, 0, 0 };
854 struct locale_ctype_t
*ctype
= locale
->categories
[LC_CTYPE
].ctype
;
855 const size_t nelems
= (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
)
856 + ctype
->nr_charclass
+ ctype
->map_collection_nr
);
857 struct iovec iov
[2 + nelems
+ 2 * ctype
->nr_charclass
858 + ctype
->map_collection_nr
+ 4];
859 struct locale_file data
;
860 uint32_t idx
[nelems
+ 1];
861 uint32_t default_missing_len
;
862 size_t elem
, cnt
, offset
, total
;
865 /* Now prepare the output: Find the sizes of the table we can use. */
866 allocate_arrays (ctype
, charmap
, ctype
->repertoire
);
868 data
.magic
= LIMAGIC (LC_CTYPE
);
870 iov
[0].iov_base
= (void *) &data
;
871 iov
[0].iov_len
= sizeof (data
);
873 iov
[1].iov_base
= (void *) idx
;
874 iov
[1].iov_len
= nelems
* sizeof (uint32_t);
876 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
879 for (elem
= 0; elem
< nelems
; ++elem
)
881 if (elem
< _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
))
884 #define CTYPE_EMPTY(name) \
886 iov[2 + elem + offset].iov_base = NULL; \
887 iov[2 + elem + offset].iov_len = 0; \
888 idx[elem + 1] = idx[elem]; \
891 CTYPE_EMPTY(_NL_CTYPE_GAP1
);
892 CTYPE_EMPTY(_NL_CTYPE_GAP2
);
893 CTYPE_EMPTY(_NL_CTYPE_GAP3
);
894 CTYPE_EMPTY(_NL_CTYPE_GAP4
);
895 CTYPE_EMPTY(_NL_CTYPE_GAP5
);
896 CTYPE_EMPTY(_NL_CTYPE_GAP6
);
898 #define CTYPE_DATA(name, base, len) \
899 case _NL_ITEM_INDEX (name): \
900 iov[2 + elem + offset].iov_base = (base); \
901 iov[2 + elem + offset].iov_len = (len); \
902 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
905 CTYPE_DATA (_NL_CTYPE_CLASS
,
907 (256 + 128) * sizeof (char_class_t
));
909 CTYPE_DATA (_NL_CTYPE_TOUPPER
,
911 (256 + 128) * sizeof (uint32_t));
912 CTYPE_DATA (_NL_CTYPE_TOLOWER
,
914 (256 + 128) * sizeof (uint32_t));
916 CTYPE_DATA (_NL_CTYPE_TOUPPER32
,
918 256 * sizeof (uint32_t));
919 CTYPE_DATA (_NL_CTYPE_TOLOWER32
,
921 256 * sizeof (uint32_t));
923 CTYPE_DATA (_NL_CTYPE_CLASS32
,
925 256 * sizeof (char_class32_t
));
927 CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET
,
928 &ctype
->class_offset
, sizeof (uint32_t));
930 CTYPE_DATA (_NL_CTYPE_MAP_OFFSET
,
931 &ctype
->map_offset
, sizeof (uint32_t));
933 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE
,
934 &ctype
->translit_idx_size
, sizeof (uint32_t));
936 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX
,
937 ctype
->translit_from_idx
,
938 ctype
->translit_idx_size
* sizeof (uint32_t));
940 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL
,
941 ctype
->translit_from_tbl
,
942 ctype
->translit_from_tbl_size
);
944 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX
,
945 ctype
->translit_to_idx
,
946 ctype
->translit_idx_size
* sizeof (uint32_t));
948 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL
,
949 ctype
->translit_to_tbl
, ctype
->translit_to_tbl_size
);
951 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES
):
952 /* The class name array. */
954 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
, ++offset
)
956 iov
[2 + elem
+ offset
].iov_base
957 = (void *) ctype
->classnames
[cnt
];
958 iov
[2 + elem
+ offset
].iov_len
959 = strlen (ctype
->classnames
[cnt
]) + 1;
960 total
+= iov
[2 + elem
+ offset
].iov_len
;
962 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
963 iov
[2 + elem
+ offset
].iov_len
= 1 + (4 - ((total
+ 1) % 4));
964 total
+= 1 + (4 - ((total
+ 1) % 4));
966 idx
[elem
+ 1] = idx
[elem
] + total
;
969 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES
):
970 /* The map name array. */
972 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
, ++offset
)
974 iov
[2 + elem
+ offset
].iov_base
975 = (void *) ctype
->mapnames
[cnt
];
976 iov
[2 + elem
+ offset
].iov_len
977 = strlen (ctype
->mapnames
[cnt
]) + 1;
978 total
+= iov
[2 + elem
+ offset
].iov_len
;
980 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
981 iov
[2 + elem
+ offset
].iov_len
= 1 + (4 - ((total
+ 1) % 4));
982 total
+= 1 + (4 - ((total
+ 1) % 4));
984 idx
[elem
+ 1] = idx
[elem
] + total
;
987 CTYPE_DATA (_NL_CTYPE_WIDTH
,
988 ctype
->width
.iov_base
,
989 ctype
->width
.iov_len
);
991 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX
,
992 &ctype
->mb_cur_max
, sizeof (uint32_t));
994 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME
):
995 total
= strlen (ctype
->codeset_name
) + 1;
997 iov
[2 + elem
+ offset
].iov_base
= (char *) ctype
->codeset_name
;
1000 iov
[2 + elem
+ offset
].iov_base
= alloca ((total
+ 3) & ~3);
1001 memset (mempcpy (iov
[2 + elem
+ offset
].iov_base
,
1002 ctype
->codeset_name
, total
),
1003 '\0', 4 - (total
& 3));
1004 total
= (total
+ 3) & ~3;
1006 iov
[2 + elem
+ offset
].iov_len
= total
;
1007 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1010 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN
):
1011 iov
[2 + elem
+ offset
].iov_base
= alloca (sizeof (uint32_t));
1012 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1013 *(uint32_t *) iov
[2 + elem
+ offset
].iov_base
=
1014 ctype
->mbdigits_act
/ 10;
1015 idx
[elem
+ 1] = idx
[elem
] + sizeof (uint32_t);
1018 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN
):
1019 /* Align entries. */
1020 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1021 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1022 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1025 iov
[2 + elem
+ offset
].iov_base
= alloca (sizeof (uint32_t));
1026 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1027 *(uint32_t *) iov
[2 + elem
+ offset
].iov_base
=
1028 ctype
->wcdigits_act
/ 10;
1029 idx
[elem
+ 1] = idx
[elem
] + sizeof (uint32_t);
1032 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB
):
1033 /* Compute the length of all possible characters. For INDIGITS
1034 there might be more than one. We simply concatenate all of
1035 them with a NUL byte following. The NUL byte wouldn't be
1036 necessary but it makes it easier for the user. */
1039 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
);
1040 cnt
< ctype
->mbdigits_act
; cnt
+= 10)
1041 total
+= ctype
->mbdigits
[cnt
]->nbytes
+ 1;
1042 iov
[2 + elem
+ offset
].iov_base
= (char *) alloca (total
);
1043 iov
[2 + elem
+ offset
].iov_len
= total
;
1045 cp
= iov
[2 + elem
+ offset
].iov_base
;
1046 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB
);
1047 cnt
< ctype
->mbdigits_act
; cnt
+= 10)
1049 cp
= mempcpy (cp
, ctype
->mbdigits
[cnt
]->bytes
,
1050 ctype
->mbdigits
[cnt
]->nbytes
);
1053 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1056 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB
):
1057 /* Compute the length of the output digit. Unlike INDIGITS there is
1058 exactly one multibyte character per OUTDIGIT item. We store it
1059 with a NUL byte following. The NUL byte wouldn't be
1060 necessary but it makes it easier for the user. */
1061 cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB
);
1062 total
= ctype
->mboutdigits
[cnt
]->nbytes
+ 1;
1063 iov
[2 + elem
+ offset
].iov_base
= (char *) alloca (total
);
1064 iov
[2 + elem
+ offset
].iov_len
= total
;
1066 *(char *) mempcpy (iov
[2 + elem
+ offset
].iov_base
,
1067 ctype
->mboutdigits
[cnt
]->bytes
,
1068 ctype
->mboutdigits
[cnt
]->nbytes
) = '\0';
1069 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1072 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC
):
1073 total
= ctype
->wcdigits_act
/ 10;
1075 iov
[2 + elem
+ offset
].iov_base
=
1076 (uint32_t *) alloca (total
* sizeof (uint32_t));
1077 iov
[2 + elem
+ offset
].iov_len
= total
* sizeof (uint32_t);
1079 for (cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC
);
1080 cnt
< ctype
->wcdigits_act
; cnt
+= 10)
1081 ((uint32_t *) iov
[2 + elem
+ offset
].iov_base
)[cnt
/ 10]
1082 = ctype
->wcdigits
[cnt
];
1083 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1086 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
):
1087 /* Align entries. */
1088 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1089 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1090 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1094 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC
) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC
):
1095 cnt
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC
);
1096 iov
[2 + elem
+ offset
].iov_base
= &ctype
->wcoutdigits
[cnt
];
1097 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1098 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1101 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN
):
1102 /* Align entries. */
1103 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1104 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1105 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1108 default_missing_len
= (ctype
->default_missing
1109 ? wcslen ((wchar_t *)ctype
->default_missing
)
1111 iov
[2 + elem
+ offset
].iov_base
= &default_missing_len
;
1112 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1113 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1116 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING
):
/* The default-missing replacement string, emitted as 32-bit characters
   without its terminator (an empty string when none was given).
   iov_len is a byte count, so the character count returned by wcslen
   has to be scaled by the element size; otherwise only a quarter of
   the string would be written.  */
1117 iov
[2 + elem
+ offset
].iov_base
=
1118 ctype
->default_missing
?: (uint32_t *) L
"";
1119 iov
[2 + elem
+ offset
].iov_len
=
1120 wcslen (iov
[2 + elem
+ offset
].iov_base
) * sizeof (uint32_t);
1121 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1124 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN
):
1125 /* Align entries. */
1126 iov
[2 + elem
+ offset
].iov_base
= (void *) nulbytes
;
1127 iov
[2 + elem
+ offset
].iov_len
= (4 - idx
[elem
] % 4) % 4;
1128 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1131 iov
[2 + elem
+ offset
].iov_base
= &ctype
->ntranslit_ignore
;
1132 iov
[2 + elem
+ offset
].iov_len
= sizeof (uint32_t);
1133 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1136 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE
):
1138 uint32_t *ranges
= (uint32_t *) alloca (ctype
->ntranslit_ignore
1139 * 3 * sizeof (uint32_t));
1140 struct translit_ignore_t
*runp
;
1142 iov
[2 + elem
+ offset
].iov_base
= ranges
;
1143 iov
[2 + elem
+ offset
].iov_len
= (ctype
->ntranslit_ignore
1144 * 3 * sizeof (uint32_t));
1146 for (runp
= ctype
->translit_ignore
; runp
!= NULL
;
1149 *ranges
++ = runp
->from
;
1150 *ranges
++ = runp
->to
;
1151 *ranges
++ = runp
->step
;
1154 /* Remove the following line in case a new entry is added
1155 after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN. */
1157 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1161 assert (! "unknown CTYPE element");
1165 /* Handle extra maps. */
1166 size_t nr
= elem
- _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
);
1167 if (nr
< ctype
->nr_charclass
)
1169 iov
[2 + elem
+ offset
].iov_base
= ctype
->class_b
[nr
];
1170 iov
[2 + elem
+ offset
].iov_len
= 256 / 32 * sizeof (uint32_t);
1171 idx
[elem
] += iov
[2 + elem
+ offset
].iov_len
;
1174 iov
[2 + elem
+ offset
] = ctype
->class_3level
[nr
];
1178 nr
-= ctype
->nr_charclass
;
1179 assert (nr
< ctype
->map_collection_nr
);
1180 iov
[2 + elem
+ offset
] = ctype
->map_3level
[nr
];
1182 idx
[elem
+ 1] = idx
[elem
] + iov
[2 + elem
+ offset
].iov_len
;
1186 assert (2 + elem
+ offset
== (nelems
+ 2 * ctype
->nr_charclass
1187 + ctype
->map_collection_nr
+ 4 + 2));
1189 write_locale_data (output_path
, "LC_CTYPE", 2 + elem
+ offset
, iov
);
1193 /* Local functions. */
/* Add NAME to the character classes known to CTYPE.  The classnames
   table is scanned with strcmp for an entry equal to NAME; the
   already-defined diagnostic below belongs to that case.  When
   nr_charclass has reached MAX_NR_CHARCLASS the implementation limit
   message is used.  Otherwise NAME itself (the pointer, not a copy) is
   stored in the next classnames slot and nr_charclass is incremented.
   NOTE(review): the numeric prefixes show that source lines are
   missing from this listing, so the declarations and the statements
   that end each branch are not visible here.  */
1195 ctype_class_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
/* Look for an existing class carrying the same name.  */
1200 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
1201 if (strcmp (ctype
->classnames
[cnt
], name
) == 0)
1204 if (cnt
< ctype
->nr_charclass
)
1206 lr_error (lr
, _("character class `%s' already defined"), name
);
1210 if (ctype
->nr_charclass
== MAX_NR_CHARCLASS
)
1211 /* Exit code 2 is prescribed in P1003.2b. */
1213 implementation limit: no more than %Zd character classes allowed"),
1216 ctype
->classnames
[ctype
->nr_charclass
++] = name
;
/* Register NAME as an additional character map of CTYPE.  While the
   mapnames table is searched for NAME, the largest map_collection_max
   value seen is remembered in max_chars.  A duplicate name is reported
   with lr_error and MAX_NR_CHARMAP is the implementation limit.  For a
   new map the name pointer is stored, the capacity is set either from
   the charmap (256 for mb_cur_max == 1, else 512) or from max_chars,
   a zeroed uint32_t table of that size is allocated with xcalloc, its
   active count starts at 256 and map_collection_nr is incremented.
   NOTE(review): the test selecting between the two capacity
   assignments is on a line missing from this listing.  */
1221 ctype_map_new (struct linereader
*lr
, struct locale_ctype_t
*ctype
,
1222 const char *name
, struct charmap_t
*charmap
)
1224 size_t max_chars
= 0;
1227 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
1229 if (strcmp (ctype
->mapnames
[cnt
], name
) == 0)
1232 if (max_chars
< ctype
->map_collection_max
[cnt
])
1233 max_chars
= ctype
->map_collection_max
[cnt
];
1236 if (cnt
< ctype
->map_collection_nr
)
1238 lr_error (lr
, _("character map `%s' already defined"), name
);
1242 if (ctype
->map_collection_nr
== MAX_NR_CHARMAP
)
1243 /* Exit code 2 is prescribed in P1003.2b. */
1245 implementation limit: no more than %d character maps allowed"),
1248 ctype
->mapnames
[cnt
] = name
;
1251 ctype
->map_collection_max
[cnt
] = charmap
->mb_cur_max
== 1 ? 256 : 512;
1253 ctype
->map_collection_max
[cnt
] = max_chars
;
1255 ctype
->map_collection
[cnt
] = (uint32_t *)
1256 xcalloc (sizeof (uint32_t), ctype
->map_collection_max
[cnt
]);
1257 ctype
->map_collection_act
[cnt
] = 256;
1259 ++ctype
->map_collection_nr
;
/* Return a pointer to the slot of *TABLE that belongs to the character
   value IDX.  Values handled by the first return statement index the
   table directly.  Other values are mapped to a position CNT through
   the charnames array: charnames_idx is a direct lookup for
   IDX < MAX_CHARNAMES_IDX (an entry equal to ~0 means no position is
   recorded yet), larger values are found by a linear search starting
   at position 256.  An unknown IDX is appended to charnames, doubling
   that array when it is full.  Finally *TABLE is enlarged with
   xrealloc and the new part zero-filled when CNT does not fit, and
   &(*TABLE)[CNT] is returned.
   NOTE(review): the condition guarding the first return is on a line
   missing from this listing; the search start of 256 suggests it is
   IDX < 256 -- confirm.  */
1263 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1264 is possible if we only want to extend the name array. */
1266 find_idx (struct locale_ctype_t
*ctype
, uint32_t **table
, size_t *max
,
1267 size_t *act
, uint32_t idx
)
1272 return table
== NULL
? NULL
: &(*table
)[idx
];
1274 /* If idx is in the usual range, use the charnames_idx lookup table
1275 instead of the slow search loop. */
1276 if (idx
< MAX_CHARNAMES_IDX
)
1278 if (ctype
->charnames_idx
[idx
] != ~((uint32_t) 0))
1280 cnt
= ctype
->charnames_idx
[idx
];
1283 cnt
= ctype
->charnames_act
;
1287 for (cnt
= 256; cnt
< ctype
->charnames_act
; ++cnt
)
1288 if (ctype
->charnames
[cnt
] == idx
)
1292 /* We have to distinguish two cases: the name is found or not. */
1293 if (cnt
== ctype
->charnames_act
)
1295 /* Extend the name array. */
1296 if (ctype
->charnames_act
== ctype
->charnames_max
)
1298 ctype
->charnames_max
*= 2;
1299 ctype
->charnames
= (uint32_t *)
1300 xrealloc (ctype
->charnames
,
1301 sizeof (uint32_t) * ctype
->charnames_max
);
1303 ctype
->charnames
[ctype
->charnames_act
++] = idx
;
/* Remember the position so the next lookup can skip the search.  */
1304 if (idx
< MAX_CHARNAMES_IDX
)
1305 ctype
->charnames_idx
[idx
] = cnt
;
1309 /* We have done everything we are asked to do. */
1316 size_t old_max
= *max
;
1319 while (*max
<= cnt
);
1322 (uint32_t *) xrealloc (*table
, *max
* sizeof (uint32_t));
/* Clear only the part of the table that was just added.  */
1323 memset (&(*table
)[old_max
], '\0',
1324 (*max
- old_max
) * sizeof (uint32_t));
1330 return &(*table
)[cnt
];
/* Translate the token NOW into a byte sequence (*SEQP) and a UCS4
   value (*WCHP).  Three token kinds are handled below:
   - tok_bsymbol: the name is looked up with repertoire_find_value for
     the UCS4 value and with charmap_find_value for the sequence.
   - tok_ucs4: the charmap is searched for a name of the form U%08X,
     then the repertoire is consulted (sequence cache and symbolic
     name); *WCHP is the ucs4 value of the token.
   - tok_charcode: charmap_find_symbol yields the sequence and the UCS4
     value comes from its ucs4 member, which is filled in from the
     repertoire while it is still UNINITIALIZED_CHAR_VALUE.
   NOTE(review): the return statements are on lines missing from this
   listing; a caller later in this file tests the result as a truth
   value, so presumably non-zero signals an unusable token -- confirm.  */
1335 get_character (struct token
*now
, struct charmap_t
*charmap
,
1336 struct repertoire_t
*repertoire
,
1337 struct charseq
**seqp
, uint32_t *wchp
)
1339 if (now
->tok
== tok_bsymbol
)
1341 /* This will hopefully be the normal case. */
1342 *wchp
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1343 now
->val
.str
.lenmb
);
1344 *seqp
= charmap_find_value (charmap
, now
->val
.str
.startmb
,
1345 now
->val
.str
.lenmb
);
1347 else if (now
->tok
== tok_ucs4
)
1351 snprintf (utmp
, sizeof (utmp
), "U%08X", now
->val
.ucs4
);
1352 *seqp
= charmap_find_value (charmap
, utmp
, 9);
1355 *seqp
= repertoire_find_seq (repertoire
, now
->val
.ucs4
);
1359 /* Compute the value in the charmap from the UCS value. */
1360 const char *symbol
= repertoire_find_symbol (repertoire
,
1366 *seqp
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1370 if (repertoire
!= NULL
)
1372 /* Insert a negative entry. */
1373 static const struct charseq negative
1374 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1375 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
,
1377 *newp
= now
->val
.ucs4
;
1379 insert_entry (&repertoire
->seq_table
, newp
,
1380 sizeof (uint32_t), (void *) &negative
);
1384 (*seqp
)->ucs4
= now
->val
.ucs4
;
1386 else if ((*seqp
)->ucs4
!= now
->val
.ucs4
)
1389 *wchp
= now
->val
.ucs4
;
1391 else if (now
->tok
== tok_charcode
)
1393 /* We must map from the byte code to UCS4. */
1394 *seqp
= charmap_find_symbol (charmap
, now
->val
.str
.startmb
,
1395 now
->val
.str
.lenmb
);
1398 *wchp
= ILLEGAL_CHAR_VALUE
;
/* Resolve the UCS4 value lazily the first time the sequence is used.  */
1401 if ((*seqp
)->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1402 (*seqp
)->ucs4
= repertoire_find_value (repertoire
, (*seqp
)->name
,
1403 strlen ((*seqp
)->name
));
1404 *wchp
= (*seqp
)->ucs4
;
/* Expand a symbolic range.  LAST_STR and the name in NOW must have the
   same length; their common prefix is skipped and the differing tails
   are converted with strtoul in BASE, giving FROM and TO.  Unless
   IGNORE_CONTENT is set, each value FROM+STEP, FROM+2*STEP, ... up to
   TO is formatted back into a name (in tmp), resolved with
   get_character and entered into the class tables; with
   HANDLE_DIGITS == 1 or 2 the digit or outdigit tables are filled too.
   NOTE(review): FROM and TO are unsigned long int, but the overflow
   tests compare them with UINT_MAX, whereas strtoul reports overflow
   as ULONG_MAX; errno is also not visibly reset before the calls --
   confirm.
   NOTE(review): sprintf receives a pointer difference for the star
   precision and FROM (unsigned long int) for %d/%X, both of which
   expect int -- confirm.  */
1414 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1415 the .(2). counterparts. */
1417 charclass_symbolic_ellipsis (struct linereader
*ldfile
,
1418 struct locale_ctype_t
*ctype
,
1419 struct charmap_t
*charmap
,
1420 struct repertoire_t
*repertoire
,
1422 const char *last_str
,
1423 unsigned long int class256_bit
,
1424 unsigned long int class_bit
, int base
,
1425 int ignore_content
, int handle_digits
, int step
)
1427 const char *nowstr
= now
->val
.str
.startmb
;
1428 char tmp
[now
->val
.str
.lenmb
+ 1];
1431 unsigned long int from
;
1432 unsigned long int to
;
1434 /* We have to compute the ellipsis values using the symbolic names. */
1435 assert (last_str
!= NULL
);
1437 if (strlen (last_str
) != now
->val
.str
.lenmb
)
1441 _("`%s' and `%.*s' are no valid names for symbolic range"),
1442 last_str
, (int) now
->val
.str
.lenmb
, nowstr
);
1446 if (memcmp (last_str
, nowstr
, now
->val
.str
.lenmb
) == 0)
1447 /* Nothing to do, the names are the same. */
/* Skip the prefix both names have in common.  */
1450 for (cp
= last_str
; *cp
== *(nowstr
+ (cp
- last_str
)); ++cp
)
1454 from
= strtoul (cp
, &endp
, base
);
1455 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *endp
!= '\0')
1458 to
= strtoul (nowstr
+ (cp
- last_str
), &endp
, base
);
1459 if ((to
== UINT_MAX
&& errno
== ERANGE
)
1460 || (endp
- nowstr
) != now
->val
.str
.lenmb
|| from
>= to
)
1463 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1464 if (!ignore_content
)
1466 now
->val
.str
.startmb
= tmp
;
1467 while ((from
+= step
) <= to
)
1469 struct charseq
*seq
;
1472 sprintf (tmp
, (base
== 10 ? "%.*s%0*d" : "%.*s%0*X"), cp
- last_str
,
1473 last_str
, now
->val
.str
.lenmb
- (cp
- last_str
), from
);
1475 get_character (now
, charmap
, repertoire
, &seq
, &wch
);
1477 if (seq
!= NULL
&& seq
->nbytes
== 1)
1478 /* Yep, we can store information about this byte sequence. */
1479 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
1481 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1482 /* We have the UCS4 position. */
1483 *find_idx (ctype
, &ctype
->class_collection
,
1484 &ctype
->class_collection_max
,
1485 &ctype
->class_collection_act
, wch
) |= class_bit
;
1487 if (handle_digits
== 1)
1489 /* We must store the digit values. */
1490 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1492 ctype
->mbdigits_max
*= 2;
1493 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1494 (ctype
->mbdigits_max
1495 * sizeof (char *)));
1496 ctype
->wcdigits_max
*= 2;
1497 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1498 (ctype
->wcdigits_max
1499 * sizeof (uint32_t)));
1502 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1503 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1505 else if (handle_digits
== 2)
1507 /* We must store the digit values. */
1508 if (ctype
->outdigits_act
>= 10)
1510 lr_error (ldfile
, _("\
1511 %s: field `%s' does not contain exactly ten entries"),
1512 "LC_CTYPE", "outdigit");
1516 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1517 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1518 ++ctype
->outdigits_act
;
/* Expand a UCS4 range: starting from LAST_WCH the value is advanced by
   STEP for as long as it does not exceed now->val.ucs4.  For every
   value the charmap is searched under names of the form U%08X and
   U%04X and the repertoire is consulted for a byte sequence;
   CLASS256_BIT is recorded for single-byte sequences and CLASS_BIT for
   the UCS4 value via find_idx.  HANDLE_DIGITS == 1 appends to the
   mbdigits/wcdigits arrays (doubling their capacity when full) and
   HANDLE_DIGITS == 2 fills the outdigit tables, which are limited to
   ten entries.  A range whose end is below LAST_WCH is reported with
   lr_error, and nothing is expanded when IGNORE_CONTENT is set.
   NOTE(review): many source lines are absent from this listing (the
   numeric prefixes skip), so the exact nesting of the statements
   below cannot be read off here.  */
1525 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
1527 charclass_ucs4_ellipsis (struct linereader
*ldfile
,
1528 struct locale_ctype_t
*ctype
,
1529 struct charmap_t
*charmap
,
1530 struct repertoire_t
*repertoire
,
1531 struct token
*now
, uint32_t last_wch
,
1532 unsigned long int class256_bit
,
1533 unsigned long int class_bit
, int ignore_content
,
1534 int handle_digits
, int step
)
1536 if (last_wch
> now
->val
.ucs4
)
1538 lr_error (ldfile
, _("\
1539 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1540 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, now
->val
.ucs4
,
1541 (now
->val
.ucs4
| last_wch
) < 65536 ? 4 : 8, last_wch
);
1545 if (!ignore_content
)
1546 while ((last_wch
+= step
) <= now
->val
.ucs4
)
1548 /* We have to find out whether there is a byte sequence corresponding
1549 to this UCS4 value. */
1550 struct charseq
*seq
;
1553 snprintf (utmp
, sizeof (utmp
), "U%08X", last_wch
);
1554 seq
= charmap_find_value (charmap
, utmp
, 9);
1557 snprintf (utmp
, sizeof (utmp
), "U%04X", last_wch
);
1558 seq
= charmap_find_value (charmap
, utmp
, 5);
1562 /* Try looking in the repertoire map. */
1563 seq
= repertoire_find_seq (repertoire
, last_wch
);
1565 /* If this is the first time we look for this sequence create a new
1569 static const struct charseq negative
1570 = { .ucs4
= ILLEGAL_CHAR_VALUE
};
1572 /* Find the symbolic name for this UCS4 value. */
1573 if (repertoire
!= NULL
)
1575 const char *symbol
= repertoire_find_symbol (repertoire
,
1577 uint32_t *newp
= obstack_alloc (&repertoire
->mem_pool
,
1582 /* We have a name, now search the multibyte value. */
1583 seq
= charmap_find_value (charmap
, symbol
, strlen (symbol
));
1586 /* We have to create a fake entry. */
1587 seq
= (struct charseq
*) &negative
;
1589 seq
->ucs4
= last_wch
;
1591 insert_entry (&repertoire
->seq_table
, newp
, sizeof (uint32_t),
1595 /* We have to create a fake entry. */
1596 seq
= (struct charseq
*) &negative
;
1599 /* We have a name, now search the multibyte value. */
1600 if (seq
->ucs4
== last_wch
&& seq
->nbytes
== 1)
1601 /* Yep, we can store information about this byte sequence. */
1602 ctype
->class256_collection
[(size_t) seq
->bytes
[0]]
1605 /* And of course we have the UCS4 position. */
1607 *find_idx (ctype
, &ctype
->class_collection
,
1608 &ctype
->class_collection_max
,
1609 &ctype
->class_collection_act
, last_wch
) |= class_bit
;
1611 if (handle_digits
== 1)
1613 /* We must store the digit values. */
1614 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1616 ctype
->mbdigits_max
*= 2;
1617 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1618 (ctype
->mbdigits_max
1619 * sizeof (char *)));
1620 ctype
->wcdigits_max
*= 2;
1621 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1622 (ctype
->wcdigits_max
1623 * sizeof (uint32_t)));
1626 ctype
->mbdigits
[ctype
->mbdigits_act
++] = (seq
->ucs4
== last_wch
1628 ctype
->wcdigits
[ctype
->wcdigits_act
++] = last_wch
;
1630 else if (handle_digits
== 2)
1632 /* We must store the digit values. */
1633 if (ctype
->outdigits_act
>= 10)
1635 lr_error (ldfile
, _("\
1636 %s: field `%s' does not contain exactly ten entries"),
1637 "LC_CTYPE", "outdigit");
1641 ctype
->mboutdigits
[ctype
->outdigits_act
] = (seq
->ucs4
== last_wch
1643 ctype
->wcoutdigits
[ctype
->outdigits_act
] = last_wch
;
1644 ++ctype
->outdigits_act
;
/* Expand a byte-code range.  Both ends must have the same number of
   bytes and LAST_CHARCODE must not compare greater (memcmp) than the
   end value; either violation is reported with lr_error.  Unless
   IGNORE_CONTENT is set, LAST_CHARCODE is incremented in place,
   starting at its last byte, until it equals now->val.charcode.bytes;
   each intermediate sequence is looked up with charmap_find_symbol
   and entered into the class tables.  For HANDLE_DIGITS == 1 or 2 a
   struct charseq followed by a copy of the current bytes is allocated
   with xmalloc and stored in the digit or outdigit tables.
   NOTE(review): the length-mismatch message reads (start end end);
   presumably (start and end) was meant -- it is a translatable
   runtime string, so it is left untouched here.
   NOTE(review): LAST_CHARCODE is plain char and its first element is
   converted to size_t for indexing class256_collection; if char is
   signed, a byte above 0x7f would give a huge index -- confirm.  */
1650 /* Ellipsis as in `/xea/x12.../xea/x34'. */
1652 charclass_charcode_ellipsis (struct linereader
*ldfile
,
1653 struct locale_ctype_t
*ctype
,
1654 struct charmap_t
*charmap
,
1655 struct repertoire_t
*repertoire
,
1656 struct token
*now
, char *last_charcode
,
1657 uint32_t last_charcode_len
,
1658 unsigned long int class256_bit
,
1659 unsigned long int class_bit
, int ignore_content
,
1662 /* First check whether the to-value is larger. */
1663 if (now
->val
.charcode
.nbytes
!= last_charcode_len
)
1665 lr_error (ldfile
, _("\
1666 start end end character sequence of range must have the same length"));
1670 if (memcmp (last_charcode
, now
->val
.charcode
.bytes
, last_charcode_len
) > 0)
1672 lr_error (ldfile
, _("\
1673 to-value character sequence is smaller than from-value sequence"));
1677 if (!ignore_content
)
1681 /* Increment the byte sequence value. */
1682 struct charseq
*seq
;
1686 for (i
= last_charcode_len
- 1; i
>= 0; --i
)
1687 if (++last_charcode
[i
] != 0)
1690 if (last_charcode_len
== 1)
1691 /* Of course we have the charcode value. */
1692 ctype
->class256_collection
[(size_t) last_charcode
[0]]
1695 /* Find the symbolic name. */
1696 seq
= charmap_find_symbol (charmap
, last_charcode
,
1700 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1701 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1702 strlen (seq
->name
));
1703 wch
= seq
== NULL
? ILLEGAL_CHAR_VALUE
: seq
->ucs4
;
1705 if (wch
!= ILLEGAL_CHAR_VALUE
&& class_bit
!= 0)
1706 *find_idx (ctype
, &ctype
->class_collection
,
1707 &ctype
->class_collection_max
,
1708 &ctype
->class_collection_act
, wch
) |= class_bit
;
1711 wch
= ILLEGAL_CHAR_VALUE
;
1713 if (handle_digits
== 1)
1715 /* We must store the digit values. */
1716 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
1718 ctype
->mbdigits_max
*= 2;
1719 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
1720 (ctype
->mbdigits_max
1721 * sizeof (char *)));
1722 ctype
->wcdigits_max
*= 2;
1723 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
1724 (ctype
->wcdigits_max
1725 * sizeof (uint32_t)));
/* Build a private charseq; the bytes are copied right behind it.  */
1728 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1729 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1730 seq
->nbytes
= last_charcode_len
;
1732 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
1733 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
1735 else if (handle_digits
== 2)
1737 struct charseq
*seq
;
1738 /* We must store the digit values. */
1739 if (ctype
->outdigits_act
>= 10)
1741 lr_error (ldfile
, _("\
1742 %s: field `%s' does not contain exactly ten entries"),
1743 "LC_CTYPE", "outdigit");
1747 seq
= xmalloc (sizeof (struct charseq
) + last_charcode_len
);
1748 memcpy ((char *) (seq
+ 1), last_charcode
, last_charcode_len
);
1749 seq
->nbytes
= last_charcode_len
;
1751 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
1752 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
1753 ++ctype
->outdigits_act
;
1756 while (memcmp (last_charcode
, now
->val
.charcode
.bytes
,
1757 last_charcode_len
) != 0);
/* Convert the token NOW into a wide-character (uint32_t) string.
   tok_bsymbol, tok_ucs4 and tok_charcode each allocate a two-element
   buffer with xmalloc and put the character value into its first
   element; tok_string hands back the startwc buffer of the token
   itself.  The last fallback visible here skips the rest of the line,
   raises a syntax error and yields (uint32_t *) -1l.
   NOTE(review): for tok_default_missing the expression
   ((uint32_t *) { 0 }) is a compound literal of pointer type holding
   0, i.e. a null pointer, although the comment next to it speaks of a
   special name -- confirm this is intended, since read_translit_entry
   compares its from-string against NULL.  */
1762 /* Read one transliteration entry. */
1764 read_widestring (struct linereader
*ldfile
, struct token
*now
,
1765 struct charmap_t
*charmap
, struct repertoire_t
*repertoire
)
1769 if (now
->tok
== tok_default_missing
)
1770 /* The special name "" will denote this case. */
1771 wstr
= ((uint32_t *) { 0 });
1772 else if (now
->tok
== tok_bsymbol
)
1774 /* Get the value from the repertoire. */
1775 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1776 wstr
[0] = repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1777 now
->val
.str
.lenmb
);
1778 if (wstr
[0] == ILLEGAL_CHAR_VALUE
)
1780 /* We cannot proceed, we don't know the UCS4 value. */
1787 else if (now
->tok
== tok_ucs4
)
1789 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1790 wstr
[0] = now
->val
.ucs4
;
1793 else if (now
->tok
== tok_charcode
)
1795 /* Argh, we have to convert to the symbol name first and then to the
1797 struct charseq
*seq
= charmap_find_symbol (charmap
,
1798 now
->val
.str
.startmb
,
1799 now
->val
.str
.lenmb
);
1801 /* Cannot find the UCS4 value. */
1804 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1805 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1806 strlen (seq
->name
));
1807 if (seq
->ucs4
== ILLEGAL_CHAR_VALUE
)
1808 /* We cannot proceed, we don't know the UCS4 value. */
1811 wstr
= (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1812 wstr
[0] = seq
->ucs4
;
1815 else if (now
->tok
== tok_string
)
1817 wstr
= now
->val
.str
.startwc
;
1818 if (wstr
== NULL
|| wstr
[0] == 0)
1823 if (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
1824 lr_ignore_rest (ldfile
, 0);
1825 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1826 return (uint32_t *) -1l;
/* Parse one transliteration definition.  The from-string is read with
   read_widestring; a NULL result means there is no valid from string.
   A struct translit_t is then allocated on the CTYPE obstack and
   records the from-string together with the file name and line number
   of LDFILE.  The to-strings follow, separated by semicolons: each one
   is accumulated on the obstack, terminated with a zero uint32_t and
   attached as a struct translit_to_t.  When the end of the line is
   reached the finished entry is pushed onto the front of
   ctype->translit.  If read_widestring signals an error
   ((uint32_t *) -1l) the obstack is released back to RESULT.  */
1834 read_translit_entry (struct linereader
*ldfile
, struct locale_ctype_t
*ctype
,
1835 struct token
*now
, struct charmap_t
*charmap
,
1836 struct repertoire_t
*repertoire
)
1838 uint32_t *from_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
1839 struct translit_t
*result
;
1840 struct translit_to_t
**top
;
1841 struct obstack
*ob
= &ctype
->mempool
;
1845 if (from_wstr
== NULL
)
1846 /* There is no valid from string. */
1849 result
= (struct translit_t
*) obstack_alloc (ob
,
1850 sizeof (struct translit_t
));
1851 result
->from
= from_wstr
;
1852 result
->fname
= ldfile
->fname
;
1853 result
->lineno
= ldfile
->lineno
;
1854 result
->next
= NULL
;
1864 /* Next we have one or more transliterations. They are
1865 separated by semicolons. */
1866 now
= lr_token (ldfile
, charmap
, repertoire
);
1868 if (!first
&& (now
->tok
== tok_semicolon
|| now
->tok
== tok_eol
))
1870 /* One string read. */
1871 const uint32_t zero
= 0;
/* Terminate the string collected on the obstack and close it.  */
1875 obstack_grow (ob
, &zero
, 4);
1876 to_wstr
= obstack_finish (ob
);
1878 *top
= obstack_alloc (ob
, sizeof (struct translit_to_t
));
1879 (*top
)->str
= to_wstr
;
1880 (*top
)->next
= NULL
;
1883 if (now
->tok
== tok_eol
)
1885 result
->next
= ctype
->translit
;
1886 ctype
->translit
= result
;
1891 top
= &(*top
)->next
;
1896 to_wstr
= read_widestring (ldfile
, now
, charmap
, repertoire
);
1897 if (to_wstr
== (uint32_t *) -1l)
1899 /* An error occurred. */
1900 obstack_free (ob
, result
);
1904 if (to_wstr
== NULL
)
1907 /* This value is usable. */
1908 obstack_grow (ob
, to_wstr
, wcslen ((wchar_t *) to_wstr
) * 4);
/* Parse the operand list of a translit_ignore line: a
   semicolon-separated list of single characters, each given either as
   a UCS4 token or as a symbolic name resolved through the repertoire,
   optionally followed by an ellipsis (tok_ellipsis2 or
   tok_ellipsis2_2, the latter selecting a step of 2) and a to-value.
   Every accepted item is allocated on the CTYPE obstack and linked at
   the head of ctype->translit_ignore.  Unknown names, a to-value
   smaller than the from-value, a premature end of the line and other
   unexpected tokens are reported with lr_error.  */
1917 read_translit_ignore_entry (struct linereader
*ldfile
,
1918 struct locale_ctype_t
*ctype
,
1919 struct charmap_t
*charmap
,
1920 struct repertoire_t
*repertoire
)
1922 /* We expect a semicolon-separated list of characters we ignore. We are
1923 only interested in the wide character definitions. These must be
1924 single characters, possibly defining a range when an ellipsis is used. */
1927 struct token
*now
= lr_token (ldfile
, charmap
, repertoire
);
1928 struct translit_ignore_t
*newp
;
1931 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
1934 _("premature end of `translit_ignore' definition"));
1938 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
1940 lr_error (ldfile
, _("syntax error"));
1941 lr_ignore_rest (ldfile
, 0);
1945 if (now
->tok
== tok_ucs4
)
1946 from
= now
->val
.ucs4
;
1948 /* Try to get the value. */
1949 from
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
1950 now
->val
.str
.lenmb
);
1952 if (from
== ILLEGAL_CHAR_VALUE
)
1954 lr_error (ldfile
, "invalid character name");
1959 newp
= (struct translit_ignore_t
*)
1960 obstack_alloc (&ctype
->mempool
, sizeof (struct translit_ignore_t
));
1965 newp
->next
= ctype
->translit_ignore
;
1966 ctype
->translit_ignore
= newp
;
1969 /* Now we expect either a semicolon, an ellipsis, or the end of the
1971 now
= lr_token (ldfile
, charmap
, repertoire
);
1973 if (now
->tok
== tok_ellipsis2
|| now
->tok
== tok_ellipsis2_2
)
1975 /* XXX Should we bother implementing `....'? `...' certainly
1976 will not be implemented. */
1978 int step
= now
->tok
== tok_ellipsis2_2
? 2 : 1;
1980 now
= lr_token (ldfile
, charmap
, repertoire
);
1982 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
1985 _("premature end of `translit_ignore' definition"));
1989 if (now
->tok
!= tok_bsymbol
&& now
->tok
!= tok_ucs4
)
1991 lr_error (ldfile
, _("syntax error"));
1992 lr_ignore_rest (ldfile
, 0);
1996 if (now
->tok
== tok_ucs4
)
1999 /* Try to get the value. */
2000 to
= repertoire_find_value (repertoire
, now
->val
.str
.startmb
,
2001 now
->val
.str
.lenmb
);
2003 if (to
== ILLEGAL_CHAR_VALUE
)
2004 lr_error (ldfile
, "invalid character name");
2007 /* Make sure the `to'-value is larger. */
2014 lr_error (ldfile
, _("\
2015 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2016 (to
| from
) < 65536 ? 4 : 8, to
,
2017 (to
| from
) < 65536 ? 4 : 8, from
);
2020 /* And the next token. */
2021 now
= lr_token (ldfile
, charmap
, repertoire
);
2024 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2028 if (now
->tok
== tok_semicolon
)
2032 /* If we come here something is wrong. */
2033 lr_error (ldfile
, _("syntax error"));
2034 lr_ignore_rest (ldfile
, 0);
2040 /* The parser for the LC_CTYPE section of the locale definition. */
2042 ctype_read (struct linereader
*ldfile
, struct localedef_t
*result
,
2043 struct charmap_t
*charmap
, const char *repertoire_name
,
2046 struct repertoire_t
*repertoire
= NULL
;
2047 struct locale_ctype_t
*ctype
;
2049 enum token_t nowtok
;
2051 struct charseq
*last_seq
;
2052 uint32_t last_wch
= 0;
2053 enum token_t last_token
;
2054 enum token_t ellipsis_token
;
2056 char last_charcode
[16];
2057 size_t last_charcode_len
= 0;
2058 const char *last_str
= NULL
;
2060 struct localedef_t
*copy_locale
= NULL
;
2062 /* Get the repertoire we have to use. */
2063 if (repertoire_name
!= NULL
)
2064 repertoire
= repertoire_read (repertoire_name
);
2066 /* The rest of the line containing `LC_CTYPE' must be free. */
2067 lr_ignore_rest (ldfile
, 1);
2072 now
= lr_token (ldfile
, charmap
, NULL
);
2075 while (nowtok
== tok_eol
);
2077 /* If we see `copy' now we are almost done. */
2078 if (nowtok
== tok_copy
)
2080 now
= lr_token (ldfile
, charmap
, NULL
);
2081 if (now
->tok
!= tok_string
)
2083 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2087 now
= lr_token (ldfile
, charmap
, NULL
);
2088 while (now
->tok
!= tok_eof
&& now
->tok
!= tok_end
);
2090 if (now
->tok
!= tok_eof
2091 || (now
= lr_token (ldfile
, charmap
, NULL
), now
->tok
== tok_eof
))
2092 lr_error (ldfile
, _("%s: premature end of file"), "LC_CTYPE");
2093 else if (now
->tok
!= tok_lc_ctype
)
2095 lr_error (ldfile
, _("\
2096 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2097 lr_ignore_rest (ldfile
, 0);
2100 lr_ignore_rest (ldfile
, 1);
2105 if (! ignore_content
)
2107 /* Get the locale definition. */
2108 copy_locale
= load_locale (LC_CTYPE
, now
->val
.str
.startmb
,
2109 repertoire_name
, charmap
, NULL
);
2110 if ((copy_locale
->avail
& CTYPE_LOCALE
) == 0)
2112 /* Not yet loaded. So do it now. */
2113 if (locfile_read (copy_locale
, charmap
) != 0)
2118 lr_ignore_rest (ldfile
, 1);
2120 now
= lr_token (ldfile
, charmap
, NULL
);
2124 /* Prepare the data structures. */
2125 ctype_startup (ldfile
, result
, charmap
, copy_locale
, ignore_content
);
2126 ctype
= result
->categories
[LC_CTYPE
].ctype
;
2128 /* Remember the repertoire we use. */
2129 if (!ignore_content
)
2130 ctype
->repertoire
= repertoire
;
2134 unsigned long int class_bit
= 0;
2135 unsigned long int class256_bit
= 0;
2136 int handle_digits
= 0;
2138 /* Of course we don't proceed beyond the end of file. */
2139 if (nowtok
== tok_eof
)
2142 /* Ignore empty lines. */
2143 if (nowtok
== tok_eol
)
2145 now
= lr_token (ldfile
, charmap
, NULL
);
2153 now
= lr_token (ldfile
, charmap
, NULL
);
2154 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2156 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
2157 now
= lr_token (ldfile
, charmap
, NULL
);
2158 if (now
->tok
!= tok_semicolon
)
2160 now
= lr_token (ldfile
, charmap
, NULL
);
2162 if (now
->tok
!= tok_eol
)
2164 %s: syntax error in definition of new character class"), "LC_CTYPE");
2168 now
= lr_token (ldfile
, charmap
, NULL
);
2169 while (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2171 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2172 now
= lr_token (ldfile
, charmap
, NULL
);
2173 if (now
->tok
!= tok_semicolon
)
2175 now
= lr_token (ldfile
, charmap
, NULL
);
2177 if (now
->tok
!= tok_eol
)
2179 %s: syntax error in definition of new character map"), "LC_CTYPE");
2183 /* Ignore the rest of the line if we don't need the input of
2187 lr_ignore_rest (ldfile
, 0);
2191 /* We simply forget the `class' keyword and use the following
2192 operand to determine the bit. */
2193 now
= lr_token (ldfile
, charmap
, NULL
);
2194 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2196 /* It may be one of the class names already known. */
2197 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2198 if (strcmp (ctype
->classnames
[cnt
], now
->val
.str
.startmb
) == 0)
2200 if (cnt
>= ctype
->nr_charclass
)
2202 #ifdef PREDEFINED_CLASSES
2203 if (now
->val
.str
.lenmb
== 8
2204 && memcmp ("special1", now
->val
.str
.startmb
, 8) == 0)
2205 class_bit
= _ISwspecial1
;
2206 else if (now
->val
.str
.lenmb
== 8
2207 && memcmp ("special2", now
->val
.str
.startmb
, 8) == 0)
2208 class_bit
= _ISwspecial2
;
2209 else if (now
->val
.str
.lenmb
== 8
2210 && memcmp ("special3", now
->val
.str
.startmb
, 8) == 0)
2211 class_bit
= _ISwspecial3
;
2215 /* OK, it's a new class. */
2216 ctype_class_new (ldfile
, ctype
, now
->val
.str
.startmb
);
2218 class_bit
= _ISwbit (ctype
->nr_charclass
- 1);
2223 class_bit
= _ISwbit (cnt
);
2225 free (now
->val
.str
.startmb
);
2228 else if (now
->tok
== tok_digit
)
2229 goto handle_tok_digit
;
2230 else if (now
->tok
< tok_upper
|| now
->tok
> tok_blank
)
2234 class_bit
= BITw (now
->tok
);
2235 class256_bit
= BIT (now
->tok
);
2238 /* The next character must be a semicolon. */
2239 now
= lr_token (ldfile
, charmap
, NULL
);
2240 if (now
->tok
!= tok_semicolon
)
2242 goto read_charclass
;
2255 /* Ignore the rest of the line if we don't need the input of
2259 lr_ignore_rest (ldfile
, 0);
2263 class_bit
= BITw (now
->tok
);
2264 class256_bit
= BIT (now
->tok
);
2267 ctype
->class_done
|= class_bit
;
2268 last_token
= tok_none
;
2269 ellipsis_token
= tok_none
;
2271 now
= lr_token (ldfile
, charmap
, NULL
);
2272 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2275 struct charseq
*seq
;
2277 if (ellipsis_token
== tok_none
)
2279 if (get_character (now
, charmap
, repertoire
, &seq
, &wch
))
2282 if (!ignore_content
&& seq
!= NULL
&& seq
->nbytes
== 1)
2283 /* Yep, we can store information about this byte
2285 ctype
->class256_collection
[seq
->bytes
[0]] |= class256_bit
;
2287 if (!ignore_content
&& wch
!= ILLEGAL_CHAR_VALUE
2289 /* We have the UCS4 position. */
2290 *find_idx (ctype
, &ctype
->class_collection
,
2291 &ctype
->class_collection_max
,
2292 &ctype
->class_collection_act
, wch
) |= class_bit
;
2294 last_token
= now
->tok
;
2295 /* Terminate the string. */
2296 if (last_token
== tok_bsymbol
)
2298 now
->val
.str
.startmb
[now
->val
.str
.lenmb
] = '\0';
2299 last_str
= now
->val
.str
.startmb
;
2305 memcpy (last_charcode
, now
->val
.charcode
.bytes
, 16);
2306 last_charcode_len
= now
->val
.charcode
.nbytes
;
2308 if (!ignore_content
&& handle_digits
== 1)
2310 /* We must store the digit values. */
2311 if (ctype
->mbdigits_act
== ctype
->mbdigits_max
)
2313 ctype
->mbdigits_max
+= 10;
2314 ctype
->mbdigits
= xrealloc (ctype
->mbdigits
,
2315 (ctype
->mbdigits_max
2316 * sizeof (char *)));
2317 ctype
->wcdigits_max
+= 10;
2318 ctype
->wcdigits
= xrealloc (ctype
->wcdigits
,
2319 (ctype
->wcdigits_max
2320 * sizeof (uint32_t)));
2323 ctype
->mbdigits
[ctype
->mbdigits_act
++] = seq
;
2324 ctype
->wcdigits
[ctype
->wcdigits_act
++] = wch
;
2326 else if (!ignore_content
&& handle_digits
== 2)
2328 /* We must store the digit values. */
2329 if (ctype
->outdigits_act
>= 10)
2331 lr_error (ldfile
, _("\
2332 %s: field `%s' does not contain exactly ten entries"),
2333 "LC_CTYPE", "outdigit");
2334 lr_ignore_rest (ldfile
, 0);
2338 ctype
->mboutdigits
[ctype
->outdigits_act
] = seq
;
2339 ctype
->wcoutdigits
[ctype
->outdigits_act
] = wch
;
2340 ++ctype
->outdigits_act
;
2345 /* Now it gets complicated. We have to resolve the
2346 ellipsis problem. First we must distinguish between
2347 the different kind of ellipsis and this must match the
2348 tokens we have seen. */
2349 assert (last_token
!= tok_none
);
2351 if (last_token
!= now
->tok
)
2353 lr_error (ldfile
, _("\
2354 ellipsis range must be marked by two operands of same type"));
2355 lr_ignore_rest (ldfile
, 0);
2359 if (last_token
== tok_bsymbol
)
2361 if (ellipsis_token
== tok_ellipsis3
)
2362 lr_error (ldfile
, _("with symbolic name range values \
2363 the absolute ellipsis `...' must not be used"));
2365 charclass_symbolic_ellipsis (ldfile
, ctype
, charmap
,
2366 repertoire
, now
, last_str
,
2367 class256_bit
, class_bit
,
2372 handle_digits
, step
);
2374 else if (last_token
== tok_ucs4
)
2376 if (ellipsis_token
!= tok_ellipsis2
)
2377 lr_error (ldfile
, _("\
2378 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2380 charclass_ucs4_ellipsis (ldfile
, ctype
, charmap
,
2381 repertoire
, now
, last_wch
,
2382 class256_bit
, class_bit
,
2383 ignore_content
, handle_digits
,
2388 assert (last_token
== tok_charcode
);
2390 if (ellipsis_token
!= tok_ellipsis3
)
2391 lr_error (ldfile
, _("\
2392 with character code range values one must use the absolute ellipsis `...'"));
2394 charclass_charcode_ellipsis (ldfile
, ctype
, charmap
,
2398 class256_bit
, class_bit
,
2403 /* Now we have used the last value. */
2404 last_token
= tok_none
;
2407 /* Next we expect a semicolon or the end of the line. */
2408 now
= lr_token (ldfile
, charmap
, NULL
);
2409 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2412 if (last_token
!= tok_none
2413 && now
->tok
>= tok_ellipsis2
&& now
->tok
<= tok_ellipsis4_2
)
2415 if (now
->tok
== tok_ellipsis2_2
)
2417 now
->tok
= tok_ellipsis2
;
2420 else if (now
->tok
== tok_ellipsis4_2
)
2422 now
->tok
= tok_ellipsis4
;
2426 ellipsis_token
= now
->tok
;
2428 now
= lr_token (ldfile
, charmap
, NULL
);
2432 if (now
->tok
!= tok_semicolon
)
2435 /* And get the next character. */
2436 now
= lr_token (ldfile
, charmap
, NULL
);
2438 ellipsis_token
= tok_none
;
2444 /* Ignore the rest of the line if we don't need the input of
2448 lr_ignore_rest (ldfile
, 0);
2453 class_bit
= _ISwdigit
;
2454 class256_bit
= _ISdigit
;
2456 goto read_charclass
;
2459 /* Ignore the rest of the line if we don't need the input of
2463 lr_ignore_rest (ldfile
, 0);
2467 if (ctype
->outdigits_act
!= 0)
2468 lr_error (ldfile
, _("\
2469 %s: field `%s' declared more than once"),
2470 "LC_CTYPE", "outdigit");
2474 goto read_charclass
;
2477 /* Ignore the rest of the line if we don't need the input of
2481 lr_ignore_rest (ldfile
, 0);
2489 /* Ignore the rest of the line if we don't need the input of
2493 lr_ignore_rest (ldfile
, 0);
2501 /* Ignore the rest of the line if we don't need the input of
2505 lr_ignore_rest (ldfile
, 0);
2509 /* We simply forget the `map' keyword and use the following
2510 operand to determine the mapping. */
2511 now
= lr_token (ldfile
, charmap
, NULL
);
2512 if (now
->tok
== tok_ident
|| now
->tok
== tok_string
)
2516 for (cnt
= 2; cnt
< ctype
->map_collection_nr
; ++cnt
)
2517 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2520 if (cnt
< ctype
->map_collection_nr
)
2521 free (now
->val
.str
.startmb
);
2523 /* OK, it's a new map. */
2524 ctype_map_new (ldfile
, ctype
, now
->val
.str
.startmb
, charmap
);
2528 else if (now
->tok
< tok_toupper
|| now
->tok
> tok_tolower
)
2531 mapidx
= now
->tok
- tok_toupper
;
2533 now
= lr_token (ldfile
, charmap
, NULL
);
2534 /* This had better be a semicolon. */
2535 if (now
->tok
!= tok_semicolon
)
2539 /* Test whether this mapping was already defined. */
2540 if (ctype
->tomap_done
[mapidx
])
2542 lr_error (ldfile
, _("duplicated definition for mapping `%s'"),
2543 ctype
->mapnames
[mapidx
]);
2544 lr_ignore_rest (ldfile
, 0);
2547 ctype
->tomap_done
[mapidx
] = 1;
2549 now
= lr_token (ldfile
, charmap
, NULL
);
2550 while (now
->tok
!= tok_eol
&& now
->tok
!= tok_eof
)
2552 struct charseq
*from_seq
;
2554 struct charseq
*to_seq
;
2557 /* Every pair starts with an opening brace. */
2558 if (now
->tok
!= tok_open_brace
)
2561 /* Next comes the from-value. */
2562 now
= lr_token (ldfile
, charmap
, NULL
);
2563 if (get_character (now
, charmap
, repertoire
, &from_seq
,
2567 /* The next is a comma. */
2568 now
= lr_token (ldfile
, charmap
, NULL
);
2569 if (now
->tok
!= tok_comma
)
2572 /* And the other value. */
2573 now
= lr_token (ldfile
, charmap
, NULL
);
2574 if (get_character (now
, charmap
, repertoire
, &to_seq
,
2578 /* And the last thing is the closing brace. */
2579 now
= lr_token (ldfile
, charmap
, NULL
);
2580 if (now
->tok
!= tok_close_brace
)
2583 if (!ignore_content
)
2585 if (mapidx
< 2 && from_seq
!= NULL
&& to_seq
!= NULL
2586 && from_seq
->nbytes
== 1 && to_seq
->nbytes
== 1)
2587 /* We can use this value. */
2588 ctype
->map256_collection
[mapidx
][from_seq
->bytes
[0]]
2591 if (from_wch
!= ILLEGAL_CHAR_VALUE
2592 && to_wch
!= ILLEGAL_CHAR_VALUE
)
2593 /* Both correct values. */
2594 *find_idx (ctype
, &ctype
->map_collection
[mapidx
],
2595 &ctype
->map_collection_max
[mapidx
],
2596 &ctype
->map_collection_act
[mapidx
],
2600 /* Now comes a semicolon or the end of the line/file. */
2601 now
= lr_token (ldfile
, charmap
, NULL
);
2602 if (now
->tok
== tok_semicolon
)
2603 now
= lr_token (ldfile
, charmap
, NULL
);
2607 case tok_translit_start
:
2608 /* Ignore the rest of the line if we don't need the input of
2612 lr_ignore_rest (ldfile
, 0);
2616 /* The rest of the line should be empty. */
2617 lr_ignore_rest (ldfile
, 1);
2619 /* We count here the number of allocated entries in the `translit'
2623 ldfile
->translate_strings
= 1;
2624 ldfile
->return_widestr
= 1;
2626 /* We proceed until we see the `translit_end' token. */
2627 while (now
= lr_token (ldfile
, charmap
, repertoire
),
2628 now
->tok
!= tok_translit_end
&& now
->tok
!= tok_eof
)
2630 if (now
->tok
== tok_eol
)
2631 /* Ignore empty lines. */
2634 if (now
->tok
== tok_translit_end
)
2636 lr_ignore_rest (ldfile
, 0);
2640 if (now
->tok
== tok_include
)
2642 /* We have to include locale. */
2643 const char *locale_name
;
2644 const char *repertoire_name
;
2646 now
= lr_token (ldfile
, charmap
, NULL
);
2647 /* This should be a string or an identifier. In any
2648 case something to name a locale. */
2649 if (now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2652 lr_error (ldfile
, _("%s: syntax error"), "LC_CTYPE");
2653 lr_ignore_rest (ldfile
, 0);
2656 locale_name
= now
->val
.str
.startmb
;
2658 /* Next should be a semicolon. */
2659 now
= lr_token (ldfile
, charmap
, NULL
);
2660 if (now
->tok
!= tok_semicolon
)
2661 goto translit_syntax
;
2663 /* Now the repertoire name. */
2664 now
= lr_token (ldfile
, charmap
, NULL
);
2665 if ((now
->tok
!= tok_string
&& now
->tok
!= tok_ident
)
2666 || now
->val
.str
.startmb
== NULL
)
2667 goto translit_syntax
;
2668 repertoire_name
= now
->val
.str
.startmb
;
2670 /* We must not have more than one `include'. */
2671 if (ctype
->translit_copy_locale
!= NULL
)
2673 lr_error (ldfile
, _("\
2674 %s: only one `include' instruction allowed"), "LC_CTYPE");
2675 lr_ignore_rest (ldfile
, 0);
2679 ctype
->translit_copy_locale
= locale_name
;
2680 ctype
->translit_copy_repertoire
= repertoire_name
;
2682 /* The rest of the line must be empty. */
2683 lr_ignore_rest (ldfile
, 1);
2685 /* Make sure the locale is read. */
2686 add_to_readlist (LC_CTYPE
, ctype
->translit_copy_locale
,
2687 repertoire_name
, 1, NULL
);
2690 else if (now
->tok
== tok_default_missing
)
2696 /* We expect a single character or string as the
2698 now
= lr_token (ldfile
, charmap
, NULL
);
2699 wstr
= read_widestring (ldfile
, now
, charmap
,
2704 if (ctype
->default_missing
!= NULL
)
2706 lr_error (ldfile
, _("\
2707 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2708 error_at_line (0, 0, ctype
->default_missing_file
,
2709 ctype
->default_missing_lineno
,
2711 previous definition was here"));
2715 ctype
->default_missing
= wstr
;
2716 ctype
->default_missing_file
= ldfile
->fname
;
2717 ctype
->default_missing_lineno
= ldfile
->lineno
;
2719 /* We can have more entries, ignore them. */
2720 lr_ignore_rest (ldfile
, 0);
2723 else if (wstr
== (uint32_t *) -1l)
2724 /* This was a syntax error. */
2727 /* Maybe there is another replacement we can use. */
2728 now
= lr_token (ldfile
, charmap
, NULL
);
2729 if (now
->tok
== tok_eol
|| now
->tok
== tok_eof
)
2731 /* Nothing found. We tell the user. */
2732 lr_error (ldfile
, _("\
2733 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2736 if (now
->tok
!= tok_semicolon
)
2737 goto translit_syntax
;
2742 else if (now
->tok
== tok_translit_ignore
)
2744 read_translit_ignore_entry (ldfile
, ctype
, charmap
,
2749 read_translit_entry (ldfile
, ctype
, now
, charmap
, repertoire
);
2751 ldfile
->return_widestr
= 0;
2755 /* Ignore the rest of the line if we don't need the input of
2759 lr_ignore_rest (ldfile
, 0);
2763 /* This could mean one of several things. First test whether
2764 it's a character class name. */
2765 for (cnt
= 0; cnt
< ctype
->nr_charclass
; ++cnt
)
2766 if (strcmp (now
->val
.str
.startmb
, ctype
->classnames
[cnt
]) == 0)
2768 if (cnt
< ctype
->nr_charclass
)
2770 class_bit
= _ISwbit (cnt
);
2771 class256_bit
= cnt
<= 11 ? _ISbit (cnt
) : 0;
2772 free (now
->val
.str
.startmb
);
2773 goto read_charclass
;
2775 for (cnt
= 0; cnt
< ctype
->map_collection_nr
; ++cnt
)
2776 if (strcmp (now
->val
.str
.startmb
, ctype
->mapnames
[cnt
]) == 0)
2778 if (cnt
< ctype
->map_collection_nr
)
2781 free (now
->val
.str
.startmb
);
2784 #ifdef PREDEFINED_CLASSES
2785 if (strcmp (now
->val
.str
.startmb
, "special1") == 0)
2787 class_bit
= _ISwspecial1
;
2788 free (now
->val
.str
.startmb
);
2789 goto read_charclass
;
2791 if (strcmp (now
->val
.str
.startmb
, "special2") == 0)
2793 class_bit
= _ISwspecial2
;
2794 free (now
->val
.str
.startmb
);
2795 goto read_charclass
;
2797 if (strcmp (now
->val
.str
.startmb
, "special3") == 0)
2799 class_bit
= _ISwspecial3
;
2800 free (now
->val
.str
.startmb
);
2801 goto read_charclass
;
2803 if (strcmp (now
->val
.str
.startmb
, "tosymmetric") == 0)
2812 /* Next we assume `LC_CTYPE'. */
2813 now
= lr_token (ldfile
, charmap
, NULL
);
2814 if (now
->tok
== tok_eof
)
2816 if (now
->tok
== tok_eol
)
2817 lr_error (ldfile
, _("%s: incomplete `END' line"),
2819 else if (now
->tok
!= tok_lc_ctype
)
2820 lr_error (ldfile
, _("\
2821 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2822 lr_ignore_rest (ldfile
, now
->tok
== tok_lc_ctype
);
2827 if (now
->tok
!= tok_eof
)
2828 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2831 /* Prepare for the next round. */
2832 now
= lr_token (ldfile
, charmap
, NULL
);
2836 /* When we come here we reached the end of the file. */
2837 lr_error (ldfile
, _("%s: premature end of file"), "LC_CTYPE");
2842 set_class_defaults (struct locale_ctype_t
*ctype
, struct charmap_t
*charmap
,
2843 struct repertoire_t
*repertoire
)
2847 /* This function defines the default values for the classes and conversions
2848 according to POSIX.2 2.5.2.1.
2849 It may seem that the order of these if-blocks is arbitrary but it is NOT.
2850 Don't move them unless you know what you are doing! */
2852 void set_default (int bitpos
, int from
, int to
)
2856 int bit
= _ISbit (bitpos
);
2857 int bitw
= _ISwbit (bitpos
);
2858 /* Define string. */
2861 for (ch
= from
; ch
<= to
; ++ch
)
2863 struct charseq
*seq
;
2866 seq
= charmap_find_value (charmap
, tmp
, 1);
2870 sprintf (buf
, "U%08X", ch
);
2871 seq
= charmap_find_value (charmap
, buf
, 9);
2877 %s: character `%s' not defined in charmap while needed as default value"),
2880 else if (seq
->nbytes
!= 1)
2882 %s: character `%s' in charmap not representable with one byte"),
2885 ctype
->class256_collection
[seq
->bytes
[0]] |= bit
;
2887 /* No need to search here, the ASCII value is also the Unicode
2889 ELEM (ctype
, class_collection
, , ch
) |= bitw
;
2893 /* Set default values if keyword was not present. */
2894 if ((ctype
->class_done
& BITw (tok_upper
)) == 0)
2895 /* "If this keyword [upper] is not specified, the uppercase letters
2896 `A' through `Z', ..., shall automatically belong to this class,
2897 with implementation defined character values." [P1003.2, 2.5.2.1] */
2898 set_default (BITPOS (tok_upper
), 'A', 'Z');
2900 if ((ctype
->class_done
& BITw (tok_lower
)) == 0)
2901 /* "If this keyword [lower] is not specified, the lowercase letters
2902 `a' through `z', ..., shall automatically belong to this class,
2903 with implementation defined character values." [P1003.2, 2.5.2.1] */
2904 set_default (BITPOS (tok_lower
), 'a', 'z');
2906 if ((ctype
->class_done
& BITw (tok_alpha
)) == 0)
2908 /* Table 2-6 in P1003.2 says that characters in class `upper' or
2909 class `lower' *must* be in class `alpha'. */
2910 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
);
2911 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
);
2913 for (cnt
= 0; cnt
< 256; ++cnt
)
2914 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
2915 ctype
->class256_collection
[cnt
] |= BIT (tok_alpha
);
2917 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
2918 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
2919 ctype
->class_collection
[cnt
] |= BITw (tok_alpha
);
2922 if ((ctype
->class_done
& BITw (tok_digit
)) == 0)
2923 /* "If this keyword [digit] is not specified, the digits `0' through
2924 `9', ..., shall automatically belong to this class, with
2925 implementation-defined character values." [P1003.2, 2.5.2.1] */
2926 set_default (BITPOS (tok_digit
), '0', '9');
2928 /* "Only characters specified for the `alpha' and `digit' keywords
2929 shall be specified. Characters specified for the keywords `alpha'
2930 and `digit' are automatically included in this class." */
2932 unsigned long int mask
= BIT (tok_alpha
) | BIT (tok_digit
);
2933 unsigned long int maskw
= BITw (tok_alpha
) | BITw (tok_digit
);
2935 for (cnt
= 0; cnt
< 256; ++cnt
)
2936 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
2937 ctype
->class256_collection
[cnt
] |= BIT (tok_alnum
);
2939 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
2940 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
2941 ctype
->class_collection
[cnt
] |= BITw (tok_alnum
);
2944 if ((ctype
->class_done
& BITw (tok_space
)) == 0)
2945 /* "If this keyword [space] is not specified, the characters <space>,
2946 <form-feed>, <newline>, <carriage-return>, <tab>, and
2947 <vertical-tab>, ..., shall automatically belong to this class,
2948 with implementation-defined character values." [P1003.2, 2.5.2.1] */
2950 struct charseq
*seq
;
2952 seq
= charmap_find_value (charmap
, "space", 5);
2954 seq
= charmap_find_value (charmap
, "SP", 2);
2956 seq
= charmap_find_value (charmap
, "U00000020", 9);
2961 %s: character `%s' not defined while needed as default value"),
2962 "LC_CTYPE", "<space>");
2964 else if (seq
->nbytes
!= 1)
2966 %s: character `%s' in charmap not representable with one byte"),
2967 "LC_CTYPE", "<space>");
2969 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
2971 /* No need to search. */
2972 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_space
);
2974 seq
= charmap_find_value (charmap
, "form-feed", 9);
2976 seq
= charmap_find_value (charmap
, "U0000000C", 9);
2981 %s: character `%s' not defined while needed as default value"),
2982 "LC_CTYPE", "<form-feed>");
2984 else if (seq
->nbytes
!= 1)
2986 %s: character `%s' in charmap not representable with one byte"),
2987 "LC_CTYPE", "<form-feed>");
2989 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
2991 /* No need to search. */
2992 ELEM (ctype
, class_collection
, , L
'\f') |= BITw (tok_space
);
2995 seq
= charmap_find_value (charmap
, "newline", 7);
2997 seq
= charmap_find_value (charmap
, "U0000000A", 9);
3002 character `%s' not defined while needed as default value"),
3005 else if (seq
->nbytes
!= 1)
3007 %s: character `%s' in charmap not representable with one byte"),
3008 "LC_CTYPE", "<newline>");
3010 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3012 /* No need to search. */
3013 ELEM (ctype
, class_collection
, , L
'\n') |= BITw (tok_space
);
3016 seq
= charmap_find_value (charmap
, "carriage-return", 15);
3018 seq
= charmap_find_value (charmap
, "U0000000D", 9);
3023 %s: character `%s' not defined while needed as default value"),
3024 "LC_CTYPE", "<carriage-return>");
3026 else if (seq
->nbytes
!= 1)
3028 %s: character `%s' in charmap not representable with one byte"),
3029 "LC_CTYPE", "<carriage-return>");
3031 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3033 /* No need to search. */
3034 ELEM (ctype
, class_collection
, , L
'\r') |= BITw (tok_space
);
3037 seq
= charmap_find_value (charmap
, "tab", 3);
3039 seq
= charmap_find_value (charmap
, "U00000009", 9);
3044 %s: character `%s' not defined while needed as default value"),
3045 "LC_CTYPE", "<tab>");
3047 else if (seq
->nbytes
!= 1)
3049 %s: character `%s' in charmap not representable with one byte"),
3050 "LC_CTYPE", "<tab>");
3052 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3054 /* No need to search. */
3055 ELEM (ctype
, class_collection
, , L
'\t') |= BITw (tok_space
);
3058 seq
= charmap_find_value (charmap
, "vertical-tab", 12);
3060 seq
= charmap_find_value (charmap
, "U0000000B", 9);
3065 %s: character `%s' not defined while needed as default value"),
3066 "LC_CTYPE", "<vertical-tab>");
3068 else if (seq
->nbytes
!= 1)
3070 %s: character `%s' in charmap not representable with one byte"),
3071 "LC_CTYPE", "<vertical-tab>");
3073 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_space
);
3075 /* No need to search. */
3076 ELEM (ctype
, class_collection
, , L
'\v') |= BITw (tok_space
);
3079 if ((ctype
->class_done
& BITw (tok_xdigit
)) == 0)
3080 /* "If this keyword is not specified, the digits `0' to `9', the
3081 uppercase letters `A' through `F', and the lowercase letters `a'
3082 through `f', ..., shall automatically belong to this class, with
3083 implementation defined character values." [P1003.2, 2.5.2.1] */
3085 set_default (BITPOS (tok_xdigit
), '0', '9');
3086 set_default (BITPOS (tok_xdigit
), 'A', 'F');
3087 set_default (BITPOS (tok_xdigit
), 'a', 'f');
3090 if ((ctype
->class_done
& BITw (tok_blank
)) == 0)
3091 /* "If this keyword [blank] is unspecified, the characters <space> and
3092 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3094 struct charseq
*seq
;
3096 seq
= charmap_find_value (charmap
, "space", 5);
3098 seq
= charmap_find_value (charmap
, "SP", 2);
3100 seq
= charmap_find_value (charmap
, "U00000020", 9);
3105 %s: character `%s' not defined while needed as default value"),
3106 "LC_CTYPE", "<space>");
3108 else if (seq
->nbytes
!= 1)
3110 %s: character `%s' in charmap not representable with one byte"),
3111 "LC_CTYPE", "<space>");
3113 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
3115 /* No need to search. */
3116 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_blank
);
3119 seq
= charmap_find_value (charmap
, "tab", 3);
3121 seq
= charmap_find_value (charmap
, "U00000009", 9);
3126 %s: character `%s' not defined while needed as default value"),
3127 "LC_CTYPE", "<tab>");
3129 else if (seq
->nbytes
!= 1)
3131 %s: character `%s' in charmap not representable with one byte"),
3132 "LC_CTYPE", "<tab>");
3134 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_blank
);
3136 /* No need to search. */
3137 ELEM (ctype
, class_collection
, , L
'\t') |= BITw (tok_blank
);
3140 if ((ctype
->class_done
& BITw (tok_graph
)) == 0)
3141 /* "If this keyword [graph] is not specified, characters specified for
3142 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3143 shall belong to this character class." [P1003.2, 2.5.2.1] */
3145 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
3146 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
3147 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
) |
3148 BITw (tok_alpha
) | BITw (tok_digit
) | BITw (tok_xdigit
) |
3152 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3153 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3154 ctype
->class_collection
[cnt
] |= BITw (tok_graph
);
3156 for (cnt
= 0; cnt
< 256; ++cnt
)
3157 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3158 ctype
->class256_collection
[cnt
] |= BIT (tok_graph
);
3161 if ((ctype
->class_done
& BITw (tok_print
)) == 0)
3162 /* "If this keyword [print] is not provided, characters specified for
3163 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3164 and the <space> character shall belong to this character class."
3165 [P1003.2, 2.5.2.1] */
3167 unsigned long int mask
= BIT (tok_upper
) | BIT (tok_lower
) |
3168 BIT (tok_alpha
) | BIT (tok_digit
) | BIT (tok_xdigit
) | BIT (tok_punct
);
3169 unsigned long int maskw
= BITw (tok_upper
) | BITw (tok_lower
) |
3170 BITw (tok_alpha
) | BITw (tok_digit
) | BITw (tok_xdigit
) |
3173 struct charseq
*seq
;
3175 for (cnt
= 0; cnt
< ctype
->class_collection_act
; ++cnt
)
3176 if ((ctype
->class_collection
[cnt
] & maskw
) != 0)
3177 ctype
->class_collection
[cnt
] |= BITw (tok_print
);
3179 for (cnt
= 0; cnt
< 256; ++cnt
)
3180 if ((ctype
->class256_collection
[cnt
] & mask
) != 0)
3181 ctype
->class256_collection
[cnt
] |= BIT (tok_print
);
3184 seq
= charmap_find_value (charmap
, "space", 5);
3186 seq
= charmap_find_value (charmap
, "SP", 2);
3188 seq
= charmap_find_value (charmap
, "U00000020", 9);
3193 %s: character `%s' not defined while needed as default value"),
3194 "LC_CTYPE", "<space>");
3196 else if (seq
->nbytes
!= 1)
3198 %s: character `%s' in charmap not representable with one byte"),
3199 "LC_CTYPE", "<space>");
3201 ctype
->class256_collection
[seq
->bytes
[0]] |= BIT (tok_print
);
3203 /* No need to search. */
3204 ELEM (ctype
, class_collection
, , L
' ') |= BITw (tok_print
);
3207 if (ctype
->tomap_done
[0] == 0)
3208 /* "If this keyword [toupper] is not specified, the lowercase letters
3209 `a' through `z', and their corresponding uppercase letters `A' to
3210 `Z', ..., shall automatically be included, with implementation-
3211 defined character values." [P1003.2, 2.5.2.1] */
3216 strcpy (tmp
, "<?>");
3218 for (ch
= 'a'; ch
<= 'z'; ++ch
)
3220 struct charseq
*seq_from
, *seq_to
;
3224 seq_from
= charmap_find_value (charmap
, &tmp
[1], 1);
3225 if (seq_from
== NULL
)
3228 sprintf (buf
, "U%08X", ch
);
3229 seq_from
= charmap_find_value (charmap
, buf
, 9);
3231 if (seq_from
== NULL
)
3235 %s: character `%s' not defined while needed as default value"),
3238 else if (seq_from
->nbytes
!= 1)
3242 %s: character `%s' needed as default value not representable with one byte"),
3247 /* This conversion is implementation defined. */
3248 tmp
[1] = (char) (ch
+ ('A' - 'a'));
3249 seq_to
= charmap_find_value (charmap
, &tmp
[1], 1);
3253 sprintf (buf
, "U%08X", ch
+ ('A' - 'a'));
3254 seq_to
= charmap_find_value (charmap
, buf
, 9);
3260 %s: character `%s' not defined while needed as default value"),
3263 else if (seq_to
->nbytes
!= 1)
3267 %s: character `%s' needed as default value not representable with one byte"),
3271 /* The index [0] is determined by the order of the
3272 `ctype_map_newP' calls in `ctype_startup'. */
3273 ctype
->map256_collection
[0][seq_from
->bytes
[0]]
3277 /* No need to search. */
3278 ELEM (ctype
, map_collection
, [0], ch
) = ch
+ ('A' - 'a');
3282 if (ctype
->tomap_done
[1] == 0)
3283 /* "If this keyword [tolower] is not specified, the mapping shall be
3284 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3286 for (cnt
= 0; cnt
< ctype
->map_collection_act
[0]; ++cnt
)
3287 if (ctype
->map_collection
[0][cnt
] != 0)
3288 ELEM (ctype
, map_collection
, [1],
3289 ctype
->map_collection
[0][cnt
])
3290 = ctype
->charnames
[cnt
];
3292 for (cnt
= 0; cnt
< 256; ++cnt
)
3293 if (ctype
->map256_collection
[0][cnt
] != 0)
3294 ctype
->map256_collection
[1][ctype
->map256_collection
[0][cnt
]] = cnt
;
3297 if (ctype
->outdigits_act
!= 10)
3299 if (ctype
->outdigits_act
!= 0)
3300 error (0,0, _("%s: field `%s' does not contain exactly ten entries"),
3301 "LC_CTYPE", "outdigit");
3303 for (cnt
= ctype
->outdigits_act
; cnt
< 10; ++cnt
)
3305 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3308 if (ctype
->mboutdigits
[cnt
] == NULL
)
3309 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3311 strlen (longnames
[cnt
]));
3313 if (ctype
->mboutdigits
[cnt
] == NULL
)
3314 ctype
->mboutdigits
[cnt
] = charmap_find_symbol (charmap
,
3317 if (ctype
->mboutdigits
[cnt
] == NULL
)
3319 /* Provide a replacement. */
3321 no output digits defined and none of the standard names in the charmap"));
3323 ctype
->mboutdigits
[cnt
] = obstack_alloc (&charmap
->mem_pool
,
3324 sizeof (struct charseq
)
3327 /* This is better than nothing. */
3328 ctype
->mboutdigits
[cnt
]->bytes
[0] = digits
[cnt
];
3329 ctype
->mboutdigits
[cnt
]->nbytes
= 1;
3332 ctype
->wcoutdigits
[cnt
] = L
'0' + cnt
;
3335 ctype
->outdigits_act
= 10;
3340 /* Construction of sparse 3-level tables.
3341 See wchar-lookup.h for their structure and the meaning of p and q. */
3348 /* Working representation. */
3349 size_t level1_alloc
;
3352 size_t level2_alloc
;
3355 size_t level3_alloc
;
3358 /* Compressed representation. */
3363 /* Initialize. Assumes t->p and t->q have already been set. */
3365 wctype_table_init (struct wctype_table
*t
)
3367 t
->level1_alloc
= t
->level1_size
= 0;
3368 t
->level2_alloc
= t
->level2_size
= 0;
3369 t
->level3_alloc
= t
->level3_size
= 0;
3372 /* Retrieve an entry. */
3374 wctype_table_get (struct wctype_table
*t
, uint32_t wc
)
3376 uint32_t index1
= wc
>> (t
->q
+ t
->p
+ 5);
3377 if (index1
< t
->level1_size
)
3379 uint32_t lookup1
= t
->level1
[index1
];
3380 if (lookup1
!= ~((uint32_t) 0))
3382 uint32_t index2
= ((wc
>> (t
->p
+ 5)) & ((1 << t
->q
) - 1))
3383 + (lookup1
<< t
->q
);
3384 uint32_t lookup2
= t
->level2
[index2
];
3385 if (lookup2
!= ~((uint32_t) 0))
3387 uint32_t index3
= ((wc
>> 5) & ((1 << t
->p
) - 1))
3388 + (lookup2
<< t
->p
);
3389 uint32_t lookup3
= t
->level3
[index3
];
3390 uint32_t index4
= wc
& 0x1f;
3392 return (lookup3
>> index4
) & 1;
3399 /* Add one entry. */
3401 wctype_table_add (struct wctype_table
*t
, uint32_t wc
)
3403 uint32_t index1
= wc
>> (t
->q
+ t
->p
+ 5);
3404 uint32_t index2
= (wc
>> (t
->p
+ 5)) & ((1 << t
->q
) - 1);
3405 uint32_t index3
= (wc
>> 5) & ((1 << t
->p
) - 1);
3406 uint32_t index4
= wc
& 0x1f;
3409 if (index1
>= t
->level1_size
)
3411 if (index1
>= t
->level1_alloc
)
3413 size_t alloc
= 2 * t
->level1_alloc
;
3414 if (alloc
<= index1
)
3416 t
->level1
= (t
->level1_alloc
> 0
3417 ? (uint32_t *) xrealloc ((char *) t
->level1
,
3418 alloc
* sizeof (uint32_t))
3419 : (uint32_t *) xmalloc (alloc
* sizeof (uint32_t)));
3420 t
->level1_alloc
= alloc
;
3422 while (index1
>= t
->level1_size
)
3423 t
->level1
[t
->level1_size
++] = ~((uint32_t) 0);
3426 if (t
->level1
[index1
] == ~((uint32_t) 0))
3428 if (t
->level2_size
== t
->level2_alloc
)
3430 size_t alloc
= 2 * t
->level2_alloc
+ 1;
3431 t
->level2
= (t
->level2_alloc
> 0
3432 ? (uint32_t *) xrealloc ((char *) t
->level2
,
3433 (alloc
<< t
->q
) * sizeof (uint32_t))
3434 : (uint32_t *) xmalloc ((alloc
<< t
->q
) * sizeof (uint32_t)));
3435 t
->level2_alloc
= alloc
;
3437 i1
= t
->level2_size
<< t
->q
;
3438 i2
= (t
->level2_size
+ 1) << t
->q
;
3439 for (i
= i1
; i
< i2
; i
++)
3440 t
->level2
[i
] = ~((uint32_t) 0);
3441 t
->level1
[index1
] = t
->level2_size
++;
3444 index2
+= t
->level1
[index1
] << t
->q
;
3446 if (t
->level2
[index2
] == ~((uint32_t) 0))
3448 if (t
->level3_size
== t
->level3_alloc
)
3450 size_t alloc
= 2 * t
->level3_alloc
+ 1;
3451 t
->level3
= (t
->level3_alloc
> 0
3452 ? (uint32_t *) xrealloc ((char *) t
->level3
,
3453 (alloc
<< t
->p
) * sizeof (uint32_t))
3454 : (uint32_t *) xmalloc ((alloc
<< t
->p
) * sizeof (uint32_t)));
3455 t
->level3_alloc
= alloc
;
3457 i1
= t
->level3_size
<< t
->p
;
3458 i2
= (t
->level3_size
+ 1) << t
->p
;
3459 for (i
= i1
; i
< i2
; i
++)
3461 t
->level2
[index2
] = t
->level3_size
++;
3464 index3
+= t
->level2
[index2
] << t
->p
;
3466 t
->level3
[index3
] |= (uint32_t)1 << index4
;
3469 /* Finalize and shrink. */
3471 wctype_table_finalize (struct wctype_table
*t
)
3474 uint32_t reorder3
[t
->level3_size
];
3475 uint32_t reorder2
[t
->level2_size
];
3476 uint32_t level1_offset
, level2_offset
, level3_offset
;
3478 /* Uniquify level3 blocks. */
3480 for (j
= 0; j
< t
->level3_size
; j
++)
3482 for (i
= 0; i
< k
; i
++)
3483 if (memcmp (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3484 (1 << t
->p
) * sizeof (uint32_t)) == 0)
3486 /* Relocate block j to block i. */
3491 memcpy (&t
->level3
[i
<< t
->p
], &t
->level3
[j
<< t
->p
],
3492 (1 << t
->p
) * sizeof (uint32_t));
3498 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3499 if (t
->level2
[i
] != ~((uint32_t) 0))
3500 t
->level2
[i
] = reorder3
[t
->level2
[i
]];
3502 /* Uniquify level2 blocks. */
3504 for (j
= 0; j
< t
->level2_size
; j
++)
3506 for (i
= 0; i
< k
; i
++)
3507 if (memcmp (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3508 (1 << t
->q
) * sizeof (uint32_t)) == 0)
3510 /* Relocate block j to block i. */
3515 memcpy (&t
->level2
[i
<< t
->q
], &t
->level2
[j
<< t
->q
],
3516 (1 << t
->q
) * sizeof (uint32_t));
3522 for (i
= 0; i
< t
->level1_size
; i
++)
3523 if (t
->level1
[i
] != ~((uint32_t) 0))
3524 t
->level1
[i
] = reorder2
[t
->level1
[i
]];
3526 /* Create and fill the resulting compressed representation. */
3528 5 * sizeof (uint32_t)
3529 + t
->level1_size
* sizeof (uint32_t)
3530 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t)
3531 + (t
->level3_size
<< t
->p
) * sizeof (uint32_t);
3532 t
->result
= (char *) xmalloc (t
->result_size
);
3535 5 * sizeof (uint32_t);
3537 5 * sizeof (uint32_t)
3538 + t
->level1_size
* sizeof (uint32_t);
3540 5 * sizeof (uint32_t)
3541 + t
->level1_size
* sizeof (uint32_t)
3542 + (t
->level2_size
<< t
->q
) * sizeof (uint32_t);
3544 ((uint32_t *) t
->result
)[0] = t
->q
+ t
->p
+ 5;
3545 ((uint32_t *) t
->result
)[1] = t
->level1_size
;
3546 ((uint32_t *) t
->result
)[2] = t
->p
+ 5;
3547 ((uint32_t *) t
->result
)[3] = (1 << t
->q
) - 1;
3548 ((uint32_t *) t
->result
)[4] = (1 << t
->p
) - 1;
3550 for (i
= 0; i
< t
->level1_size
; i
++)
3551 ((uint32_t *) (t
->result
+ level1_offset
))[i
] =
3552 (t
->level1
[i
] == ~((uint32_t) 0)
3554 : (t
->level1
[i
] << t
->q
) * sizeof (uint32_t) + level2_offset
);
3556 for (i
= 0; i
< (t
->level2_size
<< t
->q
); i
++)
3557 ((uint32_t *) (t
->result
+ level2_offset
))[i
] =
3558 (t
->level2
[i
] == ~((uint32_t) 0)
3560 : (t
->level2
[i
] << t
->p
) * sizeof (uint32_t) + level3_offset
);
3562 for (i
= 0; i
< (t
->level3_size
<< t
->p
); i
++)
3563 ((uint32_t *) (t
->result
+ level3_offset
))[i
] = t
->level3
[i
];
3565 if (t
->level1_alloc
> 0)
3567 if (t
->level2_alloc
> 0)
3569 if (t
->level3_alloc
> 0)
3573 #define TABLE wcwidth_table
3574 #define ELEMENT uint8_t
3575 #define DEFAULT 0xff
3578 #define TABLE wctrans_table
3579 #define ELEMENT int32_t
3581 #define wctrans_table_add wctrans_table_add_internal
3583 #undef wctrans_table_add
3584 /* The wctrans_table must actually store the difference between the
3585 desired result and the argument. */
3587 wctrans_table_add (struct wctrans_table
*t
, uint32_t wc
, uint32_t mapped_wc
)
3589 wctrans_table_add_internal (t
, wc
, mapped_wc
- wc
);
3594 allocate_arrays (struct locale_ctype_t
*ctype
, struct charmap_t
*charmap
,
3595 struct repertoire_t
*repertoire
)
3603 /* You wonder about this amount of memory? This is only because some
3604 users do not manage to address the array with unsigned values or
3605 data types with range >= 256. '\200' would result in the array
3606 index -128. To help these poor people we duplicate the entries for
3607 128 up to 255 below the entry for \0. */
3608 ctype
->ctype_b
= (char_class_t
*) xcalloc (256 + 128, sizeof (char_class_t
));
3609 ctype
->ctype32_b
= (char_class32_t
*) xcalloc (256, sizeof (char_class32_t
));
3610 ctype
->class_b
= (uint32_t **)
3611 xmalloc (ctype
->nr_charclass
* sizeof (uint32_t *));
3612 ctype
->class_3level
= (struct iovec
*)
3613 xmalloc (ctype
->nr_charclass
* sizeof (struct iovec
));
3615 /* This is the array accessed using the multibyte string elements. */
3616 for (idx
= 0; idx
< 256; ++idx
)
3617 ctype
->ctype_b
[128 + idx
] = ctype
->class256_collection
[idx
];
3619 /* Mirror first 127 entries. We must take care that entry -1 is not
3620 mirrored because EOF == -1. */
3621 for (idx
= 0; idx
< 127; ++idx
)
3622 ctype
->ctype_b
[idx
] = ctype
->ctype_b
[256 + idx
];
3624 /* The 32 bit array contains all characters < 0x100. */
3625 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
3626 if (ctype
->charnames
[idx
] < 0x100)
3627 ctype
->ctype32_b
[ctype
->charnames
[idx
]] = ctype
->class_collection
[idx
];
3629 for (nr
= 0; nr
< ctype
->nr_charclass
; nr
++)
3631 ctype
->class_b
[nr
] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3633 for (idx
= 0; idx
< 256; ++idx
)
3634 if (ctype
->class256_collection
[idx
] & _ISbit (nr
))
3635 ctype
->class_b
[nr
][idx
>> 5] |= (uint32_t)1 << (idx
& 0x1f);
3638 for (nr
= 0; nr
< ctype
->nr_charclass
; nr
++)
3640 struct wctype_table t
;
3642 t
.p
= 4; /* or: 5 */
3643 t
.q
= 7; /* or: 6 */
3644 wctype_table_init (&t
);
3646 for (idx
= 0; idx
< ctype
->class_collection_act
; ++idx
)
3647 if (ctype
->class_collection
[idx
] & _ISwbit (nr
))
3648 wctype_table_add (&t
, ctype
->charnames
[idx
]);
3650 wctype_table_finalize (&t
);
3653 fprintf (stderr
, _("%s: table for class \"%s\": %lu bytes\n"),
3654 "LC_CTYPE", ctype
->classnames
[nr
],
3655 (unsigned long int) t
.result_size
);
3657 ctype
->class_3level
[nr
].iov_base
= t
.result
;
3658 ctype
->class_3level
[nr
].iov_len
= t
.result_size
;
3661 /* Room for table of mappings. */
3662 ctype
->map_b
= (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3663 ctype
->map32_b
= (uint32_t **) xmalloc (ctype
->map_collection_nr
3664 * sizeof (uint32_t *));
3665 ctype
->map_3level
= (struct iovec
*)
3666 xmalloc (ctype
->map_collection_nr
* sizeof (struct iovec
));
3668 /* Fill in all mappings. */
3669 for (idx
= 0; idx
< 2; ++idx
)
3673 /* Allocate table. */
3674 ctype
->map_b
[idx
] = (uint32_t *)
3675 xmalloc ((256 + 128) * sizeof (uint32_t));
3677 /* Copy values from collection. */
3678 for (idx2
= 0; idx2
< 256; ++idx2
)
3679 ctype
->map_b
[idx
][128 + idx2
] = ctype
->map256_collection
[idx
][idx2
];
3681 /* Mirror first 127 entries. We must take care not to map entry
3682 -1 because EOF == -1. */
3683 for (idx2
= 0; idx2
< 127; ++idx2
)
3684 ctype
->map_b
[idx
][idx2
] = ctype
->map_b
[idx
][256 + idx2
];
3686 /* EOF must map to EOF. */
3687 ctype
->map_b
[idx
][127] = EOF
;
3690 for (idx
= 0; idx
< ctype
->map_collection_nr
; ++idx
)
3694 /* Allocate table. */
3695 ctype
->map32_b
[idx
] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3697 /* Copy values from collection. Default is identity mapping. */
3698 for (idx2
= 0; idx2
< 256; ++idx2
)
3699 ctype
->map32_b
[idx
][idx2
] =
3700 (ctype
->map_collection
[idx
][idx2
] != 0
3701 ? ctype
->map_collection
[idx
][idx2
]
3705 for (nr
= 0; nr
< ctype
->map_collection_nr
; nr
++)
3707 struct wctrans_table t
;
3711 wctrans_table_init (&t
);
3713 for (idx
= 0; idx
< ctype
->map_collection_act
[nr
]; ++idx
)
3714 if (ctype
->map_collection
[nr
][idx
] != 0)
3715 wctrans_table_add (&t
, ctype
->charnames
[idx
],
3716 ctype
->map_collection
[nr
][idx
]);
3718 wctrans_table_finalize (&t
);
3721 fprintf (stderr
, _("%s: table for map \"%s\": %lu bytes\n"),
3722 "LC_CTYPE", ctype
->mapnames
[nr
],
3723 (unsigned long int) t
.result_size
);
3725 ctype
->map_3level
[nr
].iov_base
= t
.result
;
3726 ctype
->map_3level
[nr
].iov_len
= t
.result_size
;
3729 /* Extra array for class and map names. */
3730 ctype
->class_name_ptr
= (uint32_t *) xmalloc (ctype
->nr_charclass
3731 * sizeof (uint32_t));
3732 ctype
->map_name_ptr
= (uint32_t *) xmalloc (ctype
->map_collection_nr
3733 * sizeof (uint32_t));
3735 ctype
->class_offset
= _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1
);
3736 ctype
->map_offset
= ctype
->class_offset
+ ctype
->nr_charclass
;
3738 /* Array for width information. Because the expected widths are very
3739 small we use only one single byte. This saves space. */
3741 struct wcwidth_table t
;
3745 wcwidth_table_init (&t
);
3747 /* First set all the characters of the character set to the default width. */
3749 while (iterate_table (&charmap
->char_table
, &curs
, &key
, &len
, &vdata
) == 0)
3751 struct charseq
*data
= (struct charseq
*) vdata
;
3753 if (data
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
3754 data
->ucs4
= repertoire_find_value (ctype
->repertoire
,
3757 if (data
->ucs4
!= ILLEGAL_CHAR_VALUE
)
3758 wcwidth_table_add (&t
, data
->ucs4
, charmap
->width_default
);
3761 /* Now add the explicitly specified widths. */
3762 if (charmap
->width_rules
!= NULL
)
3766 for (cnt
= 0; cnt
< charmap
->nwidth_rules
; ++cnt
)
3768 unsigned char bytes
[charmap
->mb_cur_max
];
3769 int nbytes
= charmap
->width_rules
[cnt
].from
->nbytes
;
3771 /* We have the range of characters for which the width is
3772 specified described using byte sequences of the multibyte
3773 charset. We have to convert this to UCS4 now. And we
3774 cannot simply convert the beginning and the end of the
3775 sequence, we have to iterate over the byte sequence and
3776 convert it for every single character. */
3777 memcpy (bytes
, charmap
->width_rules
[cnt
].from
->bytes
, nbytes
);
3779 while (nbytes
< charmap
->width_rules
[cnt
].to
->nbytes
3780 || memcmp (bytes
, charmap
->width_rules
[cnt
].to
->bytes
,
3783 /* Find the UCS value for `bytes'. */
3786 struct charseq
*seq
=
3787 charmap_find_symbol (charmap
, bytes
, nbytes
);
3790 wch
= ILLEGAL_CHAR_VALUE
;
3791 else if (seq
->ucs4
!= UNINITIALIZED_CHAR_VALUE
)
3794 wch
= repertoire_find_value (ctype
->repertoire
, seq
->name
,
3795 strlen (seq
->name
));
3797 if (wch
!= ILLEGAL_CHAR_VALUE
)
3798 /* Store the value. */
3799 wcwidth_table_add (&t
, wch
, charmap
->width_rules
[cnt
].width
);
3801 /* "Increment" the bytes sequence. */
3803 while (inner
>= 0 && bytes
[inner
] == 0xff)
3808 /* We have to extend the byte sequence. */
3809 if (nbytes
>= charmap
->width_rules
[cnt
].to
->nbytes
)
3813 memset (&bytes
[1], 0, nbytes
);
3819 while (++inner
< nbytes
)
3826 wcwidth_table_finalize (&t
);
3829 fprintf (stderr
, _("%s: table for width: %lu bytes\n"),
3830 "LC_CTYPE", (unsigned long int) t
.result_size
);
3832 ctype
->width
.iov_base
= t
.result
;
3833 ctype
->width
.iov_len
= t
.result_size
;
3836 /* Set MB_CUR_MAX. */
3837 ctype
->mb_cur_max
= charmap
->mb_cur_max
;
3839 /* Now determine the table for the transliteration information.
3841 XXX It is not yet clear to me whether it is worth implementing a
3842 complicated algorithm which uses a hash table to locate the entries.
3843 For now I'll use a simple array which can be searched using binary
3845 if (ctype
->translit_copy_locale
!= NULL
)
3847 /* Fold in the transliteration information from the locale mentioned
3848 in the `include' statement. */
3849 struct locale_ctype_t
*here
= ctype
;
3853 struct localedef_t
*other
= find_locale (LC_CTYPE
,
3854 here
->translit_copy_locale
,
3855 repertoire
->name
, charmap
);
3860 %s: transliteration data from locale `%s' not available"),
3861 "LC_CTYPE", here
->translit_copy_locale
);
3865 here
= other
->categories
[LC_CTYPE
].ctype
;
3867 /* Enqueue the information if necessary. */
3868 if (here
->translit
!= NULL
)
3870 struct translit_t
*endp
= here
->translit
;
3871 while (endp
->next
!= NULL
)
3874 endp
->next
= ctype
->translit
;
3875 ctype
->translit
= here
->translit
;
3878 while (here
->translit_copy_locale
!= NULL
);
3881 if (ctype
->translit
!= NULL
)
3883 /* First count how many entries we have. This is the upper limit
3884 since some entries from the included files might be overwritten. */
3887 struct translit_t
*runp
= ctype
->translit
;
3888 struct translit_t
**sorted
;
3889 size_t from_len
, to_len
;
3891 while (runp
!= NULL
)
3897 /* Next we allocate an array large enough and fill in the values. */
3898 sorted
= (struct translit_t
**) alloca (number
3899 * sizeof (struct translit_t
**));
3900 runp
= ctype
->translit
;
3904 /* Search for the place where to insert this string.
3905 XXX Better use a real sorting algorithm later. */
3909 while (idx
< number
)
3911 int res
= wcscmp ((const wchar_t *) sorted
[idx
]->from
,
3912 (const wchar_t *) runp
->from
);
3927 memmove (&sorted
[idx
+ 1], &sorted
[idx
],
3928 (number
- idx
) * sizeof (struct translit_t
*));
3935 while (runp
!= NULL
);
3937 /* The next step is putting all the possible transliteration
3938 strings in one memory block so that we can write it out.
3939 We need several different blocks:
3940 - index to the from-string array
3942 - index to the to-string array
3945 from_len
= to_len
= 0;
3946 for (cnt
= 0; cnt
< number
; ++cnt
)
3948 struct translit_to_t
*srunp
;
3949 from_len
+= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
3950 srunp
= sorted
[cnt
]->to
;
3951 while (srunp
!= NULL
)
3953 to_len
+= wcslen ((const wchar_t *) srunp
->str
) + 1;
3954 srunp
= srunp
->next
;
3956 /* Plus one for the extra NUL character marking the end of
3957 the list for the current entry. */
3961 /* We can allocate the arrays for the results. */
3962 ctype
->translit_from_idx
= xmalloc (number
* sizeof (uint32_t));
3963 ctype
->translit_from_tbl
= xmalloc (from_len
* sizeof (uint32_t));
3964 ctype
->translit_to_idx
= xmalloc (number
* sizeof (uint32_t));
3965 ctype
->translit_to_tbl
= xmalloc (to_len
* sizeof (uint32_t));
3969 for (cnt
= 0; cnt
< number
; ++cnt
)
3972 struct translit_to_t
*srunp
;
3974 ctype
->translit_from_idx
[cnt
] = from_len
;
3975 ctype
->translit_to_idx
[cnt
] = to_len
;
3977 len
= wcslen ((const wchar_t *) sorted
[cnt
]->from
) + 1;
3978 wmemcpy ((wchar_t *) &ctype
->translit_from_tbl
[from_len
],
3979 (const wchar_t *) sorted
[cnt
]->from
, len
);
3982 ctype
->translit_to_idx
[cnt
] = to_len
;
3983 srunp
= sorted
[cnt
]->to
;
3984 while (srunp
!= NULL
)
3986 len
= wcslen ((const wchar_t *) srunp
->str
) + 1;
3987 wmemcpy ((wchar_t *) &ctype
->translit_to_tbl
[to_len
],
3988 (const wchar_t *) srunp
->str
, len
);
3990 srunp
= srunp
->next
;
3992 ctype
->translit_to_tbl
[to_len
++] = L
'\0';
3995 /* Store the information about the length. */
3996 ctype
->translit_idx_size
= number
;
3997 ctype
->translit_from_tbl_size
= from_len
* sizeof (uint32_t);
3998 ctype
->translit_to_tbl_size
= to_len
* sizeof (uint32_t);
4002 /* Provide some dummy pointers since we have nothing to write out. */
4003 static uint32_t no_str
= { 0 };
4005 ctype
->translit_from_idx
= &no_str
;
4006 ctype
->translit_from_tbl
= &no_str
;
4007 ctype
->translit_to_tbl
= &no_str
;
4008 ctype
->translit_idx_size
= 0;
4009 ctype
->translit_from_tbl_size
= 0;
4010 ctype
->translit_to_tbl_size
= 0;