]> git.ipfire.org Git - thirdparty/glibc.git/blame - locale/programs/ld-ctype.c
* elf/Makefile (LDFLAGS-dl.so): New variable.
[thirdparty/glibc.git] / locale / programs / ld-ctype.c
CommitLineData
19bc17a9
RM
1/* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
2This file is part of the GNU C Library.
3Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
4
5The GNU C Library is free software; you can redistribute it and/or
6modify it under the terms of the GNU Library General Public License as
7published by the Free Software Foundation; either version 2 of the
8License, or (at your option) any later version.
9
10The GNU C Library is distributed in the hope that it will be useful,
11but WITHOUT ANY WARRANTY; without even the implied warranty of
12MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13Library General Public License for more details.
14
15You should have received a copy of the GNU Library General Public
16License along with the GNU C Library; see the file COPYING.LIB. If
17not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18Boston, MA 02111-1307, USA. */
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
24#include <endian.h>
25#include <limits.h>
26#include <string.h>
27
28#include "locales.h"
29#include "localeinfo.h"
30#include "langinfo.h"
31#include "locfile-token.h"
32#include "stringtrans.h"
33
34/* Uncomment the following line in the production version. */
35/* define NDEBUG 1 */
36#include <assert.h>
37
38
39void *xmalloc (size_t __n);
40void *xcalloc (size_t __n, size_t __s);
41void *xrealloc (void *__ptr, size_t __n);
42
43
/* The bit used for representing a special class.  BITPOS maps one of the
   predefined class tokens (tok_upper ... ) to its bit number, BIT to the
   corresponding mask.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (1 << BITPOS (class))

/* Yield the (assignable) slot for character VALUE in the table
   CTYPE->COLLECTION IDX.  IDX is either empty or an array subscript such
   as `[n]'; the matching `_max' and `_act' bookkeeping members are passed
   along so that find_idx can grow the table.  The expansion is
   parenthesized so it can be combined safely with other operators.  */
#define ELEM(ctype, collection, idx, value)                                   \
  (*find_idx ((ctype), &(ctype)->collection idx,                              \
              &(ctype)->collection##_max idx,                                 \
              &(ctype)->collection##_act idx, (value)))

/* Reverse the byte order of the low 32 bits of W.  Every byte is masked
   before it is moved so that operands which are wider than 32 bits (or
   signed) cannot leak stray high bits into the result.  */
#define SWAPU32(w) \
  ((((w) & 0xffu) << 24) | (((w) & 0xff00u) << 8)                             \
   | (((w) >> 8) & 0xff00u) | (((w) >> 24) & 0xffu))

/* Reverse the byte order of the low 16 bits of W.  */
#define SWAPU16(w) \
  ((((w) >> 8) & 0xffu) | (((w) & 0xffu) << 8))


/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t u_int16_t
#define CHAR_CLASS_TRANS SWAPU16
#define char_class32_t u_int32_t
#define CHAR_CLASS32_TRANS SWAPU32
66
67
68/* The real definition of the struct for the LC_CTYPE locale. */
69struct locale_ctype_t
70{
71 unsigned int *charnames;
72 size_t charnames_max;
73 size_t charnames_act;
74
7a12c6bb
RM
75 /* We will allow up to 8 * sizeof(u_int32_t) - 1 character classes. */
76#define MAX_NR_CHARCLASS (8 * sizeof (u_int32_t) - 1)
19bc17a9
RM
77 int nr_charclass;
78 const char *classnames[MAX_NR_CHARCLASS];
79 unsigned long int current_class_mask;
80 unsigned int last_class_char;
7a12c6bb 81 u_int32_t *class_collection;
19bc17a9
RM
82 size_t class_collection_max;
83 size_t class_collection_act;
84 unsigned long int class_done;
85
86 /* If the following number ever turns out to be too small simply
87 increase it. But I doubt it will. --drepper@gnu */
88#define MAX_NR_CHARMAP 16
89 const char *mapnames[MAX_NR_CHARMAP];
7a12c6bb 90 u_int32_t *map_collection[MAX_NR_CHARMAP];
a5b7bf0e
RM
91 u_int32_t map_collection_max[MAX_NR_CHARMAP];
92 u_int32_t map_collection_act[MAX_NR_CHARMAP];
19bc17a9
RM
93 size_t map_collection_nr;
94 size_t last_map_idx;
95 unsigned int from_map_char;
96 int toupper_done;
97 int tolower_done;
98
99 /* The arrays for the binary representation. */
7a12c6bb
RM
100 u_int32_t plane_size;
101 u_int32_t plane_cnt;
19bc17a9
RM
102 char_class_t *ctype_b;
103 char_class32_t *ctype32_b;
7a12c6bb
RM
104 u_int32_t *names_el;
105 u_int32_t *names_eb;
106 u_int32_t **map_eb;
107 u_int32_t **map_el;
108 u_int32_t *class_name_ptr;
109 u_int32_t *map_name_ptr;
75cd5204 110 unsigned char *width;
0200214b 111 u_int32_t mb_cur_max;
6990326c 112 const char *codeset_name;
19bc17a9
RM
113};
114
115
116/* Prototypes for local functions. */
117static void ctype_class_newP (struct linereader *lr,
118 struct locale_ctype_t *ctype, const char *name);
119static void ctype_map_newP (struct linereader *lr,
120 struct locale_ctype_t *ctype,
121 const char *name, struct charset_t *charset);
7a12c6bb
RM
122static u_int32_t *find_idx (struct locale_ctype_t *ctype, u_int32_t **table,
123 size_t *max, size_t *act, unsigned int idx);
19bc17a9
RM
124static void set_class_defaults (struct locale_ctype_t *ctype,
125 struct charset_t *charset);
75cd5204
RM
126static void allocate_arrays (struct locale_ctype_t *ctype,
127 struct charset_t *charset);
19bc17a9
RM
128
129
130void
131ctype_startup (struct linereader *lr, struct localedef_t *locale,
132 struct charset_t *charset)
133{
134 unsigned int cnt;
135 struct locale_ctype_t *ctype;
136
137 /* It is important that we always use UCS1 encoding for strings now. */
138 encoding_method = ENC_UCS1;
139
140 /* Allocate the needed room. */
141 locale->categories[LC_CTYPE].ctype = ctype =
142 (struct locale_ctype_t *) xmalloc (sizeof (struct locale_ctype_t));
143
144 /* We have no names seen yet. */
145 ctype->charnames_max = charset->mb_cur_max == 1 ? 256 : 512;
146 ctype->charnames =
147 (unsigned int *) xmalloc (ctype->charnames_max * sizeof (unsigned int));
148 for (cnt = 0; cnt < 256; ++cnt)
149 ctype->charnames[cnt] = cnt;
150 ctype->charnames_act = 256;
151
152 /* Fill character class information. */
153 ctype->nr_charclass = 0;
154 ctype->current_class_mask = 0;
155 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
156 /* The order of the following instructions determines the bit
157 positions! */
158 ctype_class_newP (lr, ctype, "upper");
159 ctype_class_newP (lr, ctype, "lower");
160 ctype_class_newP (lr, ctype, "alpha");
161 ctype_class_newP (lr, ctype, "digit");
162 ctype_class_newP (lr, ctype, "xdigit");
163 ctype_class_newP (lr, ctype, "space");
164 ctype_class_newP (lr, ctype, "print");
165 ctype_class_newP (lr, ctype, "graph");
166 ctype_class_newP (lr, ctype, "blank");
167 ctype_class_newP (lr, ctype, "cntrl");
168 ctype_class_newP (lr, ctype, "punct");
169 ctype_class_newP (lr, ctype, "alnum");
170
171 ctype->class_collection_max = charset->mb_cur_max == 1 ? 256 : 512;
7a12c6bb
RM
172 ctype->class_collection
173 = (u_int32_t *) xmalloc (sizeof (unsigned long int)
174 * ctype->class_collection_max);
19bc17a9
RM
175 memset (ctype->class_collection, '\0',
176 sizeof (unsigned long int) * ctype->class_collection_max);
177 ctype->class_collection_act = 256;
178
179 /* Fill character map information. */
180 ctype->map_collection_nr = 0;
181 ctype->last_map_idx = MAX_NR_CHARMAP;
182 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
183 ctype_map_newP (lr, ctype, "toupper", charset);
184 ctype_map_newP (lr, ctype, "tolower", charset);
185
186 /* Fill first 256 entries in `toupper' and `tolower' arrays. */
187 for (cnt = 0; cnt < 256; ++cnt)
188 {
189 ctype->map_collection[0][cnt] = cnt;
190 ctype->map_collection[1][cnt] = cnt;
191 }
192}
193
194
195void
196ctype_finish (struct localedef_t *locale, struct charset_t *charset)
197{
198 /* See POSIX.2, table 2-6 for the meaning of the following table. */
199#define NCLASS 12
200 static const struct
201 {
202 const char *name;
203 const char allow[NCLASS];
204 }
205 valid_table[NCLASS] =
206 {
207 /* The order is important. See token.h for more information.
208 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
209 { "upper", "--MX-XDDXXX-" },
210 { "lower", "--MX-XDDXXX-" },
211 { "alpha", "---X-XDDXXX-" },
212 { "digit", "XXX--XDDXXX-" },
213 { "xdigit", "-----XDDXXX-" },
214 { "space", "XXXXX------X" },
215 { "print", "---------X--" },
216 { "graph", "---------X--" },
217 { "blank", "XXXXXM-----X" },
218 { "cntrl", "XXXXX-XX--XX" },
219 { "punct", "XXXXX-DD-X-X" },
220 { "alnum", "-----XDDXXX-" }
221 };
222 size_t cnt;
223 int cls1, cls2;
224 unsigned int space_value;
225 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
226
227 /* Set default value for classes not specified. */
228 set_class_defaults (ctype, charset);
229
230 /* Check according to table. */
231 for (cnt = 0; cnt < ctype->class_collection_max; ++cnt)
232 {
233 unsigned long int tmp;
234
235 tmp = ctype->class_collection[cnt];
236 if (tmp == 0)
237 continue;
238
239 for (cls1 = 0; cls1 < NCLASS; ++cls1)
240 if ((tmp & (1 << cls1)) != 0)
241 for (cls2 = 0; cls2 < NCLASS; ++cls2)
242 if (valid_table[cls1].allow[cls2] != '-')
243 {
244 int eq = (tmp & (1 << cls2)) != 0;
245 switch (valid_table[cls1].allow[cls2])
246 {
247 case 'M':
248 if (!eq)
249 {
250 char buf[17];
251 char *cp = buf;
252 unsigned int value;
253
254 value = ctype->charnames[cnt];
255
256 if ((value & 0xff000000) != 0)
257 cp += sprintf (cp, "\\%o", (value >> 24) & 0xff);
258 if ((value & 0xffff0000) != 0)
259 cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
260 if ((value & 0xffffff00) != 0)
261 cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
262 sprintf (cp, "\\%o", value & 0xff);
263
264 error (0, 0, _("\
265character %s'%s' in class `%s' must be in class `%s'"), value > 256 ? "L" : "",
266 cp, valid_table[cls1].name,
267 valid_table[cls2].name);
268 }
269 break;
270
271 case 'X':
272 if (eq)
273 {
274 char buf[17];
275 char *cp = buf;
276 unsigned int value;
277
278 value = ctype->charnames[cnt];
279
280 if ((value & 0xff000000) != 0)
281 cp += sprintf (cp, "\\%o", value >> 24);
282 if ((value & 0xffff0000) != 0)
283 cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
284 if ((value & 0xffffff00) != 0)
285 cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
286 sprintf (cp, "\\%o", value & 0xff);
287
288 error (0, 0, _("\
289character %s'%s' in class `%s' must not be in class `%s'"),
290 value > 256 ? "L" : "", cp,
291 valid_table[cls1].name, valid_table[cls2].name);
292 }
293 break;
294
295 case 'D':
296 ctype->class_collection[cnt] |= 1 << cls2;
297 break;
298
299 default:
300 error (5, 0, _("internal error in %s, line %u"),
301 __FUNCTION__, __LINE__);
302 }
303 }
304 }
305
306 /* ... and now test <SP> as a special case. */
307 space_value = charset_find_value (charset, "SP", 2);
308 if (space_value == ILLEGAL_CHAR_VALUE)
309 error (0, 0, _("character <SP> not defined in character map"));
310 else if ((cnt = BITPOS (tok_space),
311 (ELEM (ctype, class_collection, , space_value)
312 & BIT (tok_space)) == 0)
313 || (cnt = BITPOS (tok_blank),
314 (ELEM (ctype, class_collection, , space_value)
315 & BIT (tok_blank)) == 0))
316 error (0, 0, _("<SP> character not in class `%s'"),
317 valid_table[cnt].name);
318 else if ((cnt = BITPOS (tok_punct),
319 (ELEM (ctype, class_collection, , space_value)
320 & BIT (tok_punct)) != 0)
321 || (cnt = BITPOS (tok_graph),
322 (ELEM (ctype, class_collection, , space_value)
323 & BIT (tok_graph))
324 != 0))
325 error (0, 0, _("<SP> character must not be in class `%s'"),
326 valid_table[cnt].name);
327 else
328 ELEM (ctype, class_collection, , space_value) |= BIT (tok_print);
75cd5204
RM
329
330 /* Now that the tests are done make sure the name array contains all
331 characters which are handled in the WIDTH section of the
332 character set definition file. */
333 if (charset->width_rules != NULL)
334 for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
335 {
336 size_t inner;
337 for (inner = charset->width_rules[cnt].from;
338 inner <= charset->width_rules[cnt].to; ++inner)
339 (void) find_idx (ctype, NULL, NULL, NULL, inner);
340 }
19bc17a9
RM
341}
342
343
344void
75cd5204
RM
345ctype_output (struct localedef_t *locale, struct charset_t *charset,
346 const char *output_path)
19bc17a9
RM
347{
348 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
349 const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
350 + 2 * (ctype->map_collection_nr - 2));
75cd5204
RM
351 struct iovec iov[2 + nelems + ctype->nr_charclass
352 + ctype->map_collection_nr];
19bc17a9 353 struct locale_file data;
7a12c6bb 354 u_int32_t idx[nelems];
75cd5204 355 size_t elem, cnt, offset, total;
19bc17a9
RM
356
357
358 if ((locale->binary & (1 << LC_CTYPE)) != 0)
359 {
360 iov[0].iov_base = ctype;
361 iov[0].iov_len = locale->len[LC_CTYPE];
362
363 write_locale_data (output_path, "LC_CTYPE", 1, iov);
364
365 return;
366 }
367
368
369 /* Now prepare the output: Find the sizes of the table we can use. */
75cd5204 370 allocate_arrays (ctype, charset);
19bc17a9
RM
371
372 data.magic = LIMAGIC (LC_CTYPE);
373 data.n = nelems;
374 iov[0].iov_base = (void *) &data;
375 iov[0].iov_len = sizeof (data);
376
377 iov[1].iov_base = (void *) idx;
378 iov[1].iov_len = sizeof (idx);
379
380 idx[0] = iov[0].iov_len + iov[1].iov_len;
381 offset = 0;
382
383 for (elem = 0; elem < nelems; ++elem)
384 {
385 if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
386 switch (elem)
387 {
388#define CTYPE_DATA(name, base, len) \
389 case _NL_ITEM_INDEX (name): \
75cd5204
RM
390 iov[2 + elem + offset].iov_base = base; \
391 iov[2 + elem + offset].iov_len = len; \
392 if (elem + 1 < nelems) \
393 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
19bc17a9
RM
394 break
395
396 CTYPE_DATA (_NL_CTYPE_CLASS,
397 ctype->ctype_b,
398 (256 + 128) * sizeof (char_class_t));
399
400 CTYPE_DATA (_NL_CTYPE_TOUPPER_EB,
401 ctype->map_eb[0],
402 (ctype->plane_size * ctype->plane_cnt + 128)
7a12c6bb 403 * sizeof (u_int32_t));
19bc17a9
RM
404 CTYPE_DATA (_NL_CTYPE_TOLOWER_EB,
405 ctype->map_eb[1],
406 (ctype->plane_size * ctype->plane_cnt + 128)
7a12c6bb 407 * sizeof (u_int32_t));
19bc17a9
RM
408
409 CTYPE_DATA (_NL_CTYPE_TOUPPER_EL,
410 ctype->map_el[0],
411 (ctype->plane_size * ctype->plane_cnt + 128)
7a12c6bb 412 * sizeof (u_int32_t));
19bc17a9
RM
413 CTYPE_DATA (_NL_CTYPE_TOLOWER_EL,
414 ctype->map_el[1],
415 (ctype->plane_size * ctype->plane_cnt + 128)
7a12c6bb 416 * sizeof (u_int32_t));
19bc17a9
RM
417
418 CTYPE_DATA (_NL_CTYPE_CLASS32,
419 ctype->ctype32_b,
420 (ctype->plane_size * ctype->plane_cnt
421 * sizeof (char_class32_t)));
422
423 CTYPE_DATA (_NL_CTYPE_NAMES_EB,
7a12c6bb
RM
424 ctype->names_eb, (ctype->plane_size * ctype->plane_cnt
425 * sizeof (u_int32_t)));
19bc17a9 426 CTYPE_DATA (_NL_CTYPE_NAMES_EL,
7a12c6bb
RM
427 ctype->names_el, (ctype->plane_size * ctype->plane_cnt
428 * sizeof (u_int32_t)));
19bc17a9
RM
429
430 CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
7a12c6bb 431 &ctype->plane_size, sizeof (u_int32_t));
19bc17a9 432 CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
7a12c6bb 433 &ctype->plane_cnt, sizeof (u_int32_t));
19bc17a9 434
75cd5204
RM
435 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
436 /* The class name array. */
437 total = 0;
438 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
439 {
440 iov[2 + elem + offset].iov_base
441 = (void *) ctype->classnames[cnt];
442 iov[2 + elem + offset].iov_len
443 = strlen (ctype->classnames[cnt]) + 1;
444 total += iov[2 + elem + offset].iov_len;
445 }
446 iov[2 + elem + offset].iov_base = (void *) "";
447 iov[2 + elem + offset].iov_len = 1;
448 ++total;
449
450 if (elem + 1 < nelems)
451 idx[elem + 1] = idx[elem] + total;
452 break;
453
454 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
455 /* The class name array. */
456 total = 0;
457 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
458 {
459 iov[2 + elem + offset].iov_base
460 = (void *) ctype->mapnames[cnt];
461 iov[2 + elem + offset].iov_len
462 = strlen (ctype->mapnames[cnt]) + 1;
463 total += iov[2 + elem + offset].iov_len;
464 }
465 iov[2 + elem + offset].iov_base = (void *) "";
466 iov[2 + elem + offset].iov_len = 1;
467 ++total;
468
469 if (elem + 1 < nelems)
470 idx[elem + 1] = idx[elem] + total;
471 break;
19bc17a9
RM
472
473 CTYPE_DATA (_NL_CTYPE_WIDTH,
75cd5204 474 ctype->width, ctype->plane_size * ctype->plane_cnt);
19bc17a9 475
0200214b
RM
476 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
477 &ctype->mb_cur_max, sizeof (u_int32_t));
478
6990326c
RM
479 CTYPE_DATA (_NL_CTYPE_CODESET_NAME,
480 ctype->codeset_name, strlen (ctype->codeset_name) + 1);
481
19bc17a9
RM
482 default:
483 assert (! "unknown CTYPE element");
484 }
485 else
486 {
487 /* Handle extra maps. */
488 size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) >> 1;
489
490 if (((elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) & 1) == 0)
75cd5204 491 iov[2 + elem + offset].iov_base = ctype->map_eb[nr];
19bc17a9 492 else
75cd5204 493 iov[2 + elem + offset].iov_base = ctype->map_el[nr];
19bc17a9 494
75cd5204
RM
495 iov[2 + elem + offset].iov_len = ((ctype->plane_size
496 * ctype->plane_cnt + 128)
7a12c6bb 497 * sizeof (u_int32_t));
19bc17a9 498
75cd5204
RM
499 if (elem + 1 < nelems)
500 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
501 }
19bc17a9 502 }
19bc17a9 503
75cd5204
RM
504 assert (2 + elem + offset == (nelems + ctype->nr_charclass
505 + ctype->map_collection_nr + 2));
19bc17a9 506
75cd5204 507 write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
19bc17a9
RM
508}
509
510
511/* Character class handling. */
512void
513ctype_class_new (struct linereader *lr, struct localedef_t *locale,
514 enum token_t tok, struct token *code,
515 struct charset_t *charset)
516{
517 ctype_class_newP (lr, locale->categories[LC_CTYPE].ctype,
518 code->val.str.start);
519}
520
521
522int
523ctype_is_charclass (struct linereader *lr, struct localedef_t *locale,
524 const char *name)
525{
526 int cnt;
527
528 for (cnt = 0; cnt < locale->categories[LC_CTYPE].ctype->nr_charclass; ++cnt)
529 if (strcmp (name, locale->categories[LC_CTYPE].ctype->classnames[cnt])
530 == 0)
531 return 1;
532
533 return 0;
534}
535
536
537void
538ctype_class_start (struct linereader *lr, struct localedef_t *locale,
539 enum token_t tok, const char *str,
540 struct charset_t *charset)
541{
542 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
543 int cnt;
544
545 switch (tok)
546 {
547 case tok_upper:
548 str = "upper";
549 break;
550 case tok_lower:
551 str = "lower";
552 break;
553 case tok_alpha:
554 str = "alpha";
555 break;
556 case tok_digit:
557 str = "digit";
558 break;
559 case tok_xdigit:
560 str = "xdigit";
561 break;
562 case tok_space:
563 str = "space";
564 break;
565 case tok_print:
566 str = "print";
567 break;
568 case tok_graph:
569 str = "graph";
570 break;
571 case tok_blank:
572 str = "blank";
573 break;
574 case tok_cntrl:
575 str = "cntrl";
576 break;
577 case tok_punct:
578 str = "punct";
579 break;
580 case tok_alnum:
581 str = "alnum";
582 break;
583 case tok_ident:
584 break;
585 default:
586 assert (! "illegal token as class name: should not happen");
587 }
588
589 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
590 if (strcmp (str, ctype->classnames[cnt]) == 0)
591 break;
592
593 if (cnt >= ctype->nr_charclass)
594 assert (! "unknown class in class definition: should not happen");
595
596 ctype->class_done |= BIT (tok);
597
598 ctype->current_class_mask = 1 << cnt;
599 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
600}
601
602
603void
604ctype_class_from (struct linereader *lr, struct localedef_t *locale,
605 struct token *code, struct charset_t *charset)
606{
607 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
608 unsigned int value;
609
610 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
611
612 ctype->last_class_char = value;
613
614 if (value == ILLEGAL_CHAR_VALUE)
615 /* In the LC_CTYPE category it is no error when a character is
616 not found. This has to be ignored silently. */
617 return;
618
619 *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
620 &ctype->class_collection_act, value)
621 |= ctype->current_class_mask;
622}
623
624
625void
626ctype_class_to (struct linereader *lr, struct localedef_t *locale,
627 struct token *code, struct charset_t *charset)
628{
629 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
630 unsigned int value, cnt;
631
632 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
633
634 assert (value >= ctype->last_class_char);
635
636 for (cnt = ctype->last_class_char + 1; cnt <= value; ++cnt)
637 *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
638 &ctype->class_collection_act, cnt)
639 |= ctype->current_class_mask;
640
641 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
642}
643
644
645void
646ctype_class_end (struct linereader *lr, struct localedef_t *locale)
647{
648 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
649
650 /* We have no special actions to perform here. */
651 ctype->current_class_mask = 0;
652 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
653}
654
655
656/* Character map handling. */
657void
658ctype_map_new (struct linereader *lr, struct localedef_t *locale,
659 enum token_t tok, struct token *code,
660 struct charset_t *charset)
661{
662 ctype_map_newP (lr, locale->categories[LC_CTYPE].ctype,
663 code->val.str.start, charset);
664}
665
666
667int
668ctype_is_charmap (struct linereader *lr, struct localedef_t *locale,
669 const char *name)
670{
671 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
672 size_t cnt;
673
674 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
675 if (strcmp (name, ctype->mapnames[cnt]) == 0)
676 return 1;
677
678 return 0;
679}
680
681
682void
683ctype_map_start (struct linereader *lr, struct localedef_t *locale,
684 enum token_t tok, const char *name, struct charset_t *charset)
685{
686 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
687 size_t cnt;
688
689 switch (tok)
690 {
691 case tok_toupper:
692 ctype->toupper_done = 1;
693 name = "toupper";
694 break;
695 case tok_tolower:
696 ctype->tolower_done = 1;
697 name = "tolower";
698 break;
699 case tok_ident:
700 break;
701 default:
702 assert (! "unknown token in category `LC_CTYPE' should not happen");
703 }
704
705 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
706 if (strcmp (name, ctype->mapnames[cnt]) == 0)
707 break;
708
709 if (cnt == ctype->map_collection_nr)
710 assert (! "unknown token in category `LC_CTYPE' should not happen");
711
712 ctype->last_map_idx = cnt;
713 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
714}
715
716
717void
718ctype_map_from (struct linereader *lr, struct localedef_t *locale,
719 struct token *code, struct charset_t *charset)
720{
721 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
722 unsigned int value;
723
724 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
725
726 if (value == ILLEGAL_CHAR_VALUE)
727 /* In the LC_CTYPE category it is no error when a character is
728 not found. This has to be ignored silently. */
729 return;
730
731 assert (ctype->last_map_idx < ctype->map_collection_nr);
732
733 ctype->from_map_char = value;
734}
735
736
737void
738ctype_map_to (struct linereader *lr, struct localedef_t *locale,
739 struct token *code, struct charset_t *charset)
740{
741 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
742 unsigned int value;
743
744 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
745
746 if (ctype->from_map_char == ILLEGAL_CHAR_VALUE
747 || value == ILLEGAL_CHAR_VALUE)
748 {
749 /* In the LC_CTYPE category it is no error when a character is
750 not found. This has to be ignored silently. */
751 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
752 return;
753 }
754
755 *find_idx (ctype, &ctype->map_collection[ctype->last_map_idx],
756 &ctype->map_collection_max[ctype->last_map_idx],
757 &ctype->map_collection_act[ctype->last_map_idx],
758 ctype->from_map_char) = value;
759
760 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
761}
762
763
764void
765ctype_map_end (struct linereader *lr, struct localedef_t *locale)
766{
767 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
768
769 ctype->last_map_idx = MAX_NR_CHARMAP;
770 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
771}
772
773
774/* Local functions. */
775static void
776ctype_class_newP (struct linereader *lr, struct locale_ctype_t *ctype,
777 const char *name)
778{
779 int cnt;
780
781 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
782 if (strcmp (ctype->classnames[cnt], name) == 0)
783 break;
784
785 if (cnt < ctype->nr_charclass)
786 {
787 lr_error (lr, _("character class `%s' already defined"));
788 return;
789 }
790
791 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
792 /* Exit code 2 is prescribed in P1003.2b. */
793 error (2, 0, _("\
794implementation limit: no more than %d character classes allowed"),
795 MAX_NR_CHARCLASS);
796
797 ctype->classnames[ctype->nr_charclass++] = name;
798}
799
800
801static void
802ctype_map_newP (struct linereader *lr, struct locale_ctype_t *ctype,
803 const char *name, struct charset_t *charset)
804{
805 size_t max_chars = 0;
806 int cnt;
807
808 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
809 {
810 if (strcmp (ctype->mapnames[cnt], name) == 0)
811 break;
812
813 if (max_chars < ctype->map_collection_max[cnt])
814 max_chars = ctype->map_collection_max[cnt];
815 }
816
817 if (cnt < ctype->map_collection_nr)
818 {
819 lr_error (lr, _("character map `%s' already defined"));
820 return;
821 }
822
823 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
824 /* Exit code 2 is prescribed in P1003.2b. */
825 error (2, 0, _("\
826implementation limit: no more than %d character maps allowed"),
827 MAX_NR_CHARMAP);
828
829 ctype->mapnames[cnt] = name;
830
831 if (max_chars == 0)
a5b7bf0e 832 ctype->map_collection_max[cnt] = charset->mb_cur_max == 1 ? 256 : 512;
19bc17a9
RM
833 else
834 ctype->map_collection_max[cnt] = max_chars;
835
7a12c6bb
RM
836 ctype->map_collection[cnt] = (u_int32_t *)
837 xmalloc (sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
19bc17a9 838 memset (ctype->map_collection[cnt], '\0',
7a12c6bb 839 sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
19bc17a9
RM
840 ctype->map_collection_act[cnt] = 256;
841
842 ++ctype->map_collection_nr;
843}
844
845
75cd5204
RM
846/* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
847 is possible if we only want ot extend the name array. */
7a12c6bb
RM
848static u_int32_t *
849find_idx (struct locale_ctype_t *ctype, u_int32_t **table, size_t *max,
19bc17a9
RM
850 size_t *act, unsigned int idx)
851{
852 size_t cnt;
853
854 if (idx < 256)
75cd5204 855 return table == NULL ? NULL : &(*table)[idx];
19bc17a9
RM
856
857 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
858 if (ctype->charnames[cnt] == idx)
859 break;
860
861 /* We have to distinguish two cases: the names is found or not. */
862 if (cnt == ctype->charnames_act)
863 {
864 /* Extend the name array. */
865 if (ctype->charnames_act == ctype->charnames_max)
866 {
867 ctype->charnames_max *= 2;
868 ctype->charnames = (unsigned int *)
869 xrealloc (ctype->charnames,
870 sizeof (unsigned int) * ctype->charnames_max);
871 }
872 ctype->charnames[ctype->charnames_act++] = idx;
873 }
874
75cd5204
RM
875 if (table == NULL)
876 /* We have done everything we are asked to do. */
877 return NULL;
878
19bc17a9
RM
879 if (cnt >= *act)
880 {
881 if (cnt >= *max)
882 {
883 size_t old_max = *max;
884 do
885 *max *= 2;
886 while (*max <= cnt);
887
888 *table =
7a12c6bb
RM
889 (u_int32_t *) xrealloc (*table, *max * sizeof (unsigned long int));
890 memset (&(*table)[old_max], '\0',
891 (*max - old_max) * sizeof (u_int32_t));
19bc17a9
RM
892 }
893
894 (*table)[cnt] = 0;
895 *act = cnt;
896 }
897
898 return &(*table)[cnt];
899}
900
901
902static void
903set_class_defaults (struct locale_ctype_t *ctype, struct charset_t *charset)
904{
905 /* These function defines the default values for the classes and conversions
906 according to POSIX.2 2.5.2.1.
907 It may seem that the order of these if-blocks is arbitrary but it is NOT.
908 Don't move them unless you know what you do! */
909
910 void set_default (int bit, int from, int to)
911 {
912 char tmp[2];
913 int ch;
914 /* Define string. */
915 strcpy (tmp, "?");
916
917 for (ch = from; ch <= to; ++ch)
918 {
919 unsigned int value;
920 tmp[0] = ch;
921
922 value = charset_find_value (charset, tmp, 1);
923 if (value == ILLEGAL_CHAR_VALUE)
924 {
925 error (0, 0, _("\
926character `%s' not defined while needed as default value"),
927 tmp);
928 continue;
929 }
930 else
931 ELEM (ctype, class_collection, , value) |= bit;
932 }
933 }
934
935 /* Set default values if keyword was not present. */
936 if ((ctype->class_done & BIT (tok_upper)) == 0)
937 /* "If this keyword [lower] is not specified, the lowercase letters
938 `A' through `Z', ..., shall automatically belong to this class,
939 with implementation defined character values." [P1003.2, 2.5.2.1] */
940 set_default (BIT (tok_upper), 'A', 'Z');
941
942 if ((ctype->class_done & BIT (tok_lower)) == 0)
943 /* "If this keyword [lower] is not specified, the lowercase letters
944 `a' through `z', ..., shall automatically belong to this class,
945 with implementation defined character values." [P1003.2, 2.5.2.1] */
946 set_default (BIT (tok_lower), 'a', 'z');
947
948 if ((ctype->class_done & BIT (tok_alpha)) == 0)
949 {
950 /* Table 2-6 in P1003.2 says that characters in class `upper' or
951 class `lower' *must* be in class `alpha'. */
952 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
953 size_t cnt;
954
955 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
956 if ((ctype->class_collection[cnt] & mask) != 0)
957 ctype->class_collection[cnt] |= BIT (tok_alpha);
958 }
959
960 if ((ctype->class_done & BIT (tok_digit)) == 0)
961 /* "If this keyword [digit] is not specified, the digits `0' through
962 `9', ..., shall automatically belong to this class, with
963 implementation-defined character values." [P1003.2, 2.5.2.1] */
964 set_default (BIT (tok_digit), '0', '9');
965
966 /* "Only characters specified for the `alpha' and `digit' keyword
967 shall be specified. Characters specified for the keyword `alpha'
968 and `digit' are automatically included in this class. */
969 {
970 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
971 size_t cnt;
972
973 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
974 if ((ctype->class_collection[cnt] & mask) != 0)
975 ctype->class_collection[cnt] |= BIT (tok_alnum);
976 }
977
978 if ((ctype->class_done & BIT (tok_space)) == 0)
979 /* "If this keyword [space] is not specified, the characters <space>,
980 <form-feed>, <newline>, <carriage-return>, <tab>, and
981 <vertical-tab>, ..., shall automatically belong to this class,
982 with implementation-defined character values." [P1003.2, 2.5.2.1] */
983 {
984 unsigned int value;
985
986 value = charset_find_value (charset, "space", 5);
987 if (value == ILLEGAL_CHAR_VALUE)
988 error (0, 0, _("\
989character `%s' not defined while needed as default value"),
990 "<space>");
991 else
992 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
993
994 value = charset_find_value (charset, "form-feed", 9);
995 if (value == ILLEGAL_CHAR_VALUE)
996 error (0, 0, _("\
997character `%s' not defined while needed as default value"),
998 "<form-feed>");
999 else
1000 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1001
1002 value = charset_find_value (charset, "newline", 7);
1003 if (value == ILLEGAL_CHAR_VALUE)
1004 error (0, 0, _("\
1005character `%s' not defined while needed as default value"),
1006 "<newline>");
1007 else
1008 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1009
1010 value = charset_find_value (charset, "carriage-return", 15);
1011 if (value == ILLEGAL_CHAR_VALUE)
1012 error (0, 0, _("\
1013character `%s' not defined while needed as default value"),
1014 "<carriage-return>");
1015 else
1016 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1017
1018 value = charset_find_value (charset, "tab", 3);
1019 if (value == ILLEGAL_CHAR_VALUE)
1020 error (0, 0, _("\
1021character `%s' not defined while needed as default value"),
1022 "<tab>");
1023 else
1024 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1025
1026 value = charset_find_value (charset, "vertical-tab", 12);
1027 if (value == ILLEGAL_CHAR_VALUE)
1028 error (0, 0, _("\
1029character `%s' not defined while needed as default value"),
1030 "<vertical-tab>");
1031 else
1032 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1033 }
1034
1035 if ((ctype->class_done & BIT (tok_xdigit)) == 0)
1036 /* "If this keyword is not specified, the digits `0' to `9', the
1037 uppercase letters `A' through `F', and the lowercase letters `a'
1038 through `f', ..., shall automatically belong to this class, with
1039 implementation defined character values." [P1003.2, 2.5.2.1] */
1040 {
1041 set_default (BIT (tok_xdigit), '0', '9');
1042 set_default (BIT (tok_xdigit), 'A', 'F');
1043 set_default (BIT (tok_xdigit), 'a', 'f');
1044 }
1045
1046 if ((ctype->class_done & BIT (tok_blank)) == 0)
1047 /* "If this keyword [blank] is unspecified, the characters <space> and
1048 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
1049 {
1050 unsigned int value;
1051
1052 value = charset_find_value (charset, "space", 5);
1053 if (value == ILLEGAL_CHAR_VALUE)
1054 error (0, 0, _("\
1055character `%s' not defined while needed as default value"),
1056 "<space>");
1057 else
1058 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1059
1060 value = charset_find_value (charset, "tab", 3);
1061 if (value == ILLEGAL_CHAR_VALUE)
1062 error (0, 0, _("\
1063character `%s' not defined while needed as default value"),
1064 "<tab>");
1065 else
1066 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1067 }
1068
1069 if ((ctype->class_done & BIT (tok_graph)) == 0)
1070 /* "If this keyword [graph] is not specified, characters specified for
1071 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
1072 shall belong to this character class." [P1003.2, 2.5.2.1] */
1073 {
1074 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1075 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1076 size_t cnt;
1077
1078 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1079 if ((ctype->class_collection[cnt] & mask) != 0)
1080 ctype->class_collection[cnt] |= BIT (tok_graph);
1081 }
1082
1083 if ((ctype->class_done & BIT (tok_print)) == 0)
1084 /* "If this keyword [print] is not provided, characters specified for
1085 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
1086 and the <space> character shall belong to this character class."
1087 [P1003.2, 2.5.2.1] */
1088 {
1089 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1090 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1091 size_t cnt;
1092 int space;
1093
1094 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1095 if ((ctype->class_collection[cnt] & mask) != 0)
1096 ctype->class_collection[cnt] |= BIT (tok_print);
1097
1098 space = charset_find_value (charset, "space", 5);
1099 if (space == ILLEGAL_CHAR_VALUE)
1100 error (0, 0, _("\
1101character `%s' not defined while needed as default value"),
1102 "<space>");
1103 else
1104 ELEM (ctype, class_collection, , space) |= BIT (tok_print);
1105 }
1106
1107 if (ctype->toupper_done == 0)
1108 /* "If this keyword [toupper] is not specified, the lowercase letters
1109 `a' through `z', and their corresponding uppercase letters `A' to
1110 `Z', ..., shall automatically be included, with implementation-
1111 defined character values." [P1003.2, 2.5.2.1] */
1112 {
1113 char tmp[4];
1114 int ch;
1115
1116 strcpy (tmp, "<?>");
1117
1118 for (ch = 'a'; ch <= 'z'; ++ch)
1119 {
1120 unsigned int value_from, value_to;
1121
1122 tmp[1] = (char) ch;
1123
1124 value_from = charset_find_value (charset, &tmp[1], 1);
1125 if (value_from == ILLEGAL_CHAR_VALUE)
1126 {
1127 error (0, 0, _("\
1128character `%c' not defined while needed as default value"),
1129 tmp);
1130 continue;
1131 }
1132
1133 /* This conversion is implementation defined. */
1134 tmp[1] = (char) (ch + ('A' - 'a'));
1135 value_to = charset_find_value (charset, &tmp[1], 1);
1136 if (value_to == -1)
1137 {
1138 error (0, 0, _("\
1139character `%s' not defined while needed as default value"),
1140 tmp);
1141 continue;
1142 }
1143
1144 /* The index [0] is determined by the order of the
1145 `ctype_map_newP' calls in `ctype_startup'. */
1146 ELEM (ctype, map_collection, [0], value_from) = value_to;
1147 }
1148 }
1149
1150 if (ctype->tolower_done == 0)
1151 /* "If this keyword [tolower] is not specified, the mapping shall be
1152 the reverse mapping of the one specified to `toupper'." [P1003.2] */
1153 {
1154 size_t cnt;
1155
1156 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
1157 if (ctype->map_collection[0][cnt] != 0)
1158 ELEM (ctype, map_collection, [1],
1159 ctype->map_collection[0][cnt])
1160 = ctype->charnames[cnt];
1161 }
1162}
1163
1164
1165static void
75cd5204 1166allocate_arrays (struct locale_ctype_t *ctype, struct charset_t *charset)
19bc17a9
RM
1167{
1168 size_t idx;
1169
1170 /* First we have to decide how we organize the arrays. It is easy for
1171 a one-byte character set. But multi-byte character set cannot be
1172 stored flat because they might be sparsly used. So we determine an
1173 optimal hashing function for the used characters.
1174
1175 We use a very trivial hashing function to store the sparse table.
1176 CH % TABSIZE is used as an index. To solve multiple hits we have
1177 N planes. This gurantees a fixed search time for a character [N
1178 / 2]. In the following code we determine the minmum value for
1179 TABSIZE * N, where TABSIZE >= 256. */
1180 size_t min_total = UINT_MAX;
1181 size_t act_size = 256;
1182
1183 fputs (_("\
1184Computing table size for character classes might take a while..."),
1185 stderr);
1186
1187 while (act_size < min_total)
1188 {
1189 size_t cnt[act_size];
1190 size_t act_planes = 1;
1191
1192 memset (cnt, '\0', sizeof cnt);
1193
1194 for (idx = 0; idx < 256; ++idx)
1195 cnt[idx] = 1;
1196
1197 for (idx = 0; idx < ctype->charnames_act; ++idx)
1198 if (ctype->charnames[idx] >= 256)
1199 {
1200 size_t nr = ctype->charnames[idx] % act_size;
1201
1202 if (++cnt[nr] > act_planes)
1203 {
1204 act_planes = cnt[nr];
1205 if (act_size * act_planes >= min_total)
1206 break;
1207 }
1208 }
1209
1210 if (act_size * act_planes < min_total)
1211 {
1212 min_total = act_size * act_planes;
1213 ctype->plane_size = act_size;
1214 ctype->plane_cnt = act_planes;
1215 }
1216
1217 ++act_size;
1218 }
1219
1220 fprintf (stderr, _(" done\n"));
1221
75cd5204 1222
19bc17a9
RM
1223#if __BYTE_ORDER == __LITTLE_ENDIAN
1224# define NAMES_B1 ctype->names_el
1225# define NAMES_B2 ctype->names_eb
1226#else
1227# define NAMES_B1 ctype->names_eb
1228# define NAMES_B2 ctype->names_el
1229#endif
1230
7a12c6bb
RM
1231 ctype->names_eb = (u_int32_t *) xcalloc (ctype->plane_size
1232 * ctype->plane_cnt,
1233 sizeof (u_int32_t));
1234 ctype->names_el = (u_int32_t *) xcalloc (ctype->plane_size
1235 * ctype->plane_cnt,
1236 sizeof (u_int32_t));
19bc17a9
RM
1237
1238 for (idx = 1; idx < 256; ++idx)
1239 NAMES_B1[idx] = idx;
1240
1241 /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */
1242 NAMES_B1[0] = 1;
1243
1244 for (idx = 256; idx < ctype->charnames_act; ++idx)
1245 {
1246 size_t nr = (ctype->charnames[idx] % ctype->plane_size);
1247 size_t depth = 0;
1248
1249 while (NAMES_B1[nr + depth * ctype->plane_size])
1250 ++depth;
1251 assert (depth < ctype->plane_cnt);
1252
1253 NAMES_B1[nr + depth * ctype->plane_size] = ctype->charnames[idx];
1254
1255 /* Now for faster access remember the index in the NAMES_B array. */
1256 ctype->charnames[idx] = nr + depth * ctype->plane_size;
1257 }
1258 NAMES_B1[0] = 0;
1259
1260 for (idx = 0; idx < ctype->plane_size * ctype->plane_cnt; ++idx)
1261 NAMES_B2[idx] = SWAPU32 (NAMES_B1[idx]);
1262
1263
1264 /* You wonder about this amount of memory? This is only because some
1265 users do not manage to address the array with unsigned values or
1266 data types with range >= 256. '\200' would result in the array
1267 index -128. To help these poor people we duplicate the entries for
1268 128 up to 255 below the entry for \0. */
1269 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
1270 sizeof (char_class_t));
1271 ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
1272 * ctype->plane_cnt,
1273 sizeof (char_class32_t));
1274
1275 /* Fill in the character class information. */
1276#if __BYTE_ORDER == __LITTLE_ENDIAN
1277# define TRANS(w) CHAR_CLASS_TRANS (w)
1278# define TRANS32(w) CHAR_CLASS32_TRANS (w)
1279#else
1280# define TRANS(w) (w)
1281# define TRANS32(w) (w)
1282#endif
1283
1284 for (idx = 0; idx < ctype->class_collection_act; ++idx)
1285 if (ctype->charnames[idx] < 256)
1286 ctype->ctype_b[128 + ctype->charnames[idx]]
1287 = TRANS (ctype->class_collection[idx]);
1288
75cd5204
RM
1289 /* Mirror first 127 entries. We must take care that entry -1 is not
1290 mirrored because EOF == -1. */
1291 for (idx = 0; idx < 127; ++idx)
19bc17a9
RM
1292 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
1293
1294 /* The 32 bit array contains all characters. */
1295 for (idx = 0; idx < ctype->class_collection_act; ++idx)
1296 ctype->ctype32_b[ctype->charnames[idx]]
1297 = TRANS32 (ctype->class_collection[idx]);
1298
1299 /* Room for table of mappings. */
7a12c6bb
RM
1300 ctype->map_eb = (u_int32_t **) xmalloc (ctype->map_collection_nr
1301 * sizeof (u_int32_t *));
1302 ctype->map_el = (u_int32_t **) xmalloc (ctype->map_collection_nr
1303 * sizeof (u_int32_t *));
19bc17a9
RM
1304
1305 /* Fill in all mappings. */
1306 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
1307 {
1308 unsigned int idx2;
1309
1310 /* Allocate table. */
7a12c6bb
RM
1311 ctype->map_eb[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1312 * ctype->plane_cnt + 128)
1313 * sizeof (u_int32_t));
1314 ctype->map_el[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1315 * ctype->plane_cnt + 128)
1316 * sizeof (u_int32_t));
19bc17a9
RM
1317
1318#if __BYTE_ORDER == __LITTLE_ENDIAN
1319# define MAP_B1 ctype->map_el
1320# define MAP_B2 ctype->map_eb
1321#else
1322# define MAP_B1 ctype->map_eb
1323# define MAP_B2 ctype->map_el
1324#endif
1325
1326 /* Copy default value (identity mapping). */
1327 memcpy (&MAP_B1[idx][128], NAMES_B1,
7a12c6bb 1328 ctype->plane_size * ctype->plane_cnt * sizeof (u_int32_t));
19bc17a9
RM
1329
1330 /* Copy values from collection. */
1331 for (idx2 = 0; idx2 < ctype->map_collection_act[idx]; ++idx2)
1332 if (ctype->map_collection[idx][idx2] != 0)
1333 MAP_B1[idx][128 + ctype->charnames[idx2]] =
1334 ctype->map_collection[idx][idx2];
1335
75cd5204
RM
1336 /* Mirror first 127 entries. We must take care not to map entry
1337 -1 because EOF == -1. */
1338 for (idx2 = 0; idx2 < 127; ++idx2)
19bc17a9
RM
1339 MAP_B1[idx][idx2] = MAP_B1[idx][256 + idx2];
1340
75cd5204
RM
1341 /* EOF must map to EOF. */
1342 MAP_B1[idx][127] = EOF;
19bc17a9
RM
1343
1344 /* And now the other byte order. */
1345 for (idx2 = 0; idx2 < ctype->plane_size * ctype->plane_cnt + 128; ++idx2)
1346 MAP_B2[idx][idx2] = SWAPU32 (MAP_B1[idx][idx2]);
1347 }
1348
1349 /* Extra array for class and map names. */
7a12c6bb
RM
1350 ctype->class_name_ptr = (u_int32_t *) xmalloc (ctype->nr_charclass
1351 * sizeof (u_int32_t));
1352 ctype->map_name_ptr = (u_int32_t *) xmalloc (ctype->map_collection_nr
1353 * sizeof (u_int32_t));
75cd5204
RM
1354
1355 /* Array for width information. Because the expected width are very
1356 small we use only one single byte. This save space and we need
1357 not provide the information twice with both endianesses. */
1358 ctype->width = (unsigned char *) xmalloc (ctype->plane_size
1359 * ctype->plane_cnt);
1360 /* Initialize with default width value. */
1361 memset (ctype->width, charset->width_default,
1362 ctype->plane_size * ctype->plane_cnt);
1363 if (charset->width_rules != NULL)
1364 {
1365 size_t cnt;
1366
1367 for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
1368 if (charset->width_rules[cnt].width != charset->width_default)
1369 for (idx = charset->width_rules[cnt].from;
1370 idx <= charset->width_rules[cnt].to; ++idx)
1371 {
1372 size_t nr = idx % ctype->plane_size;
1373 size_t depth = 0;
1374
1375 while (NAMES_B1[nr + depth * ctype->plane_size] != nr)
1376 ++depth;
1377 assert (depth < ctype->plane_cnt);
1378
1379 ctype->width[nr + depth * ctype->plane_size]
1380 = charset->width_rules[cnt].width;
1381 }
1382 }
0200214b
RM
1383
1384 /* Compute MB_CUR_MAX. Please note the value mb_cur_max in the
1385 character set definition gives the number of bytes in the wide
1386 character representation. We compute the number of bytes used
1387 for the UTF-8 encoded form. */
1388 ctype->mb_cur_max = ((int []) { 2, 3, 5, 6 }) [charset->mb_cur_max - 1];
6990326c
RM
1389
1390 /* We need the name of the currently used 8-bit character set to
1391 make correct conversion between this 8-bit representation and the
1392 ISO 10646 character set used internally for wide characters. */
1393 ctype->codeset_name = charset->code_set_name;
19bc17a9 1394}