]> git.ipfire.org Git - thirdparty/glibc.git/blame - locale/programs/ld-ctype.c
Recognize hppa-next as a valid CPU-COMPANY combination.
[thirdparty/glibc.git] / locale / programs / ld-ctype.c
CommitLineData
19bc17a9
RM
1/* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
2This file is part of the GNU C Library.
3Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
4
5The GNU C Library is free software; you can redistribute it and/or
6modify it under the terms of the GNU Library General Public License as
7published by the Free Software Foundation; either version 2 of the
8License, or (at your option) any later version.
9
10The GNU C Library is distributed in the hope that it will be useful,
11but WITHOUT ANY WARRANTY; without even the implied warranty of
12MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13Library General Public License for more details.
14
15You should have received a copy of the GNU Library General Public
16License along with the GNU C Library; see the file COPYING.LIB. If
17not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18Boston, MA 02111-1307, USA. */
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
a68b0d31 24#include <alloca.h>
19bc17a9
RM
25#include <endian.h>
26#include <limits.h>
27#include <string.h>
28
29#include "locales.h"
30#include "localeinfo.h"
31#include "langinfo.h"
32#include "locfile-token.h"
33#include "stringtrans.h"
34
35/* Uncomment the following line in the production version. */
36/* define NDEBUG 1 */
37#include <assert.h>
38
39
/* Allocation helpers; only declared here, the definitions are not part
   of this file.  */
void *xmalloc (size_t n);
void *xcalloc (size_t n, size_t s);
void *xrealloc (void *ptr, size_t n);
43
44
/* Bit position of a character class, counted from the `upper' token.  */
#define BITPOS(class) ((class) - tok_upper)
/* Mask in which only the bit belonging to CLASS is set.  */
#define BIT(class) (1 << BITPOS (class))

/* Lvalue for the slot of character VALUE in the table COLLECTION of
   CTYPE.  IDX is either empty or a subscript selecting one table out
   of an array of tables.  The lookup is done by find_idx.  */
#define ELEM(ctype, collection, idx, value)                                   \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
             &ctype->collection##_act idx, value)

/* Reverse the order of the four bytes of W.  */
#define SWAPU32(w) \
  (((w) >> 24) | (((w) >> 8) & 0xff00) | (((w) & 0xff00) << 8) | ((w) << 24))

/* Exchange the two low bytes of W.  */
#define SWAPU16(w) \
  ((((w) & 0xff) << 8) | (((w) >> 8) & 0xff))


/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t u_int16_t
#define CHAR_CLASS_TRANS SWAPU16
#define char_class32_t u_int32_t
#define CHAR_CLASS32_TRANS SWAPU32
67
68
69/* The real definition of the struct for the LC_CTYPE locale. */
70struct locale_ctype_t
71{
72 unsigned int *charnames;
73 size_t charnames_max;
74 size_t charnames_act;
75
7a12c6bb
RM
76 /* We will allow up to 8 * sizeof(u_int32_t) - 1 character classes. */
77#define MAX_NR_CHARCLASS (8 * sizeof (u_int32_t) - 1)
19bc17a9
RM
78 int nr_charclass;
79 const char *classnames[MAX_NR_CHARCLASS];
80 unsigned long int current_class_mask;
81 unsigned int last_class_char;
7a12c6bb 82 u_int32_t *class_collection;
19bc17a9
RM
83 size_t class_collection_max;
84 size_t class_collection_act;
85 unsigned long int class_done;
86
87 /* If the following number ever turns out to be too small simply
88 increase it. But I doubt it will. --drepper@gnu */
89#define MAX_NR_CHARMAP 16
90 const char *mapnames[MAX_NR_CHARMAP];
7a12c6bb 91 u_int32_t *map_collection[MAX_NR_CHARMAP];
a5b7bf0e
RM
92 u_int32_t map_collection_max[MAX_NR_CHARMAP];
93 u_int32_t map_collection_act[MAX_NR_CHARMAP];
19bc17a9
RM
94 size_t map_collection_nr;
95 size_t last_map_idx;
96 unsigned int from_map_char;
97 int toupper_done;
98 int tolower_done;
99
100 /* The arrays for the binary representation. */
7a12c6bb
RM
101 u_int32_t plane_size;
102 u_int32_t plane_cnt;
19bc17a9
RM
103 char_class_t *ctype_b;
104 char_class32_t *ctype32_b;
7a12c6bb
RM
105 u_int32_t *names_el;
106 u_int32_t *names_eb;
107 u_int32_t **map_eb;
108 u_int32_t **map_el;
109 u_int32_t *class_name_ptr;
110 u_int32_t *map_name_ptr;
75cd5204 111 unsigned char *width;
0200214b 112 u_int32_t mb_cur_max;
6990326c 113 const char *codeset_name;
19bc17a9
RM
114};
115
116
/* Forward declarations of the helpers which are private to this file.  */
static void ctype_class_newP (struct linereader *lr,
                              struct locale_ctype_t *ctype,
                              const char *name);
static void ctype_map_newP (struct linereader *lr,
                            struct locale_ctype_t *ctype,
                            const char *name,
                            struct charset_t *charset);
static u_int32_t *find_idx (struct locale_ctype_t *ctype,
                            u_int32_t **table, size_t *max, size_t *act,
                            unsigned int idx);
static void set_class_defaults (struct locale_ctype_t *ctype,
                                struct charset_t *charset);
static void allocate_arrays (struct locale_ctype_t *ctype,
                             struct charset_t *charset);
19bc17a9
RM
129
130
131void
132ctype_startup (struct linereader *lr, struct localedef_t *locale,
133 struct charset_t *charset)
134{
135 unsigned int cnt;
136 struct locale_ctype_t *ctype;
137
138 /* It is important that we always use UCS1 encoding for strings now. */
139 encoding_method = ENC_UCS1;
140
141 /* Allocate the needed room. */
142 locale->categories[LC_CTYPE].ctype = ctype =
143 (struct locale_ctype_t *) xmalloc (sizeof (struct locale_ctype_t));
144
145 /* We have no names seen yet. */
146 ctype->charnames_max = charset->mb_cur_max == 1 ? 256 : 512;
147 ctype->charnames =
148 (unsigned int *) xmalloc (ctype->charnames_max * sizeof (unsigned int));
149 for (cnt = 0; cnt < 256; ++cnt)
150 ctype->charnames[cnt] = cnt;
151 ctype->charnames_act = 256;
152
153 /* Fill character class information. */
154 ctype->nr_charclass = 0;
155 ctype->current_class_mask = 0;
156 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
157 /* The order of the following instructions determines the bit
158 positions! */
159 ctype_class_newP (lr, ctype, "upper");
160 ctype_class_newP (lr, ctype, "lower");
161 ctype_class_newP (lr, ctype, "alpha");
162 ctype_class_newP (lr, ctype, "digit");
163 ctype_class_newP (lr, ctype, "xdigit");
164 ctype_class_newP (lr, ctype, "space");
165 ctype_class_newP (lr, ctype, "print");
166 ctype_class_newP (lr, ctype, "graph");
167 ctype_class_newP (lr, ctype, "blank");
168 ctype_class_newP (lr, ctype, "cntrl");
169 ctype_class_newP (lr, ctype, "punct");
170 ctype_class_newP (lr, ctype, "alnum");
171
172 ctype->class_collection_max = charset->mb_cur_max == 1 ? 256 : 512;
7a12c6bb
RM
173 ctype->class_collection
174 = (u_int32_t *) xmalloc (sizeof (unsigned long int)
175 * ctype->class_collection_max);
19bc17a9
RM
176 memset (ctype->class_collection, '\0',
177 sizeof (unsigned long int) * ctype->class_collection_max);
178 ctype->class_collection_act = 256;
179
180 /* Fill character map information. */
181 ctype->map_collection_nr = 0;
182 ctype->last_map_idx = MAX_NR_CHARMAP;
183 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
184 ctype_map_newP (lr, ctype, "toupper", charset);
185 ctype_map_newP (lr, ctype, "tolower", charset);
186
187 /* Fill first 256 entries in `toupper' and `tolower' arrays. */
188 for (cnt = 0; cnt < 256; ++cnt)
189 {
190 ctype->map_collection[0][cnt] = cnt;
191 ctype->map_collection[1][cnt] = cnt;
192 }
193}
194
195
196void
197ctype_finish (struct localedef_t *locale, struct charset_t *charset)
198{
199 /* See POSIX.2, table 2-6 for the meaning of the following table. */
200#define NCLASS 12
201 static const struct
202 {
203 const char *name;
204 const char allow[NCLASS];
205 }
206 valid_table[NCLASS] =
207 {
208 /* The order is important. See token.h for more information.
209 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
210 { "upper", "--MX-XDDXXX-" },
211 { "lower", "--MX-XDDXXX-" },
212 { "alpha", "---X-XDDXXX-" },
213 { "digit", "XXX--XDDXXX-" },
214 { "xdigit", "-----XDDXXX-" },
215 { "space", "XXXXX------X" },
216 { "print", "---------X--" },
217 { "graph", "---------X--" },
218 { "blank", "XXXXXM-----X" },
219 { "cntrl", "XXXXX-XX--XX" },
220 { "punct", "XXXXX-DD-X-X" },
221 { "alnum", "-----XDDXXX-" }
222 };
223 size_t cnt;
224 int cls1, cls2;
225 unsigned int space_value;
226 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
227
228 /* Set default value for classes not specified. */
229 set_class_defaults (ctype, charset);
230
231 /* Check according to table. */
232 for (cnt = 0; cnt < ctype->class_collection_max; ++cnt)
233 {
234 unsigned long int tmp;
235
236 tmp = ctype->class_collection[cnt];
237 if (tmp == 0)
238 continue;
239
240 for (cls1 = 0; cls1 < NCLASS; ++cls1)
241 if ((tmp & (1 << cls1)) != 0)
242 for (cls2 = 0; cls2 < NCLASS; ++cls2)
243 if (valid_table[cls1].allow[cls2] != '-')
244 {
245 int eq = (tmp & (1 << cls2)) != 0;
246 switch (valid_table[cls1].allow[cls2])
247 {
248 case 'M':
249 if (!eq)
250 {
251 char buf[17];
252 char *cp = buf;
253 unsigned int value;
254
255 value = ctype->charnames[cnt];
256
257 if ((value & 0xff000000) != 0)
258 cp += sprintf (cp, "\\%o", (value >> 24) & 0xff);
259 if ((value & 0xffff0000) != 0)
260 cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
261 if ((value & 0xffffff00) != 0)
262 cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
263 sprintf (cp, "\\%o", value & 0xff);
264
265 error (0, 0, _("\
266character %s'%s' in class `%s' must be in class `%s'"), value > 256 ? "L" : "",
267 cp, valid_table[cls1].name,
268 valid_table[cls2].name);
269 }
270 break;
271
272 case 'X':
273 if (eq)
274 {
275 char buf[17];
276 char *cp = buf;
277 unsigned int value;
278
279 value = ctype->charnames[cnt];
280
281 if ((value & 0xff000000) != 0)
282 cp += sprintf (cp, "\\%o", value >> 24);
283 if ((value & 0xffff0000) != 0)
284 cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
285 if ((value & 0xffffff00) != 0)
286 cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
287 sprintf (cp, "\\%o", value & 0xff);
288
289 error (0, 0, _("\
290character %s'%s' in class `%s' must not be in class `%s'"),
291 value > 256 ? "L" : "", cp,
292 valid_table[cls1].name, valid_table[cls2].name);
293 }
294 break;
295
296 case 'D':
297 ctype->class_collection[cnt] |= 1 << cls2;
298 break;
299
300 default:
301 error (5, 0, _("internal error in %s, line %u"),
302 __FUNCTION__, __LINE__);
303 }
304 }
305 }
306
307 /* ... and now test <SP> as a special case. */
308 space_value = charset_find_value (charset, "SP", 2);
309 if (space_value == ILLEGAL_CHAR_VALUE)
310 error (0, 0, _("character <SP> not defined in character map"));
311 else if ((cnt = BITPOS (tok_space),
312 (ELEM (ctype, class_collection, , space_value)
313 & BIT (tok_space)) == 0)
314 || (cnt = BITPOS (tok_blank),
315 (ELEM (ctype, class_collection, , space_value)
316 & BIT (tok_blank)) == 0))
317 error (0, 0, _("<SP> character not in class `%s'"),
318 valid_table[cnt].name);
319 else if ((cnt = BITPOS (tok_punct),
320 (ELEM (ctype, class_collection, , space_value)
321 & BIT (tok_punct)) != 0)
322 || (cnt = BITPOS (tok_graph),
323 (ELEM (ctype, class_collection, , space_value)
324 & BIT (tok_graph))
325 != 0))
326 error (0, 0, _("<SP> character must not be in class `%s'"),
327 valid_table[cnt].name);
328 else
329 ELEM (ctype, class_collection, , space_value) |= BIT (tok_print);
75cd5204
RM
330
331 /* Now that the tests are done make sure the name array contains all
332 characters which are handled in the WIDTH section of the
333 character set definition file. */
334 if (charset->width_rules != NULL)
335 for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
336 {
337 size_t inner;
338 for (inner = charset->width_rules[cnt].from;
339 inner <= charset->width_rules[cnt].to; ++inner)
340 (void) find_idx (ctype, NULL, NULL, NULL, inner);
341 }
19bc17a9
RM
342}
343
344
345void
75cd5204
RM
346ctype_output (struct localedef_t *locale, struct charset_t *charset,
347 const char *output_path)
19bc17a9
RM
348{
349 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
350 const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
351 + 2 * (ctype->map_collection_nr - 2));
75cd5204
RM
352 struct iovec iov[2 + nelems + ctype->nr_charclass
353 + ctype->map_collection_nr];
19bc17a9 354 struct locale_file data;
7a12c6bb 355 u_int32_t idx[nelems];
75cd5204 356 size_t elem, cnt, offset, total;
19bc17a9
RM
357
358
359 if ((locale->binary & (1 << LC_CTYPE)) != 0)
360 {
361 iov[0].iov_base = ctype;
362 iov[0].iov_len = locale->len[LC_CTYPE];
363
364 write_locale_data (output_path, "LC_CTYPE", 1, iov);
365
366 return;
367 }
368
369
370 /* Now prepare the output: Find the sizes of the table we can use. */
75cd5204 371 allocate_arrays (ctype, charset);
19bc17a9
RM
372
373 data.magic = LIMAGIC (LC_CTYPE);
374 data.n = nelems;
375 iov[0].iov_base = (void *) &data;
376 iov[0].iov_len = sizeof (data);
377
378 iov[1].iov_base = (void *) idx;
379 iov[1].iov_len = sizeof (idx);
380
381 idx[0] = iov[0].iov_len + iov[1].iov_len;
382 offset = 0;
383
384 for (elem = 0; elem < nelems; ++elem)
385 {
386 if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
387 switch (elem)
388 {
389#define CTYPE_DATA(name, base, len) \
390 case _NL_ITEM_INDEX (name): \
ce7a5ef4
RM
391 iov[2 + elem + offset].iov_base = (base); \
392 iov[2 + elem + offset].iov_len = (len); \
75cd5204
RM
393 if (elem + 1 < nelems) \
394 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
19bc17a9
RM
395 break
396
397 CTYPE_DATA (_NL_CTYPE_CLASS,
398 ctype->ctype_b,
399 (256 + 128) * sizeof (char_class_t));
400
401 CTYPE_DATA (_NL_CTYPE_TOUPPER_EB,
402 ctype->map_eb[0],
403 (ctype->plane_size * ctype->plane_cnt + 128)
7a12c6bb 404 * sizeof (u_int32_t));
19bc17a9
RM
405 CTYPE_DATA (_NL_CTYPE_TOLOWER_EB,
406 ctype->map_eb[1],
407 (ctype->plane_size * ctype->plane_cnt + 128)
7a12c6bb 408 * sizeof (u_int32_t));
19bc17a9
RM
409
410 CTYPE_DATA (_NL_CTYPE_TOUPPER_EL,
411 ctype->map_el[0],
412 (ctype->plane_size * ctype->plane_cnt + 128)
7a12c6bb 413 * sizeof (u_int32_t));
19bc17a9
RM
414 CTYPE_DATA (_NL_CTYPE_TOLOWER_EL,
415 ctype->map_el[1],
416 (ctype->plane_size * ctype->plane_cnt + 128)
7a12c6bb 417 * sizeof (u_int32_t));
19bc17a9
RM
418
419 CTYPE_DATA (_NL_CTYPE_CLASS32,
420 ctype->ctype32_b,
421 (ctype->plane_size * ctype->plane_cnt
422 * sizeof (char_class32_t)));
423
424 CTYPE_DATA (_NL_CTYPE_NAMES_EB,
7a12c6bb
RM
425 ctype->names_eb, (ctype->plane_size * ctype->plane_cnt
426 * sizeof (u_int32_t)));
19bc17a9 427 CTYPE_DATA (_NL_CTYPE_NAMES_EL,
7a12c6bb
RM
428 ctype->names_el, (ctype->plane_size * ctype->plane_cnt
429 * sizeof (u_int32_t)));
19bc17a9
RM
430
431 CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
7a12c6bb 432 &ctype->plane_size, sizeof (u_int32_t));
19bc17a9 433 CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
7a12c6bb 434 &ctype->plane_cnt, sizeof (u_int32_t));
19bc17a9 435
75cd5204
RM
436 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
437 /* The class name array. */
438 total = 0;
439 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
440 {
441 iov[2 + elem + offset].iov_base
442 = (void *) ctype->classnames[cnt];
443 iov[2 + elem + offset].iov_len
444 = strlen (ctype->classnames[cnt]) + 1;
445 total += iov[2 + elem + offset].iov_len;
446 }
ce7a5ef4
RM
447 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
448 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
449 total += 1 + (4 - ((total + 1) % 4));
75cd5204
RM
450
451 if (elem + 1 < nelems)
452 idx[elem + 1] = idx[elem] + total;
453 break;
454
455 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
456 /* The class name array. */
457 total = 0;
458 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
459 {
460 iov[2 + elem + offset].iov_base
461 = (void *) ctype->mapnames[cnt];
462 iov[2 + elem + offset].iov_len
463 = strlen (ctype->mapnames[cnt]) + 1;
464 total += iov[2 + elem + offset].iov_len;
465 }
ce7a5ef4
RM
466 iov[2 + elem + offset].iov_base = (void *) "\0\0\0";
467 iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
468 total += 1 + (4 - ((total + 1) % 4));
75cd5204
RM
469
470 if (elem + 1 < nelems)
471 idx[elem + 1] = idx[elem] + total;
472 break;
19bc17a9
RM
473
474 CTYPE_DATA (_NL_CTYPE_WIDTH,
75cd5204 475 ctype->width, ctype->plane_size * ctype->plane_cnt);
19bc17a9 476
0200214b
RM
477 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
478 &ctype->mb_cur_max, sizeof (u_int32_t));
479
ce7a5ef4
RM
480 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
481 total = strlen (ctype->codeset_name) + 1;
482 if (total % 4 == 0)
483 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
484 else
485 {
486 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
487 memcpy (iov[2 + elem + offset].iov_base, ctype->codeset_name,
488 total);
489 total = (total + 3) & ~3;
490 }
491 iov[2 + elem + offset].iov_len = total;
492 if (elem + 1 < nelems)
493 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
494 break;
6990326c 495
19bc17a9
RM
496 default:
497 assert (! "unknown CTYPE element");
498 }
499 else
500 {
501 /* Handle extra maps. */
502 size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) >> 1;
503
504 if (((elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) & 1) == 0)
75cd5204 505 iov[2 + elem + offset].iov_base = ctype->map_eb[nr];
19bc17a9 506 else
75cd5204 507 iov[2 + elem + offset].iov_base = ctype->map_el[nr];
19bc17a9 508
75cd5204
RM
509 iov[2 + elem + offset].iov_len = ((ctype->plane_size
510 * ctype->plane_cnt + 128)
7a12c6bb 511 * sizeof (u_int32_t));
19bc17a9 512
75cd5204
RM
513 if (elem + 1 < nelems)
514 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
515 }
19bc17a9 516 }
19bc17a9 517
75cd5204
RM
518 assert (2 + elem + offset == (nelems + ctype->nr_charclass
519 + ctype->map_collection_nr + 2));
19bc17a9 520
75cd5204 521 write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
19bc17a9
RM
522}
523
524
525/* Character class handling. */
526void
527ctype_class_new (struct linereader *lr, struct localedef_t *locale,
528 enum token_t tok, struct token *code,
529 struct charset_t *charset)
530{
531 ctype_class_newP (lr, locale->categories[LC_CTYPE].ctype,
532 code->val.str.start);
533}
534
535
536int
537ctype_is_charclass (struct linereader *lr, struct localedef_t *locale,
538 const char *name)
539{
540 int cnt;
541
542 for (cnt = 0; cnt < locale->categories[LC_CTYPE].ctype->nr_charclass; ++cnt)
543 if (strcmp (name, locale->categories[LC_CTYPE].ctype->classnames[cnt])
544 == 0)
545 return 1;
546
547 return 0;
548}
549
550
551void
552ctype_class_start (struct linereader *lr, struct localedef_t *locale,
553 enum token_t tok, const char *str,
554 struct charset_t *charset)
555{
556 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
557 int cnt;
558
559 switch (tok)
560 {
561 case tok_upper:
562 str = "upper";
563 break;
564 case tok_lower:
565 str = "lower";
566 break;
567 case tok_alpha:
568 str = "alpha";
569 break;
570 case tok_digit:
571 str = "digit";
572 break;
573 case tok_xdigit:
574 str = "xdigit";
575 break;
576 case tok_space:
577 str = "space";
578 break;
579 case tok_print:
580 str = "print";
581 break;
582 case tok_graph:
583 str = "graph";
584 break;
585 case tok_blank:
586 str = "blank";
587 break;
588 case tok_cntrl:
589 str = "cntrl";
590 break;
591 case tok_punct:
592 str = "punct";
593 break;
594 case tok_alnum:
595 str = "alnum";
596 break;
597 case tok_ident:
598 break;
599 default:
600 assert (! "illegal token as class name: should not happen");
601 }
602
603 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
604 if (strcmp (str, ctype->classnames[cnt]) == 0)
605 break;
606
607 if (cnt >= ctype->nr_charclass)
608 assert (! "unknown class in class definition: should not happen");
609
610 ctype->class_done |= BIT (tok);
611
612 ctype->current_class_mask = 1 << cnt;
613 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
614}
615
616
617void
618ctype_class_from (struct linereader *lr, struct localedef_t *locale,
619 struct token *code, struct charset_t *charset)
620{
621 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
622 unsigned int value;
623
624 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
625
626 ctype->last_class_char = value;
627
628 if (value == ILLEGAL_CHAR_VALUE)
629 /* In the LC_CTYPE category it is no error when a character is
630 not found. This has to be ignored silently. */
631 return;
632
633 *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
634 &ctype->class_collection_act, value)
635 |= ctype->current_class_mask;
636}
637
638
639void
640ctype_class_to (struct linereader *lr, struct localedef_t *locale,
641 struct token *code, struct charset_t *charset)
642{
643 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
644 unsigned int value, cnt;
645
646 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
647
648 assert (value >= ctype->last_class_char);
649
650 for (cnt = ctype->last_class_char + 1; cnt <= value; ++cnt)
651 *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
652 &ctype->class_collection_act, cnt)
653 |= ctype->current_class_mask;
654
655 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
656}
657
658
659void
660ctype_class_end (struct linereader *lr, struct localedef_t *locale)
661{
662 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
663
664 /* We have no special actions to perform here. */
665 ctype->current_class_mask = 0;
666 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
667}
668
669
670/* Character map handling. */
671void
672ctype_map_new (struct linereader *lr, struct localedef_t *locale,
673 enum token_t tok, struct token *code,
674 struct charset_t *charset)
675{
676 ctype_map_newP (lr, locale->categories[LC_CTYPE].ctype,
677 code->val.str.start, charset);
678}
679
680
681int
503054c0
RM
682ctype_is_charconv (struct linereader *lr, struct localedef_t *locale,
683 const char *name)
19bc17a9
RM
684{
685 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
686 size_t cnt;
687
688 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
689 if (strcmp (name, ctype->mapnames[cnt]) == 0)
690 return 1;
691
692 return 0;
693}
694
695
696void
697ctype_map_start (struct linereader *lr, struct localedef_t *locale,
698 enum token_t tok, const char *name, struct charset_t *charset)
699{
700 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
701 size_t cnt;
702
703 switch (tok)
704 {
705 case tok_toupper:
706 ctype->toupper_done = 1;
707 name = "toupper";
708 break;
709 case tok_tolower:
710 ctype->tolower_done = 1;
711 name = "tolower";
712 break;
713 case tok_ident:
714 break;
715 default:
716 assert (! "unknown token in category `LC_CTYPE' should not happen");
717 }
718
719 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
720 if (strcmp (name, ctype->mapnames[cnt]) == 0)
721 break;
722
723 if (cnt == ctype->map_collection_nr)
724 assert (! "unknown token in category `LC_CTYPE' should not happen");
725
726 ctype->last_map_idx = cnt;
727 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
728}
729
730
731void
732ctype_map_from (struct linereader *lr, struct localedef_t *locale,
733 struct token *code, struct charset_t *charset)
734{
735 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
736 unsigned int value;
737
738 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
739
740 if (value == ILLEGAL_CHAR_VALUE)
741 /* In the LC_CTYPE category it is no error when a character is
742 not found. This has to be ignored silently. */
743 return;
744
745 assert (ctype->last_map_idx < ctype->map_collection_nr);
746
747 ctype->from_map_char = value;
748}
749
750
751void
752ctype_map_to (struct linereader *lr, struct localedef_t *locale,
753 struct token *code, struct charset_t *charset)
754{
755 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
756 unsigned int value;
757
758 value = charset_find_value (charset, code->val.str.start, code->val.str.len);
759
760 if (ctype->from_map_char == ILLEGAL_CHAR_VALUE
761 || value == ILLEGAL_CHAR_VALUE)
762 {
763 /* In the LC_CTYPE category it is no error when a character is
764 not found. This has to be ignored silently. */
765 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
766 return;
767 }
768
769 *find_idx (ctype, &ctype->map_collection[ctype->last_map_idx],
770 &ctype->map_collection_max[ctype->last_map_idx],
771 &ctype->map_collection_act[ctype->last_map_idx],
772 ctype->from_map_char) = value;
773
774 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
775}
776
777
778void
779ctype_map_end (struct linereader *lr, struct localedef_t *locale)
780{
781 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
782
783 ctype->last_map_idx = MAX_NR_CHARMAP;
784 ctype->from_map_char = ILLEGAL_CHAR_VALUE;
785}
786
787
788/* Local functions. */
789static void
790ctype_class_newP (struct linereader *lr, struct locale_ctype_t *ctype,
791 const char *name)
792{
793 int cnt;
794
795 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
796 if (strcmp (ctype->classnames[cnt], name) == 0)
797 break;
798
799 if (cnt < ctype->nr_charclass)
800 {
801 lr_error (lr, _("character class `%s' already defined"));
802 return;
803 }
804
805 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
806 /* Exit code 2 is prescribed in P1003.2b. */
807 error (2, 0, _("\
808implementation limit: no more than %d character classes allowed"),
809 MAX_NR_CHARCLASS);
810
811 ctype->classnames[ctype->nr_charclass++] = name;
812}
813
814
815static void
816ctype_map_newP (struct linereader *lr, struct locale_ctype_t *ctype,
817 const char *name, struct charset_t *charset)
818{
819 size_t max_chars = 0;
820 int cnt;
821
822 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
823 {
824 if (strcmp (ctype->mapnames[cnt], name) == 0)
825 break;
826
827 if (max_chars < ctype->map_collection_max[cnt])
828 max_chars = ctype->map_collection_max[cnt];
829 }
830
831 if (cnt < ctype->map_collection_nr)
832 {
833 lr_error (lr, _("character map `%s' already defined"));
834 return;
835 }
836
837 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
838 /* Exit code 2 is prescribed in P1003.2b. */
839 error (2, 0, _("\
840implementation limit: no more than %d character maps allowed"),
841 MAX_NR_CHARMAP);
842
843 ctype->mapnames[cnt] = name;
844
845 if (max_chars == 0)
a5b7bf0e 846 ctype->map_collection_max[cnt] = charset->mb_cur_max == 1 ? 256 : 512;
19bc17a9
RM
847 else
848 ctype->map_collection_max[cnt] = max_chars;
849
7a12c6bb
RM
850 ctype->map_collection[cnt] = (u_int32_t *)
851 xmalloc (sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
19bc17a9 852 memset (ctype->map_collection[cnt], '\0',
7a12c6bb 853 sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
19bc17a9
RM
854 ctype->map_collection_act[cnt] = 256;
855
856 ++ctype->map_collection_nr;
857}
858
859
75cd5204
RM
860/* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
861 is possible if we only want ot extend the name array. */
7a12c6bb
RM
862static u_int32_t *
863find_idx (struct locale_ctype_t *ctype, u_int32_t **table, size_t *max,
19bc17a9
RM
864 size_t *act, unsigned int idx)
865{
866 size_t cnt;
867
868 if (idx < 256)
75cd5204 869 return table == NULL ? NULL : &(*table)[idx];
19bc17a9
RM
870
871 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
872 if (ctype->charnames[cnt] == idx)
873 break;
874
875 /* We have to distinguish two cases: the names is found or not. */
876 if (cnt == ctype->charnames_act)
877 {
878 /* Extend the name array. */
879 if (ctype->charnames_act == ctype->charnames_max)
880 {
881 ctype->charnames_max *= 2;
882 ctype->charnames = (unsigned int *)
883 xrealloc (ctype->charnames,
884 sizeof (unsigned int) * ctype->charnames_max);
885 }
886 ctype->charnames[ctype->charnames_act++] = idx;
887 }
888
75cd5204
RM
889 if (table == NULL)
890 /* We have done everything we are asked to do. */
891 return NULL;
892
19bc17a9
RM
893 if (cnt >= *act)
894 {
895 if (cnt >= *max)
896 {
897 size_t old_max = *max;
898 do
899 *max *= 2;
900 while (*max <= cnt);
901
902 *table =
7a12c6bb
RM
903 (u_int32_t *) xrealloc (*table, *max * sizeof (unsigned long int));
904 memset (&(*table)[old_max], '\0',
905 (*max - old_max) * sizeof (u_int32_t));
19bc17a9
RM
906 }
907
908 (*table)[cnt] = 0;
909 *act = cnt;
910 }
911
912 return &(*table)[cnt];
913}
914
915
916static void
917set_class_defaults (struct locale_ctype_t *ctype, struct charset_t *charset)
918{
919 /* These function defines the default values for the classes and conversions
920 according to POSIX.2 2.5.2.1.
921 It may seem that the order of these if-blocks is arbitrary but it is NOT.
922 Don't move them unless you know what you do! */
923
924 void set_default (int bit, int from, int to)
925 {
926 char tmp[2];
927 int ch;
928 /* Define string. */
929 strcpy (tmp, "?");
930
931 for (ch = from; ch <= to; ++ch)
932 {
933 unsigned int value;
934 tmp[0] = ch;
935
936 value = charset_find_value (charset, tmp, 1);
937 if (value == ILLEGAL_CHAR_VALUE)
938 {
939 error (0, 0, _("\
940character `%s' not defined while needed as default value"),
941 tmp);
942 continue;
943 }
944 else
945 ELEM (ctype, class_collection, , value) |= bit;
946 }
947 }
948
949 /* Set default values if keyword was not present. */
950 if ((ctype->class_done & BIT (tok_upper)) == 0)
951 /* "If this keyword [lower] is not specified, the lowercase letters
952 `A' through `Z', ..., shall automatically belong to this class,
953 with implementation defined character values." [P1003.2, 2.5.2.1] */
954 set_default (BIT (tok_upper), 'A', 'Z');
955
956 if ((ctype->class_done & BIT (tok_lower)) == 0)
957 /* "If this keyword [lower] is not specified, the lowercase letters
958 `a' through `z', ..., shall automatically belong to this class,
959 with implementation defined character values." [P1003.2, 2.5.2.1] */
960 set_default (BIT (tok_lower), 'a', 'z');
961
962 if ((ctype->class_done & BIT (tok_alpha)) == 0)
963 {
964 /* Table 2-6 in P1003.2 says that characters in class `upper' or
965 class `lower' *must* be in class `alpha'. */
966 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
967 size_t cnt;
968
969 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
970 if ((ctype->class_collection[cnt] & mask) != 0)
971 ctype->class_collection[cnt] |= BIT (tok_alpha);
972 }
973
974 if ((ctype->class_done & BIT (tok_digit)) == 0)
975 /* "If this keyword [digit] is not specified, the digits `0' through
976 `9', ..., shall automatically belong to this class, with
977 implementation-defined character values." [P1003.2, 2.5.2.1] */
978 set_default (BIT (tok_digit), '0', '9');
979
980 /* "Only characters specified for the `alpha' and `digit' keyword
981 shall be specified. Characters specified for the keyword `alpha'
982 and `digit' are automatically included in this class. */
983 {
984 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
985 size_t cnt;
986
987 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
988 if ((ctype->class_collection[cnt] & mask) != 0)
989 ctype->class_collection[cnt] |= BIT (tok_alnum);
990 }
991
992 if ((ctype->class_done & BIT (tok_space)) == 0)
993 /* "If this keyword [space] is not specified, the characters <space>,
994 <form-feed>, <newline>, <carriage-return>, <tab>, and
995 <vertical-tab>, ..., shall automatically belong to this class,
996 with implementation-defined character values." [P1003.2, 2.5.2.1] */
997 {
998 unsigned int value;
999
1000 value = charset_find_value (charset, "space", 5);
1001 if (value == ILLEGAL_CHAR_VALUE)
1002 error (0, 0, _("\
1003character `%s' not defined while needed as default value"),
1004 "<space>");
1005 else
1006 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1007
1008 value = charset_find_value (charset, "form-feed", 9);
1009 if (value == ILLEGAL_CHAR_VALUE)
1010 error (0, 0, _("\
1011character `%s' not defined while needed as default value"),
1012 "<form-feed>");
1013 else
1014 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1015
1016 value = charset_find_value (charset, "newline", 7);
1017 if (value == ILLEGAL_CHAR_VALUE)
1018 error (0, 0, _("\
1019character `%s' not defined while needed as default value"),
1020 "<newline>");
1021 else
1022 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1023
1024 value = charset_find_value (charset, "carriage-return", 15);
1025 if (value == ILLEGAL_CHAR_VALUE)
1026 error (0, 0, _("\
1027character `%s' not defined while needed as default value"),
1028 "<carriage-return>");
1029 else
1030 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1031
1032 value = charset_find_value (charset, "tab", 3);
1033 if (value == ILLEGAL_CHAR_VALUE)
1034 error (0, 0, _("\
1035character `%s' not defined while needed as default value"),
1036 "<tab>");
1037 else
1038 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1039
1040 value = charset_find_value (charset, "vertical-tab", 12);
1041 if (value == ILLEGAL_CHAR_VALUE)
1042 error (0, 0, _("\
1043character `%s' not defined while needed as default value"),
1044 "<vertical-tab>");
1045 else
1046 ELEM (ctype, class_collection, , value) |= BIT (tok_space);
1047 }
1048
1049 if ((ctype->class_done & BIT (tok_xdigit)) == 0)
1050 /* "If this keyword is not specified, the digits `0' to `9', the
1051 uppercase letters `A' through `F', and the lowercase letters `a'
1052 through `f', ..., shall automatically belong to this class, with
1053 implementation defined character values." [P1003.2, 2.5.2.1] */
1054 {
1055 set_default (BIT (tok_xdigit), '0', '9');
1056 set_default (BIT (tok_xdigit), 'A', 'F');
1057 set_default (BIT (tok_xdigit), 'a', 'f');
1058 }
1059
1060 if ((ctype->class_done & BIT (tok_blank)) == 0)
1061 /* "If this keyword [blank] is unspecified, the characters <space> and
1062 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
1063 {
1064 unsigned int value;
1065
1066 value = charset_find_value (charset, "space", 5);
1067 if (value == ILLEGAL_CHAR_VALUE)
1068 error (0, 0, _("\
1069character `%s' not defined while needed as default value"),
1070 "<space>");
1071 else
1072 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1073
1074 value = charset_find_value (charset, "tab", 3);
1075 if (value == ILLEGAL_CHAR_VALUE)
1076 error (0, 0, _("\
1077character `%s' not defined while needed as default value"),
1078 "<tab>");
1079 else
1080 ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
1081 }
1082
1083 if ((ctype->class_done & BIT (tok_graph)) == 0)
1084 /* "If this keyword [graph] is not specified, characters specified for
1085 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
1086 shall belong to this character class." [P1003.2, 2.5.2.1] */
1087 {
1088 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1089 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1090 size_t cnt;
1091
1092 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1093 if ((ctype->class_collection[cnt] & mask) != 0)
1094 ctype->class_collection[cnt] |= BIT (tok_graph);
1095 }
1096
1097 if ((ctype->class_done & BIT (tok_print)) == 0)
1098 /* "If this keyword [print] is not provided, characters specified for
1099 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
1100 and the <space> character shall belong to this character class."
1101 [P1003.2, 2.5.2.1] */
1102 {
1103 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
1104 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
1105 size_t cnt;
1106 int space;
1107
1108 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
1109 if ((ctype->class_collection[cnt] & mask) != 0)
1110 ctype->class_collection[cnt] |= BIT (tok_print);
1111
1112 space = charset_find_value (charset, "space", 5);
1113 if (space == ILLEGAL_CHAR_VALUE)
1114 error (0, 0, _("\
1115character `%s' not defined while needed as default value"),
1116 "<space>");
1117 else
1118 ELEM (ctype, class_collection, , space) |= BIT (tok_print);
1119 }
1120
1121 if (ctype->toupper_done == 0)
1122 /* "If this keyword [toupper] is not specified, the lowercase letters
1123 `a' through `z', and their corresponding uppercase letters `A' to
1124 `Z', ..., shall automatically be included, with implementation-
1125 defined character values." [P1003.2, 2.5.2.1] */
1126 {
1127 char tmp[4];
1128 int ch;
1129
1130 strcpy (tmp, "<?>");
1131
1132 for (ch = 'a'; ch <= 'z'; ++ch)
1133 {
1134 unsigned int value_from, value_to;
1135
1136 tmp[1] = (char) ch;
1137
1138 value_from = charset_find_value (charset, &tmp[1], 1);
1139 if (value_from == ILLEGAL_CHAR_VALUE)
1140 {
1141 error (0, 0, _("\
1142character `%c' not defined while needed as default value"),
1143 tmp);
1144 continue;
1145 }
1146
1147 /* This conversion is implementation defined. */
1148 tmp[1] = (char) (ch + ('A' - 'a'));
1149 value_to = charset_find_value (charset, &tmp[1], 1);
1150 if (value_to == -1)
1151 {
1152 error (0, 0, _("\
1153character `%s' not defined while needed as default value"),
1154 tmp);
1155 continue;
1156 }
1157
1158 /* The index [0] is determined by the order of the
1159 `ctype_map_newP' calls in `ctype_startup'. */
1160 ELEM (ctype, map_collection, [0], value_from) = value_to;
1161 }
1162 }
1163
1164 if (ctype->tolower_done == 0)
1165 /* "If this keyword [tolower] is not specified, the mapping shall be
1166 the reverse mapping of the one specified to `toupper'." [P1003.2] */
1167 {
1168 size_t cnt;
1169
1170 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
1171 if (ctype->map_collection[0][cnt] != 0)
1172 ELEM (ctype, map_collection, [1],
1173 ctype->map_collection[0][cnt])
1174 = ctype->charnames[cnt];
1175 }
1176}
1177
1178
1179static void
75cd5204 1180allocate_arrays (struct locale_ctype_t *ctype, struct charset_t *charset)
19bc17a9
RM
1181{
1182 size_t idx;
1183
1184 /* First we have to decide how we organize the arrays. It is easy for
1185 a one-byte character set. But multi-byte character set cannot be
1186 stored flat because they might be sparsly used. So we determine an
1187 optimal hashing function for the used characters.
1188
1189 We use a very trivial hashing function to store the sparse table.
1190 CH % TABSIZE is used as an index. To solve multiple hits we have
1191 N planes. This gurantees a fixed search time for a character [N
1192 / 2]. In the following code we determine the minmum value for
1193 TABSIZE * N, where TABSIZE >= 256. */
1194 size_t min_total = UINT_MAX;
1195 size_t act_size = 256;
1196
1197 fputs (_("\
1198Computing table size for character classes might take a while..."),
1199 stderr);
1200
1201 while (act_size < min_total)
1202 {
1203 size_t cnt[act_size];
1204 size_t act_planes = 1;
1205
1206 memset (cnt, '\0', sizeof cnt);
1207
1208 for (idx = 0; idx < 256; ++idx)
1209 cnt[idx] = 1;
1210
1211 for (idx = 0; idx < ctype->charnames_act; ++idx)
1212 if (ctype->charnames[idx] >= 256)
1213 {
1214 size_t nr = ctype->charnames[idx] % act_size;
1215
1216 if (++cnt[nr] > act_planes)
1217 {
1218 act_planes = cnt[nr];
1219 if (act_size * act_planes >= min_total)
1220 break;
1221 }
1222 }
1223
1224 if (act_size * act_planes < min_total)
1225 {
1226 min_total = act_size * act_planes;
1227 ctype->plane_size = act_size;
1228 ctype->plane_cnt = act_planes;
1229 }
1230
1231 ++act_size;
1232 }
1233
1234 fprintf (stderr, _(" done\n"));
1235
75cd5204 1236
19bc17a9
RM
1237#if __BYTE_ORDER == __LITTLE_ENDIAN
1238# define NAMES_B1 ctype->names_el
1239# define NAMES_B2 ctype->names_eb
1240#else
1241# define NAMES_B1 ctype->names_eb
1242# define NAMES_B2 ctype->names_el
1243#endif
1244
7a12c6bb
RM
1245 ctype->names_eb = (u_int32_t *) xcalloc (ctype->plane_size
1246 * ctype->plane_cnt,
1247 sizeof (u_int32_t));
1248 ctype->names_el = (u_int32_t *) xcalloc (ctype->plane_size
1249 * ctype->plane_cnt,
1250 sizeof (u_int32_t));
19bc17a9
RM
1251
1252 for (idx = 1; idx < 256; ++idx)
1253 NAMES_B1[idx] = idx;
1254
1255 /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */
1256 NAMES_B1[0] = 1;
1257
1258 for (idx = 256; idx < ctype->charnames_act; ++idx)
1259 {
1260 size_t nr = (ctype->charnames[idx] % ctype->plane_size);
1261 size_t depth = 0;
1262
1263 while (NAMES_B1[nr + depth * ctype->plane_size])
1264 ++depth;
1265 assert (depth < ctype->plane_cnt);
1266
1267 NAMES_B1[nr + depth * ctype->plane_size] = ctype->charnames[idx];
1268
1269 /* Now for faster access remember the index in the NAMES_B array. */
1270 ctype->charnames[idx] = nr + depth * ctype->plane_size;
1271 }
1272 NAMES_B1[0] = 0;
1273
1274 for (idx = 0; idx < ctype->plane_size * ctype->plane_cnt; ++idx)
1275 NAMES_B2[idx] = SWAPU32 (NAMES_B1[idx]);
1276
1277
1278 /* You wonder about this amount of memory? This is only because some
1279 users do not manage to address the array with unsigned values or
1280 data types with range >= 256. '\200' would result in the array
1281 index -128. To help these poor people we duplicate the entries for
1282 128 up to 255 below the entry for \0. */
1283 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
1284 sizeof (char_class_t));
1285 ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
1286 * ctype->plane_cnt,
1287 sizeof (char_class32_t));
1288
1289 /* Fill in the character class information. */
1290#if __BYTE_ORDER == __LITTLE_ENDIAN
1291# define TRANS(w) CHAR_CLASS_TRANS (w)
1292# define TRANS32(w) CHAR_CLASS32_TRANS (w)
1293#else
1294# define TRANS(w) (w)
1295# define TRANS32(w) (w)
1296#endif
1297
1298 for (idx = 0; idx < ctype->class_collection_act; ++idx)
1299 if (ctype->charnames[idx] < 256)
1300 ctype->ctype_b[128 + ctype->charnames[idx]]
1301 = TRANS (ctype->class_collection[idx]);
1302
75cd5204
RM
1303 /* Mirror first 127 entries. We must take care that entry -1 is not
1304 mirrored because EOF == -1. */
1305 for (idx = 0; idx < 127; ++idx)
19bc17a9
RM
1306 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
1307
1308 /* The 32 bit array contains all characters. */
1309 for (idx = 0; idx < ctype->class_collection_act; ++idx)
1310 ctype->ctype32_b[ctype->charnames[idx]]
1311 = TRANS32 (ctype->class_collection[idx]);
1312
1313 /* Room for table of mappings. */
7a12c6bb
RM
1314 ctype->map_eb = (u_int32_t **) xmalloc (ctype->map_collection_nr
1315 * sizeof (u_int32_t *));
1316 ctype->map_el = (u_int32_t **) xmalloc (ctype->map_collection_nr
1317 * sizeof (u_int32_t *));
19bc17a9
RM
1318
1319 /* Fill in all mappings. */
1320 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
1321 {
1322 unsigned int idx2;
1323
1324 /* Allocate table. */
7a12c6bb
RM
1325 ctype->map_eb[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1326 * ctype->plane_cnt + 128)
1327 * sizeof (u_int32_t));
1328 ctype->map_el[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
1329 * ctype->plane_cnt + 128)
1330 * sizeof (u_int32_t));
19bc17a9
RM
1331
1332#if __BYTE_ORDER == __LITTLE_ENDIAN
1333# define MAP_B1 ctype->map_el
1334# define MAP_B2 ctype->map_eb
1335#else
1336# define MAP_B1 ctype->map_eb
1337# define MAP_B2 ctype->map_el
1338#endif
1339
1340 /* Copy default value (identity mapping). */
1341 memcpy (&MAP_B1[idx][128], NAMES_B1,
7a12c6bb 1342 ctype->plane_size * ctype->plane_cnt * sizeof (u_int32_t));
19bc17a9
RM
1343
1344 /* Copy values from collection. */
1345 for (idx2 = 0; idx2 < ctype->map_collection_act[idx]; ++idx2)
1346 if (ctype->map_collection[idx][idx2] != 0)
1347 MAP_B1[idx][128 + ctype->charnames[idx2]] =
1348 ctype->map_collection[idx][idx2];
1349
75cd5204
RM
1350 /* Mirror first 127 entries. We must take care not to map entry
1351 -1 because EOF == -1. */
1352 for (idx2 = 0; idx2 < 127; ++idx2)
19bc17a9
RM
1353 MAP_B1[idx][idx2] = MAP_B1[idx][256 + idx2];
1354
75cd5204
RM
1355 /* EOF must map to EOF. */
1356 MAP_B1[idx][127] = EOF;
19bc17a9
RM
1357
1358 /* And now the other byte order. */
1359 for (idx2 = 0; idx2 < ctype->plane_size * ctype->plane_cnt + 128; ++idx2)
1360 MAP_B2[idx][idx2] = SWAPU32 (MAP_B1[idx][idx2]);
1361 }
1362
1363 /* Extra array for class and map names. */
7a12c6bb
RM
1364 ctype->class_name_ptr = (u_int32_t *) xmalloc (ctype->nr_charclass
1365 * sizeof (u_int32_t));
1366 ctype->map_name_ptr = (u_int32_t *) xmalloc (ctype->map_collection_nr
1367 * sizeof (u_int32_t));
75cd5204
RM
1368
1369 /* Array for width information. Because the expected width are very
1370 small we use only one single byte. This save space and we need
1371 not provide the information twice with both endianesses. */
1372 ctype->width = (unsigned char *) xmalloc (ctype->plane_size
1373 * ctype->plane_cnt);
1374 /* Initialize with default width value. */
1375 memset (ctype->width, charset->width_default,
1376 ctype->plane_size * ctype->plane_cnt);
1377 if (charset->width_rules != NULL)
1378 {
1379 size_t cnt;
1380
1381 for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
1382 if (charset->width_rules[cnt].width != charset->width_default)
1383 for (idx = charset->width_rules[cnt].from;
1384 idx <= charset->width_rules[cnt].to; ++idx)
1385 {
1386 size_t nr = idx % ctype->plane_size;
1387 size_t depth = 0;
1388
1389 while (NAMES_B1[nr + depth * ctype->plane_size] != nr)
1390 ++depth;
1391 assert (depth < ctype->plane_cnt);
1392
1393 ctype->width[nr + depth * ctype->plane_size]
1394 = charset->width_rules[cnt].width;
1395 }
1396 }
0200214b
RM
1397
1398 /* Compute MB_CUR_MAX. Please note the value mb_cur_max in the
1399 character set definition gives the number of bytes in the wide
1400 character representation. We compute the number of bytes used
1401 for the UTF-8 encoded form. */
1402 ctype->mb_cur_max = ((int []) { 2, 3, 5, 6 }) [charset->mb_cur_max - 1];
6990326c
RM
1403
1404 /* We need the name of the currently used 8-bit character set to
1405 make correct conversion between this 8-bit representation and the
1406 ISO 10646 character set used internally for wide characters. */
1407 ctype->codeset_name = charset->code_set_name;
19bc17a9 1408}