locale/programs/ld-ctype.c

   1 /* Copyright (C) 1995-1999, 2000 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Library General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Library General Public License for more details.
  14
  15    You should have received a copy of the GNU Library General Public
  16    License along with the GNU C Library; see the file COPYING.LIB.  If not,
  17    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  18    Boston, MA 02111-1307, USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23
  24 #include <alloca.h>
  25 #include <byteswap.h>
  26 #include <endian.h>
  27 #include <errno.h>
  28 #include <limits.h>
  29 #include <obstack.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32 #include <wchar.h>
  33 #include <wctype.h>
  34 #include <sys/uio.h>
  35
  36 #include "charmap.h"
  37 #include "localeinfo.h"
  38 #include "langinfo.h"
  39 #include "linereader.h"
  40 #include "locfile-token.h"
  41 #include "locfile.h"
  42 #include "localedef.h"
  43
  44 #include <assert.h>
  45
  46
  47 #ifdef PREDEFINED_CLASSES
  48 /* These are the extra bits not in wctype.h since these are not preallocated
  49    classes.  */
     /* The constants are unsigned: shifting a signed 1 into bit 31 is
        undefined behaviour, and the masks are only ever combined with
        uint32_t class words.  */
  50 # define _ISwspecial1   (1U << 29)
  51 # define _ISwspecial2   (1U << 30)
  52 # define _ISwspecial3   (1U << 31)
  53 #endif
  54
  55
  56 /* The bit used for representing a special class.  */
     /* BITPOS turns a class token into a bit index counted from tok_upper;
        BIT and BITw feed that index to _ISbit and _ISwbit respectively.  */
  57 #define BITPOS(class) ((class) - tok_upper)
  58 #define BIT(class) (_ISbit (BITPOS (class)))
  59 #define BITw(class) (_ISwbit (BITPOS (class)))
  60
     /* Expands to the object find_idx points at for VALUE in
        CTYPE->COLLECTION, so it can be read or assigned.  IDX is pasted in
        textually after the member names; it is left empty for
        class_collection and is presumably a subscript for the per-map
        arrays (confirm against the callers).  CTYPE is not parenthesized,
        so pass a plain identifier.  */
  61 #define ELEM(ctype, collection, idx, value)                                   \
  62   *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
  63              &ctype->collection##_act idx, value)
  64
  65
  66 /* To be compatible with former implementations we for now restrict
  67    the number of bits for character classes to 16.  When compatibility
  68    is not necessary anymore increase the number to 32.  */
  69 #define char_class_t uint16_t
  70 #define char_class32_t uint32_t
  71
  72
  73 /* Type to describe a transliteration action.  We have a possibly
  74    multiple character from-string and a set of multiple character
  75    to-strings.  All are 32bit values since this is what is used in
  76    the gconv functions.  */
  77 struct translit_to_t
  78 {
       /* One replacement string, stored as 32-bit character values.  */
  79   uint32_t *str;
  80
       /* Next alternative replacement in this singly linked list.  */
  81   struct translit_to_t *next;
  82 };
  83
  84 struct translit_t
  85 {
       /* The string to be replaced, stored as 32-bit character values.  */
  86   uint32_t *from;
  87
       /* Presumably the locale source file and line where this rule was
          read, kept for diagnostics -- confirm against the parser.  */
  88   const char *fname;
  89   size_t lineno;
  90
       /* List of replacement strings for FROM.  */
  91   struct translit_to_t *to;
  92
       /* Next rule in the singly linked list.  */
  93   struct translit_t *next;
  94 };
  95
  96 struct translit_ignore_t
  97 {
       /* Presumably a character range FROM..TO visited in increments of STEP
          -- confirm against the parser.  ctype_finish orders the list by
          FROM.  */
  98   uint32_t from;
  99   uint32_t to;
 100   uint32_t step;
 101
       /* Presumably the source location of the entry, for diagnostics.  */
 102   const char *fname;
 103   size_t lineno;
 104
       /* Next entry in the singly linked list.  */
 105   struct translit_ignore_t *next;
 106 };
 107
 108
 109 /* The real definition of the struct for the LC_CTYPE locale.  */
 110 struct locale_ctype_t
 111 {
       /* Wide character values known so far.  ctype_startup seeds slots
          0..255 with the values 0..255; the `_max' and `_act' members are
          the allocated and used element counts.  */
 112   uint32_t *charnames;
 113   size_t charnames_max;
 114   size_t charnames_act;
 115
       /* Repertoire map used to translate charmap symbol names to UCS4
          values; loaded in ctype_finish.  */
 116   struct repertoire_t *repertoire;
 117
 118   /* We will allow up to 8 * sizeof (uint32_t) character classes.  */
 119 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
 120   size_t nr_charclass;
 121   const char *classnames[MAX_NR_CHARCLASS];
       /* Starts out as ILLEGAL_CHAR_VALUE (see ctype_startup).  */
 122   uint32_t last_class_char;
       /* Class bit masks for the 256 single-byte values, indexed by byte.  */
 123   uint32_t class256_collection[256];
       /* Class bit masks for wide characters; slot N belongs to the
          character stored in charnames[N].  */
 124   uint32_t *class_collection;
 125   size_t class_collection_max;
 126   size_t class_collection_act;
 127   uint32_t class_done;
 128   uint32_t class_offset;
 129
       /* Input digits as multibyte sequences and as wide characters.
          ctype_finish keeps only complete groups of ten.  */
 130   struct charseq **mbdigits;
 131   size_t mbdigits_act;
 132   size_t mbdigits_max;
 133   uint32_t *wcdigits;
 134   size_t wcdigits_act;
 135   size_t wcdigits_max;
 136
       /* Output digits; ctype_finish substitutes `?' for missing ones.  */
 137   struct charseq *mboutdigits[10];
 138   uint32_t wcoutdigits[10];
 139   size_t outdigits_act;
 140
 141   /* If the following number ever turns out to be too small simply
 142      increase it.  But I doubt it will.  --drepper@gnu */
 143 #define MAX_NR_CHARMAP 16
 144   const char *mapnames[MAX_NR_CHARMAP];
 145   uint32_t *map_collection[MAX_NR_CHARMAP];
       /* Byte-indexed tables for the first two maps; ctype_startup
          registers these as "toupper" and "tolower".  */
 146   uint32_t map256_collection[2][256];
 147   size_t map_collection_max[MAX_NR_CHARMAP];
 148   size_t map_collection_act[MAX_NR_CHARMAP];
 149   size_t map_collection_nr;
       /* Starts out as MAX_NR_CHARMAP (see ctype_startup).  */
 150   size_t last_map_idx;
 151   int tomap_done[MAX_NR_CHARMAP];
 152   uint32_t map_offset;
 153
 154   /* Transliteration information.  */
 155   const char *translit_copy_locale;
 156   const char *translit_copy_repertoire;
 157   struct translit_t *translit;
       /* List ordered by `from' and counted in ctype_finish.  */
 158   struct translit_ignore_t *translit_ignore;
 159   uint32_t ntranslit_ignore;
 160
 161   uint32_t *default_missing;
 162   const char *default_missing_file;
 163   size_t default_missing_lineno;
 164
 165   /* The arrays for the binary representation.  */
 166   uint32_t plane_size;
 167   uint32_t plane_cnt;
 168   char_class_t *ctype_b;
 169   char_class32_t *ctype32_b;
 170   uint32_t *names;
 171   uint32_t **map;
 172   uint32_t **map32;
 173   struct iovec *class_3level;
 174   struct iovec *map_3level;
 175   uint32_t *class_name_ptr;
 176   uint32_t *map_name_ptr;
 177   unsigned char *width;
 178   struct iovec width_3level;
 179   uint32_t mb_cur_max;
       /* Taken from the charmap in ctype_finish, or "//UNKNOWN//" when the
          charmap names no code set.  */
 180   const char *codeset_name;
 181   uint32_t *translit_from_idx;
 182   uint32_t *translit_from_tbl;
 183   uint32_t *translit_to_idx;
 184   uint32_t *translit_to_tbl;
 185   uint32_t translit_idx_size;
 186   size_t translit_from_tbl_size;
 187   size_t translit_to_tbl_size;
 188
       /* Obstack initialized at the end of ctype_startup.  */
 189   struct obstack mempool;
 190 };
 191
 192
 193 #define obstack_chunk_alloc xmalloc
     /* These two macros name the functions obstack.h uses to obtain and
        release chunks; they must be defined before obstack_init is used
        (see ctype_startup).  */
 194 #define obstack_chunk_free free
 195
 196
 197 /* Prototypes for local functions.  */
     /* Sets up the LC_CTYPE state of LOCALE unless IGNORE_CONTENT is set.  */
 198 static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
 199                            struct charmap_t *charmap, int ignore_content);
     /* Registers a character class; the order of the calls in ctype_startup
        determines the bit positions.  */
 200 static void ctype_class_new (struct linereader *lr,
 201                              struct locale_ctype_t *ctype, const char *name);
     /* Registers a character map such as "toupper".  */
 202 static void ctype_map_new (struct linereader *lr,
 203                            struct locale_ctype_t *ctype,
 204                            const char *name, struct charmap_t *charmap);
     /* Returns a pointer to the slot for IDX in *TABLE; the ELEM macro
        dereferences it.  ctype_finish also calls it with TABLE, MAX and ACT
        all NULL purely for the side effect of entering IDX in the name
        array.  */
 205 static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
 206                            size_t *max, size_t *act, unsigned int idx);
 207 static void set_class_defaults (struct locale_ctype_t *ctype,
 208                                 struct charmap_t *charmap,
 209                                 struct repertoire_t *repertoire);
 210 static void allocate_arrays (struct locale_ctype_t *ctype,
 211                              struct charmap_t *charmap,
 212                              struct repertoire_t *repertoire);
 213
 214
 215 static const char *longnames[] =
 216 {
       /* Symbolic digit names; ctype_finish tries these when the one
          character names in `digits' are not found in the charmap.  */
 217   "zero", "one", "two", "three", "four",
 218   "five", "six", "seven", "eight", "nine"
 219 };
 220 static const char *uninames[] =
 221 {
       /* UCS4 style symbol names of the ASCII digits.  Not referenced in the
          part of the file reviewed here.  */
 222   "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
 223   "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
 224 };
     /* The ASCII digits themselves: used as one character lookup names and
        as last resort byte values for the default input digits.  */
 225 static const unsigned char digits[] = "0123456789";
 226
 227
 228 static void
 229 ctype_startup (struct linereader *lr, struct localedef_t *locale,
 230                struct charmap_t *charmap, int ignore_content)
 231 {
       /* Create the LC_CTYPE state for LOCALE and seed it with the standard
          classes and maps.  LR is only handed on to ctype_class_new and
          ctype_map_new; ctype_finish passes NULL for it.  CHARMAP supplies
          mb_cur_max, which picks the initial table sizes.  Nothing at all
          is done when IGNORE_CONTENT is nonzero.  */
 232   unsigned int cnt;
 233   struct locale_ctype_t *ctype;
 234
 235   if (!ignore_content)
 236     {
 237       /* Allocate the needed room.  */
 238       locale->categories[LC_CTYPE].ctype = ctype =
 239         (struct locale_ctype_t *) xcalloc (1, sizeof (struct locale_ctype_t));
 240
 241       /* We have seen no names yet.  */
 242       ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
           /* Size the array by its real element type, uint32_t.  */
 243       ctype->charnames =
 244         (uint32_t *) xmalloc (ctype->charnames_max
 245                               * sizeof (uint32_t));
 246       for (cnt = 0; cnt < 256; ++cnt)
 247         ctype->charnames[cnt] = cnt;
 248       ctype->charnames_act = 256;
 249
 250       /* Fill character class information.  */
 251       ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 252       /* The order of the following instructions determines the bit
 253          positions!  */
 254       ctype_class_new (lr, ctype, "upper");
 255       ctype_class_new (lr, ctype, "lower");
 256       ctype_class_new (lr, ctype, "alpha");
 257       ctype_class_new (lr, ctype, "digit");
 258       ctype_class_new (lr, ctype, "xdigit");
 259       ctype_class_new (lr, ctype, "space");
 260       ctype_class_new (lr, ctype, "print");
 261       ctype_class_new (lr, ctype, "graph");
 262       ctype_class_new (lr, ctype, "blank");
 263       ctype_class_new (lr, ctype, "cntrl");
 264       ctype_class_new (lr, ctype, "punct");
 265       ctype_class_new (lr, ctype, "alnum");
 266 #ifdef PREDEFINED_CLASSES
 267       /* The following are extensions from ISO 14652.  */
 268       ctype_class_new (lr, ctype, "left_to_right");
 269       ctype_class_new (lr, ctype, "right_to_left");
 270       ctype_class_new (lr, ctype, "num_terminator");
 271       ctype_class_new (lr, ctype, "num_separator");
 272       ctype_class_new (lr, ctype, "segment_separator");
 273       ctype_class_new (lr, ctype, "block_separator");
 274       ctype_class_new (lr, ctype, "direction_control");
 275       ctype_class_new (lr, ctype, "sym_swap_layout");
 276       ctype_class_new (lr, ctype, "char_shape_selector");
 277       ctype_class_new (lr, ctype, "num_shape_selector");
 278       ctype_class_new (lr, ctype, "non_spacing");
 279       ctype_class_new (lr, ctype, "non_spacing_level3");
 280       ctype_class_new (lr, ctype, "normal_connect");
 281       ctype_class_new (lr, ctype, "r_connect");
 282       ctype_class_new (lr, ctype, "no_connect");
 283       ctype_class_new (lr, ctype, "no_connect-space");
 284       ctype_class_new (lr, ctype, "vowel_connect");
 285 #endif
 286
 287       ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
           /* Zero-filled array of uint32_t masks: element count first,
              element size second.  */
 288       ctype->class_collection
 289         = (uint32_t *) xcalloc (ctype->class_collection_max,
 290                                 sizeof (uint32_t));
 291       ctype->class_collection_act = 256;
 292
 293       /* Fill character map information.  */
 294       ctype->last_map_idx = MAX_NR_CHARMAP;
 295       ctype_map_new (lr, ctype, "toupper", charmap);
 296       ctype_map_new (lr, ctype, "tolower", charmap);
 297 #ifdef PREDEFINED_CLASSES
 298       ctype_map_new (lr, ctype, "tosymmetric", charmap);
 299 #endif
 300
 301       /* Fill first 256 entries in `toXXX' arrays.  */
 302       for (cnt = 0; cnt < 256; ++cnt)
 303         {
 304           ctype->map_collection[0][cnt] = cnt;
 305           ctype->map_collection[1][cnt] = cnt;
 306 #ifdef PREDEFINED_CLASSES
 307           ctype->map_collection[2][cnt] = cnt;
 308 #endif
 309           ctype->map256_collection[0][cnt] = cnt;
 310           ctype->map256_collection[1][cnt] = cnt;
 311         }
 312
 313       obstack_init (&ctype->mempool);
 314     }
 315 }
 316
 317
 318 void
 319 ctype_finish (struct localedef_t *locale, struct charmap_t *charmap)
 320 {
       /* Complete and sanity check the LC_CTYPE data of LOCALE against
          CHARMAP.  In order: resolve a `copy' directive or create an empty
          definition, record the code set name, apply the class defaults,
          check every character against the POSIX class compatibility
          table, check <SP>, enter all charmap characters into the name
          array, normalize the input and output digit tables, and finally
          order and count the translit_ignore list.  Problems are reported
          through error (); only an inconsistent table is fatal.  */
 321   /* See POSIX.2, table 2-6 for the meaning of the following table.  */
 322 #define NCLASS 12
 323   static const struct
 324   {
 325     const char *name;
 326     const char allow[NCLASS];
 327   }
 328   valid_table[NCLASS] =
 329   {
 330     /* The order is important.  See token.h for more information.
 331        M = Always, D = Default, - = Permitted, X = Mutually exclusive  */
 332     { "upper",  "--MX-XDDXXX-" },
 333     { "lower",  "--MX-XDDXXX-" },
 334     { "alpha",  "---X-XDDXXX-" },
 335     { "digit",  "XXX--XDDXXX-" },
 336     { "xdigit", "-----XDDXXX-" },
 337     { "space",  "XXXXX------X" },
 338     { "print",  "---------X--" },
 339     { "graph",  "---------X--" },
 340     { "blank",  "XXXXXM-----X" },
 341     { "cntrl",  "XXXXX-XX--XX" },
 342     { "punct",  "XXXXX-DD-X-X" },
 343     { "alnum",  "-----XDDXXX-" }
 344   };
 345   size_t cnt;
 346   int cls1, cls2;
 347   uint32_t space_value;
 348   struct charseq *space_seq;
 349   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 350   int warned;
 351   const void *key;
 352   size_t len;
 353   void *vdata;
 354   void *curs;
 355
 356   /* Now resolve copying and also handle completely missing definitions.  */
 357   if (ctype == NULL)
 358     {
 359       const char *repertoire_name;
 360
 361       /* First see whether we were supposed to copy.  If yes, find the
 362          actual definition.  */
 363       if (locale->copy_name[LC_CTYPE] != NULL)
 364         {
 365           /* Find the copying locale.  This has to happen transitively since
 366              the locale we are copying from might also be copying another one.  */
 367           struct localedef_t *from = locale;
 368
 369           do
 370             from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
 371                                 from->repertoire_name, charmap);
 372           while (from->categories[LC_CTYPE].ctype == NULL
 373                  && from->copy_name[LC_CTYPE] != NULL);
 374
 375           ctype = locale->categories[LC_CTYPE].ctype
 376             = from->categories[LC_CTYPE].ctype;
 377         }
 378
 379       /* If there is still no definition issue a warning and create an
 380          empty one.  */
 381       if (ctype == NULL)
 382         {
 383           if (! be_quiet)
 384             error (0, 0, _("No definition for %s category found"), "LC_CTYPE");
 385           ctype_startup (NULL, locale, charmap, 0);
 386           ctype = locale->categories[LC_CTYPE].ctype;
 387         }
 388
 389       /* Get the repertoire we have to use.  */
 390       repertoire_name = locale->repertoire_name ?: repertoire_global;
 391       if (repertoire_name != NULL)
 392         ctype->repertoire = repertoire_read (repertoire_name);
 393     }
 394
 395   /* We need the name of the currently used 8-bit character set to
 396      make correct conversion between this 8-bit representation and the
 397      ISO 10646 character set used internally for wide characters.  */
 398   ctype->codeset_name = charmap->code_set_name;
 399   if (ctype->codeset_name == NULL)
 400     {
 401       if (! be_quiet)
 402         error (0, 0, "no character set name specified in charmap");
 403       ctype->codeset_name = "//UNKNOWN//";
 404     }
 405
 406   /* Set default value for classes not specified.  */
 407   set_class_defaults (ctype, charmap, ctype->repertoire);
 408
 409   /* Check according to table.  */
       /* First pass: the wide character masks.  `M' and `X' violations are
          only reported; `D' entries add the default class bit.  */
 410   for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
 411     {
 412       uint32_t tmp = ctype->class_collection[cnt];
 413
 414       if (tmp != 0)
 415         {
 416           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 417             if ((tmp & _ISwbit (cls1)) != 0)
 418               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 419                 if (valid_table[cls1].allow[cls2] != '-')
 420                   {
 421                     int eq = (tmp & _ISwbit (cls2)) != 0;
 422                     switch (valid_table[cls1].allow[cls2])
 423                       {
 424                       case 'M':
 425                         if (!eq)
 426                           {
 427                             uint32_t value = ctype->charnames[cnt];
 428
 429                             if (!be_quiet)
 430                               error (0, 0, _("\
 431 character L'\\u%0*x' in class `%s' must be in class `%s'"),
 432                                      value > 0xffff ? 8 : 4, value,
 433                                      valid_table[cls1].name,
 434                                      valid_table[cls2].name);
 435                           }
 436                         break;
 437
 438                       case 'X':
 439                         if (eq)
 440                           {
 441                             uint32_t value = ctype->charnames[cnt];
 442
 443                             if (!be_quiet)
 444                               error (0, 0, _("\
 445 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
 446                                      value > 0xffff ? 8 : 4, value,
 447                                      valid_table[cls1].name,
 448                                      valid_table[cls2].name);
 449                           }
 450                         break;
 451
 452                       case 'D':
 453                         ctype->class_collection[cnt] |= _ISwbit (cls2);
 454                         break;
 455
 456                       default:
 457                         error (5, 0, _("internal error in %s, line %u"),
 458                                __FUNCTION__, __LINE__);
 459                       }
 460                   }
 461         }
 462     }
 463
       /* Second pass: the same check for the 256 single byte masks.  */
 464   for (cnt = 0; cnt < 256; ++cnt)
 465     {
 466       uint32_t tmp = ctype->class256_collection[cnt];
 467
 468       if (tmp != 0)
 469         {
 470           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 471             if ((tmp & _ISbit (cls1)) != 0)
 472               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 473                 if (valid_table[cls1].allow[cls2] != '-')
 474                   {
 475                     int eq = (tmp & _ISbit (cls2)) != 0;
 476                     switch (valid_table[cls1].allow[cls2])
 477                       {
 478                       case 'M':
 479                         if (!eq)
 480                           {
 481                             char buf[17];
 482
 483                             snprintf (buf, sizeof buf, "\\%Zo", cnt);
 484
 485                             if (!be_quiet)
 486                               error (0, 0, _("\
 487 character '%s' in class `%s' must be in class `%s'"),
 488                                      buf, valid_table[cls1].name,
 489                                      valid_table[cls2].name);
 490                           }
 491                         break;
 492
 493                       case 'X':
 494                         if (eq)
 495                           {
 496                             char buf[17];
 497
 498                             snprintf (buf, sizeof buf, "\\%Zo", cnt);
 499
 500                             if (!be_quiet)
 501                               error (0, 0, _("\
 502 character '%s' in class `%s' must not be in class `%s'"),
 503                                      buf, valid_table[cls1].name,
 504                                      valid_table[cls2].name);
 505                           }
 506                         break;
 507
 508                       case 'D':
 509                         ctype->class256_collection[cnt] |= _ISbit (cls2);
 510                         break;
 511
 512                       default:
 513                         error (5, 0, _("internal error in %s, line %u"),
 514                                __FUNCTION__, __LINE__);
 515                       }
 516                   }
 517         }
 518     }
 519
 520   /* ... and now test <SP> as a special case.  */
       /* In the conditions below CNT is set, via the comma operator, to the
          index of the class being tested so that the message can name it.  */
 521   space_value = 32;
 522   if (((cnt = BITPOS (tok_space),
 523         (ELEM (ctype, class_collection, , space_value)
 524          & BITw (tok_space)) == 0)
 525        || (cnt = BITPOS (tok_blank),
 526            (ELEM (ctype, class_collection, , space_value)
 527             & BITw (tok_blank)) == 0)))
 528     {
 529       if (!be_quiet)
 530         error (0, 0, _("<SP> character not in class `%s'"),
 531                valid_table[cnt].name);
 532     }
 533   else if (((cnt = BITPOS (tok_punct),
 534              (ELEM (ctype, class_collection, , space_value)
 535               & BITw (tok_punct)) != 0)
 536             || (cnt = BITPOS (tok_graph),
 537                 (ELEM (ctype, class_collection, , space_value)
 538                  & BITw (tok_graph))
 539                 != 0)))
 540     {
 541       if (!be_quiet)
 542         error (0, 0, _("<SP> character must not be in class `%s'"),
 543                valid_table[cnt].name);
 544     }
 545   else
 546     ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
 547
 548   space_seq = charmap_find_value (charmap, "SP", 2);
 549   if (space_seq == NULL)
 550     space_seq = charmap_find_value (charmap, "space", 5);
 551   if (space_seq == NULL)
 552     space_seq = charmap_find_value (charmap, "U00000020", 9);
 553   if (space_seq == NULL || space_seq->nbytes != 1)
 554     {
 555       if (!be_quiet)
 556         error (0, 0, _("character <SP> not defined in character map"));
 557     }
 558   else if (((cnt = BITPOS (tok_space),
 559              (ctype->class256_collection[space_seq->bytes[0]]
 560               & BIT (tok_space)) == 0)
 561             || (cnt = BITPOS (tok_blank),
 562                 (ctype->class256_collection[space_seq->bytes[0]]
 563                  & BIT (tok_blank)) == 0)))
 564     {
 565       if (!be_quiet)
 566         error (0, 0, _("<SP> character not in class `%s'"),
 567                valid_table[cnt].name);
 568     }
 569   else if (((cnt = BITPOS (tok_punct),
 570              (ctype->class256_collection[space_seq->bytes[0]]
 571               & BIT (tok_punct)) != 0)
 572             || (cnt = BITPOS (tok_graph),
 573                 (ctype->class256_collection[space_seq->bytes[0]]
 574                  & BIT (tok_graph)) != 0)))
 575     {
 576       if (!be_quiet)
 577         error (0, 0, _("<SP> character must not be in class `%s'"),
 578                valid_table[cnt].name);
 579     }
 580   else
 581     ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
 582
 583   /* Now that the tests are done make sure the name array contains all
 584      characters which are handled in the WIDTH section of the
 585      character set definition file.  */
 586   if (charmap->width_rules != NULL)
 587     for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
 588       {
 589         unsigned char bytes[charmap->mb_cur_max];
 590         int nbytes = charmap->width_rules[cnt].from->nbytes;
 591
 592         /* We have the range of character for which the width is
 593            specified described using byte sequences of the multibyte
 594            charset.  We have to convert this to UCS4 now.  And we
 595            cannot simply convert the beginning and the end of the
 596            sequence, we have to iterate over the byte sequence and
 597            convert it for every single character.  */
 598         memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
 599
 600         while (nbytes < charmap->width_rules[cnt].to->nbytes
 601                || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
 602                           nbytes) <= 0)
 603           {
 604             /* Find the UCS value for `bytes'.  */
 605             int inner;
 606             uint32_t wch;
 607             struct charseq *seq = charmap_find_symbol (charmap, bytes, nbytes);
 608
 609             if (seq == NULL)
 610               wch = ILLEGAL_CHAR_VALUE;
 611             else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
 612               wch = seq->ucs4;
 613             else
 614               wch = repertoire_find_value (ctype->repertoire, seq->name,
 615                                            strlen (seq->name));
 616
 617             if (wch != ILLEGAL_CHAR_VALUE)
 618               /* We are only interested in the side-effects of the
 619                  `find_idx' call.  It will add appropriate entries in
 620                  the name array if this is necessary.  */
 621               (void) find_idx (ctype, NULL, NULL, NULL, wch);
 622
 623             /* "Increment" the bytes sequence.  */
 624             inner = nbytes - 1;
 625             while (inner >= 0 && bytes[inner] == 0xff)
 626               --inner;
 627
 628             if (inner < 0)
 629               {
 630                 /* We have to extend the byte sequence.  */
 631                 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
 632                   break;
 633
 634                 bytes[0] = 1;
 635                 memset (&bytes[1], 0, nbytes);
 636                 ++nbytes;
 637               }
 638             else
 639               {
 640                 ++bytes[inner];
 641                 while (++inner < nbytes)
 642                   bytes[inner] = 0;
 643               }
 644           }
 645       }
 646
 647   /* Now set all the other characters of the character set to the
 648      default width.  */
 649   curs = NULL;
 650   while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
 651     {
 652       struct charseq *data = (struct charseq *) vdata;
 653
 654       if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
 655         data->ucs4 = repertoire_find_value (ctype->repertoire,
 656                                             data->name, len);
 657
 658       if (data->ucs4 != ILLEGAL_CHAR_VALUE)
 659         (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
 660     }
 661
 662   /* There must be a multiple of 10 digits.  */
 663   if (ctype->mbdigits_act % 10 != 0)
 664     {
 665       assert (ctype->mbdigits_act == ctype->wcdigits_act);
 666       ctype->wcdigits_act -= ctype->mbdigits_act % 10;
 667       ctype->mbdigits_act -= ctype->mbdigits_act % 10;
 668       error (0, 0, _("`digit' category has not entries in groups of ten"));
 669     }
 670
 671   /* Check the input digits.  There must be a multiple of ten available.
 672      In each group it could be that one or the other character is missing.
 673      In this case the whole group must be removed.  */
 674   cnt = 0;
 675   while (cnt < ctype->mbdigits_act)
 676     {
 677       size_t inner;
 678       for (inner = 0; inner < 10; ++inner)
 679         if (ctype->mbdigits[cnt + inner] == NULL)
 680           break;
 681
 682       if (inner == 10)
 683         cnt += 10;
 684       else
 685         {
 686           /* Remove the group.  */
               /* The tail length must come from the multibyte count: the
                  wide count is not reduced in this loop, so using it would
                  move stale entries once a group has been dropped.  */
 687           memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
 688                    ((ctype->mbdigits_act - cnt - 10)
 689                     * sizeof (ctype->mbdigits[0])));
 690           ctype->mbdigits_act -= 10;
 691         }
 692     }
 693
 694   /* If no input digits are given use the default.  */
 695   if (ctype->mbdigits_act == 0)
 696     {
 697       if (ctype->mbdigits_max == 0)
 698         {
 699           ctype->mbdigits = obstack_alloc (&charmap->mem_pool,
 700                                            10 * sizeof (struct charseq *));
 701           ctype->mbdigits_max = 10;
 702         }
 703
 704       for (cnt = 0; cnt < 10; ++cnt)
 705         {
 706           ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 707                                                       digits + cnt, 1);
 708           if (ctype->mbdigits[cnt] == NULL)
 709             {
 710               ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 711                                                           longnames[cnt],
 712                                                           strlen (longnames[cnt]));
 713               if (ctype->mbdigits[cnt] == NULL)
 714                 {
 715                   /* Hum, this ain't good.  */
 716                   error (0, 0, _("\
 717 no input digits defined and none of the standard names in the charmap"));
 718
 719                   ctype->mbdigits[cnt] = obstack_alloc (&charmap->mem_pool,
 720                                                         sizeof (struct charseq) + 1);
 721
 722                   /* This is better than nothing.  */
 723                   ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
 724                   ctype->mbdigits[cnt]->nbytes = 1;
 725                 }
 726             }
 727         }
 728
 729       ctype->mbdigits_act = 10;
 730     }
 731
 732   /* Check the wide character input digits.  There must be a multiple
 733      of ten available.  In each group it could be that one or the other
 734      character is missing.  In this case the whole group must be
 735      removed.  */
 736   cnt = 0;
 737   while (cnt < ctype->wcdigits_act)
 738     {
 739       size_t inner;
 740       for (inner = 0; inner < 10; ++inner)
 741         if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
 742           break;
 743
 744       if (inner == 10)
 745         cnt += 10;
 746       else
 747         {
 748           /* Remove the group.  */
 749           memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
 750                    ((ctype->wcdigits_act - cnt - 10)
 751                     * sizeof (ctype->wcdigits[0])));
 752           ctype->wcdigits_act -= 10;
 753         }
 754     }
 755
 756   /* If no input digits are given use the default.  */
 757   if (ctype->wcdigits_act == 0)
 758     {
 759       if (ctype->wcdigits_max == 0)
 760         {
 761           ctype->wcdigits = obstack_alloc (&charmap->mem_pool,
 762                                            10 * sizeof (uint32_t));
 763           ctype->wcdigits_max = 10;
 764         }
 765
 766       for (cnt = 0; cnt < 10; ++cnt)
 767         ctype->wcdigits[cnt] = L'0' + cnt;
 768
           /* Record the ten defaults in the wide count.  The multibyte
              count was settled above and must not be touched here.  */
 769       ctype->wcdigits_act = 10;
 770     }
 771
 772   /* Check the outdigits.  */
 773   warned = 0;
 774   for (cnt = 0; cnt < 10; ++cnt)
 775     if (ctype->mboutdigits[cnt] == NULL)
 776       {
 777         static struct charseq replace[2];
 778
 779         if (!warned)
 780           {
 781             error (0, 0, _("\
 782 not all characters used in `outdigit' are available in the charmap"));
 783             warned = 1;
 784           }
 785
 786         replace[0].nbytes = 1;
 787         replace[0].bytes[0] = '?';
 788         replace[0].bytes[1] = '\0';
 789         ctype->mboutdigits[cnt] = &replace[0];
 790       }
 791
 792   warned = 0;
 793   for (cnt = 0; cnt < 10; ++cnt)
 794     if (ctype->wcoutdigits[cnt] == 0)
 795       {
 796         if (!warned)
 797           {
 798             error (0, 0, _("\
 799 not all characters used in `outdigit' are available in the repertoire"));
 800             warned = 1;
 801           }
 802
 803         ctype->wcoutdigits[cnt] = L'?';
 804       }
 805
 806   /* Sort the entries in the translit_ignore list.  */
       /* Stable insertion sort by `from': every node is unlinked from the
          input list and spliced into the result list FIRSTP, and the nodes
          are counted along the way.  */
 807   if (ctype->translit_ignore != NULL)
 808     {
 809       struct translit_ignore_t *firstp = NULL;
 810       struct translit_ignore_t *runp = ctype->translit_ignore;
 811
 812       ctype->ntranslit_ignore = 0;
 813
 814       while (runp != NULL)
 815         {
 816           struct translit_ignore_t *nextp = runp->next;
 817           struct translit_ignore_t **linkp = &firstp;
 818
 819           ++ctype->ntranslit_ignore;
 820
 821           while (*linkp != NULL && (*linkp)->from <= runp->from)
 822             linkp = &(*linkp)->next;
 823
 824           /* Splice RUNP in before the first entry with a larger `from'.  */
 825           runp->next = *linkp;
 826           *linkp = runp;
 827           runp = nextp;
 828         }
 829
 830       ctype->translit_ignore = firstp;
 831     }
 832 }
 833
 834
     /* Write the LC_CTYPE data of LOCALE through write_locale_data, with
        OUTPUT_PATH as the destination.
        The data is described by an iovec array: iov[0] is the `struct
        locale_file' header, iov[1] the table IDX and the following
        entries are the items themselves.  idx[ELEM] is the number of
        bytes which precede item ELEM, header and index table included.
        Some items need more than one iovec entry (the two name lists and
        the alignment padding); OFFSET counts these additional entries, so
        the data of item ELEM always starts at iov[2 + ELEM + OFFSET].  */
 835 void
 836 ctype_output (struct localedef_t *locale, struct charmap_t *charmap,
 837               const char *output_path)
 838 {
 839   static const char nulbytes[4] = { 0, 0, 0, 0 };
 840   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
       /* Besides the fixed items there is one item for every map after the
          first two (old-style tables) or one item for every class and
          every map (3-level tables).  */
 841   const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
 842                          + (oldstyle_tables
 843                             ? (ctype->map_collection_nr - 2)
 844                             : (ctype->nr_charclass + ctype->map_collection_nr)));
 845   struct iovec iov[2 + nelems + ctype->nr_charclass
 846                   + ctype->map_collection_nr + 2];
 847   struct locale_file data;
 848   uint32_t idx[nelems + 1];
 849   uint32_t default_missing_len;
 850   size_t elem, cnt, offset, total;
 851   char *cp;
 852
 853   /* Now prepare the output: Find the sizes of the table we can use.  */
 854   allocate_arrays (ctype, charmap, ctype->repertoire);
 855
 856   data.magic = LIMAGIC (LC_CTYPE);
 857   data.n = nelems;
 858   iov[0].iov_base = (void *) &data;
 859   iov[0].iov_len = sizeof (data);
 860
 861   iov[1].iov_base = (void *) idx;
 862   iov[1].iov_len = nelems * sizeof (uint32_t);
 863
 864   idx[0] = iov[0].iov_len + iov[1].iov_len;
 865   offset = 0;
 866
 867   for (elem = 0; elem < nelems; ++elem)
 868     {
 869       if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
 870         switch (elem)
 871           {
 872 #define CTYPE_EMPTY(name) \
 873           case name:                                                          \
 874             iov[2 + elem + offset].iov_base = (void *) "";                    \
 875             iov[2 + elem + offset].iov_len = 0;                               \
 876             idx[elem + 1] = idx[elem];                                        \
 877             break
 878
 879           CTYPE_EMPTY(_NL_CTYPE_GAP1);
 880           CTYPE_EMPTY(_NL_CTYPE_GAP2);
 881           CTYPE_EMPTY(_NL_CTYPE_GAP3);
 882
 883 #define CTYPE_DATA(name, base, len)                                           \
 884           case _NL_ITEM_INDEX (name):                                         \
 885             iov[2 + elem + offset].iov_base = (base);                         \
 886             iov[2 + elem + offset].iov_len = (len);                           \
 887             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;       \
 888             break
 889
 890           CTYPE_DATA (_NL_CTYPE_CLASS,
 891                       ctype->ctype_b,
 892                       (256 + 128) * sizeof (char_class_t));
 893
 894           CTYPE_DATA (_NL_CTYPE_TOUPPER,
 895                       ctype->map[0],
 896                       (256 + 128) * sizeof (uint32_t));
 897           CTYPE_DATA (_NL_CTYPE_TOLOWER,
 898                       ctype->map[1],
 899                       (256 + 128) * sizeof (uint32_t));
 900
 901           CTYPE_DATA (_NL_CTYPE_TOUPPER32,
 902                       ctype->map32[0],
 903                       (oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256)
 904                       * sizeof (uint32_t));
 905           CTYPE_DATA (_NL_CTYPE_TOLOWER32,
 906                       ctype->map32[1],
 907                       (oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256)
 908                       * sizeof (uint32_t));
 909
 910           CTYPE_DATA (_NL_CTYPE_CLASS32,
 911                       ctype->ctype32_b,
 912                       (oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256)
 913                       * sizeof (char_class32_t));
 914
 915           CTYPE_DATA (_NL_CTYPE_NAMES,
 916                       ctype->names,
 917                       (oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 0)
 918                       * sizeof (uint32_t));
 919
 920           CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET,
 921                       &ctype->class_offset, sizeof (uint32_t));
 922
 923           CTYPE_DATA (_NL_CTYPE_MAP_OFFSET,
 924                       &ctype->map_offset, sizeof (uint32_t));
 925
 926           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
 927                       &ctype->translit_idx_size, sizeof (uint32_t));
 928
 929           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
 930                       ctype->translit_from_idx,
 931                       ctype->translit_idx_size * sizeof (uint32_t));
 932
 933           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
 934                       ctype->translit_from_tbl,
 935                       ctype->translit_from_tbl_size);
 936
 937           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
 938                       ctype->translit_to_idx,
 939                       ctype->translit_idx_size * sizeof (uint32_t));
 940
 941           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
 942                       ctype->translit_to_tbl, ctype->translit_to_tbl_size);
 943
 944           CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
 945                       &ctype->plane_size, sizeof (uint32_t));
 946           CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
 947                       &ctype->plane_cnt, sizeof (uint32_t));
 948
 949           case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
 950             /* The class name array.  */
 951             total = 0;
 952             for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
 953               {
 954                 iov[2 + elem + offset].iov_base
 955                   = (void *) ctype->classnames[cnt];
 956                 iov[2 + elem + offset].iov_len
 957                   = strlen (ctype->classnames[cnt]) + 1;
 958                 total += iov[2 + elem + offset].iov_len;
 959               }
                 /* Terminate the list with at least one NUL byte and pad the
                    item to a multiple of four bytes.  This never takes more
                    than the four bytes NULBYTES provides.  */
 960             iov[2 + elem + offset].iov_base = (void *) nulbytes;
 961             iov[2 + elem + offset].iov_len = 4 - (total % 4);
 962             total += 4 - (total % 4);
 963
 964             idx[elem + 1] = idx[elem] + total;
 965             break;
 966
 967           case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
 968             /* The map name array.  */
 969             total = 0;
 970             for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
 971               {
 972                 iov[2 + elem + offset].iov_base
 973                   = (void *) ctype->mapnames[cnt];
 974                 iov[2 + elem + offset].iov_len
 975                   = strlen (ctype->mapnames[cnt]) + 1;
 976                 total += iov[2 + elem + offset].iov_len;
 977               }
                 /* Same termination and padding as for the class names.  */
 978             iov[2 + elem + offset].iov_base = (void *) nulbytes;
 979             iov[2 + elem + offset].iov_len = 4 - (total % 4);
 980             total += 4 - (total % 4);
 981
 982             idx[elem + 1] = idx[elem] + total;
 983             break;
 984
 985           CTYPE_DATA (_NL_CTYPE_WIDTH,
 986                       (oldstyle_tables
 987                        ? ctype->width
 988                        : ctype->width_3level.iov_base),
 989                       (oldstyle_tables
 990                        ? (ctype->plane_size * ctype->plane_cnt + 3) & ~3ul
 991                        : ctype->width_3level.iov_len));
 992
 993           CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
 994                       &ctype->mb_cur_max, sizeof (uint32_t));
 995
 996           case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
                 /* The name is written including its NUL byte; unless this
                    already is a multiple of four bytes a zero-padded copy is
                    used.  */
 997             total = strlen (ctype->codeset_name) + 1;
 998             if (total % 4 == 0)
 999               iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
1000             else
1001               {
1002                 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
1003                 memset (mempcpy (iov[2 + elem + offset].iov_base,
1004                                  ctype->codeset_name, total),
1005                         '\0', 4 - (total & 3));
1006                 total = (total + 3) & ~3;
1007               }
1008             iov[2 + elem + offset].iov_len = total;
1009             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1010             break;
1011
1012           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1013             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1014             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1015             *(uint32_t *) iov[2 + elem + offset].iov_base =
1016               ctype->mbdigits_act / 10;
1017             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1018             break;
1019
1020           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1021             /* Align entries.  */
                 /* The padding gets an iovec entry of its own and moves the
                    start of this item, hence idx[elem] is adjusted.  */
1022             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1023             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1024             idx[elem] += iov[2 + elem + offset].iov_len;
1025             ++offset;
1026
1027             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1028             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1029             *(uint32_t *) iov[2 + elem + offset].iov_base =
1030               ctype->wcdigits_act / 10;
1031             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1032             break;
1033
1034           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1035             /* Compute the length of all possible characters.  For INDIGITS
1036                there might be more than one.  We simply concatenate all of
1037                them with a NUL byte following.  The NUL byte wouldn't be
1038                necessary but it makes it easier for the user.  */
1039             total = 0;
1040
1041             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1042                  cnt < ctype->mbdigits_act; cnt += 10)
1043               total += ctype->mbdigits[cnt]->nbytes + 1;
1044             iov[2 + elem + offset].iov_base = (char *) alloca (total);
1045             iov[2 + elem + offset].iov_len = total;
1046
1047             cp = iov[2 + elem + offset].iov_base;
1048             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1049                  cnt < ctype->mbdigits_act; cnt += 10)
1050               {
1051                 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
1052                               ctype->mbdigits[cnt]->nbytes);
1053                 *cp++ = '\0';
1054               }
1055             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1056             break;
1057
1058           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1059             /* Exactly one character is stored for each OUTDIGIT entry
1060                (the position is computed from ELEM).  Its bytes are
1061                copied and followed by a NUL byte, just like it is done
1062                for the INDIGITS entries above.  */
1063             cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1064             total = ctype->mboutdigits[cnt]->nbytes + 1;
1065             iov[2 + elem + offset].iov_base = (char *) alloca (total);
1066             iov[2 + elem + offset].iov_len = total;
1067
1068             *(char *) mempcpy (iov[2 + elem + offset].iov_base,
1069                                ctype->mboutdigits[cnt]->bytes,
1070                                ctype->mboutdigits[cnt]->nbytes) = '\0';
1071             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1072             break;
1073
1074           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
                 /* One wide character from every group of ten input
                    digits.  */
1075             total = ctype->wcdigits_act / 10;
1076
1077             iov[2 + elem + offset].iov_base =
1078               (uint32_t *) alloca (total * sizeof (uint32_t));
1079             iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
1080
1081             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1082                  cnt < ctype->wcdigits_act; cnt += 10)
1083               ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
1084                 = ctype->wcdigits[cnt];
1085             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1086             break;
1087
1088           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC):
1089             /* Align entries.  */
1090             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1091             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1092             idx[elem] += iov[2 + elem + offset].iov_len;
1093             ++offset;
1094             /* FALLTHROUGH */
1095
1096           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1097             cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1098             iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
1099             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1100             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1101             break;
1102
1103           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1104             default_missing_len = (ctype->default_missing
1105                                    ? wcslen ((wchar_t *)ctype->default_missing)
1106                                    : 0);
1107             iov[2 + elem + offset].iov_base = &default_missing_len;
1108             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1109             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1110             break;
1111
1112           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1113             iov[2 + elem + offset].iov_base =
1114               ctype->default_missing ?: (uint32_t *) L"";
                 /* wcslen counts characters but iov_len is a number of
                    bytes.  */
1115             iov[2 + elem + offset].iov_len =
1116               wcslen (iov[2 + elem + offset].iov_base) * sizeof (uint32_t);
1117             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1118             break;
1119
1120           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1121             iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore;
1122             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1123             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1124             break;
1125
1126           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1127             {
                   /* Every range of the list becomes three words: from, to,
                      and step.  */
1128               uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore
1129                                                       * 3 * sizeof (uint32_t));
1130               struct translit_ignore_t *runp;
1131
1132               iov[2 + elem + offset].iov_base = ranges;
1133               iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore
1134                                                 * 3 * sizeof (uint32_t));
1135
1136               for (runp = ctype->translit_ignore; runp != NULL;
1137                    runp = runp->next)
1138                 {
1139                   *ranges++ = runp->from;
1140                   *ranges++ = runp->to;
1141                   *ranges++ = runp->step;
1142                 }
1143             }
1144             /* Remove the following line in case a new entry is added
1145                after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN.  */
1146             if (elem < nelems)
1147               idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1148             break;
1149
1150           default:
1151             assert (! "unknown CTYPE element");
1152           }
1153       else
1154         {
1155           /* Handle extra maps.  */
1156           if (oldstyle_tables)
1157             {
                   /* The first two maps were written as the TOUPPER32 and
                      TOLOWER32 items above.  */
1158               size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) + 2;
1159
1160               iov[2 + elem + offset].iov_base = ctype->map32[nr];
1161               iov[2 + elem + offset].iov_len = ((ctype->plane_size
1162                                                  * ctype->plane_cnt)
1163                                                 * sizeof (uint32_t));
1164
1165               idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1166             }
1167           else
1168             {
                   /* First the tables of all classes, then those of all
                      maps.  */
1169               size_t nr = elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE);
1170               if (nr < ctype->nr_charclass)
1171                 {
1172                   iov[2 + elem + offset] = ctype->class_3level[nr];
1173                 }
1174               else
1175                 {
1176                   nr -= ctype->nr_charclass;
1177                   assert (nr < ctype->map_collection_nr);
1178                   iov[2 + elem + offset] = ctype->map_3level[nr];
1179                 }
1180               idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1181             }
1182         }
1183     }
1184
       /* Exactly the number of entries IOV was declared with must have been
          filled.  */
1185   assert (2 + elem + offset == (nelems + ctype->nr_charclass
1186                                 + ctype->map_collection_nr + 2 + 2));
1187
1188   write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
1189 }
1190
1191
1192 /* Local functions.  */

     /* Append NAME to the list of character class names in CTYPE.  A name
        which is already in the list is reported through LR and otherwise
        ignored.  If the list already holds MAX_NR_CHARCLASS names `error'
        is called with exit status 2.  The pointer NAME itself is stored,
        the string is not copied.  */
1193 static void
1194 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1195                  const char *name)
1196 {
1197   size_t idx;
1198
1199   /* Refuse a name which is already known.  */
1200   for (idx = 0; idx < ctype->nr_charclass; ++idx)
1201     if (strcmp (ctype->classnames[idx], name) == 0)
1202       {
1203         lr_error (lr, _("character class `%s' already defined"), name);
1204         return;
1205       }
1206
1207   /* The name table has a fixed size; exit code 2 is prescribed in
1208      P1003.2b.  */
1209   if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1210     error (2, 0,
1211            _("\
1212 implementation limit: no more than %Zd character classes allowed"),
1213            MAX_NR_CHARCLASS);
1214
1215   ctype->classnames[ctype->nr_charclass++] = name;
1216 }
1217
1218
     /* Add a character map called NAME to CTYPE and allocate its table
        with xcalloc.  The table gets as many entries as the largest table
        of the maps already registered; if that is zero, 256 entries are
        used when CHARMAP->mb_cur_max is 1 and 512 otherwise.  The
        map_collection_act value of the new map starts at 256.  A name
        which is already in use is reported through LR and ignored; if
        MAX_NR_CHARMAP maps exist already `error' is called with exit
        status 2.  */
1219 static void
1220 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1221                const char *name, struct charmap_t *charmap)
1222 {
1223   size_t largest = 0;
1224   size_t slot;
1225
1226   /* Reject duplicates and remember the biggest table allocated so
1227      far.  */
1228   for (slot = 0; slot < ctype->map_collection_nr; ++slot)
1229     {
1230       if (strcmp (ctype->mapnames[slot], name) == 0)
1231         {
1232           lr_error (lr, _("character map `%s' already defined"), name);
1233           return;
1234         }
1235
1236       if (ctype->map_collection_max[slot] > largest)
1237         largest = ctype->map_collection_max[slot];
1238     }
1239
1240   /* Exit code 2 is prescribed in P1003.2b.  */
1241   if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1242     error (2, 0,
1243            _("\
1244 implementation limit: no more than %d character maps allowed"),
1245            MAX_NR_CHARMAP);
1246
1247   /* SLOT is now the index of the first unused entry.  Without any
1248      existing table to copy the size from use a default.  */
1249   if (largest == 0)
1250     largest = charmap->mb_cur_max == 1 ? 256 : 512;
1251
1252   ctype->mapnames[slot] = name;
1253   ctype->map_collection_max[slot] = largest;
1254   ctype->map_collection_act[slot] = 256;
1255   ctype->map_collection[slot] = (uint32_t *) xcalloc (sizeof (uint32_t),
1256                                                        largest);
1257
1258   ++ctype->map_collection_nr;
1259 }
1260
1261
1262 /* Return a pointer to the entry for the character IDX in *TABLE.
        Values below 256 select the entry directly.  All other values are
        searched in CTYPE->charnames starting at position 256 and appended
        there if they are missing; the position in the name array is also
        the index into *TABLE.  If necessary *TABLE is enlarged (the new
        part is cleared) and *MAX and *ACT are updated.  TABLE, MAX, and
        ACT can be NULL if only the name array is to be extended; the
1263    result is NULL in that case.  */
1264 static uint32_t *
1265 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1266           size_t *act, uint32_t idx)
1267 {
1268   size_t pos = 256;
1269
1270   /* The first 256 entries are addressed directly.  */
1271   if (idx < 256)
1272     return table != NULL ? *table + idx : NULL;
1273
1274   /* Search the position of IDX in the name array.  */
1275   while (pos < ctype->charnames_act && ctype->charnames[pos] != idx)
1276     ++pos;
1277
1278   if (pos == ctype->charnames_act)
1279     {
1280       /* Not yet known: append it, enlarging the array if necessary.  */
1281       if (ctype->charnames_max == ctype->charnames_act)
1282         {
1283           ctype->charnames_max *= 2;
1284           ctype->charnames
1285             = (uint32_t *) xrealloc (ctype->charnames,
1286                                      (sizeof (uint32_t)
1287                                       * ctype->charnames_max));
1288         }
1289       ctype->charnames[pos] = idx;
1290       ++ctype->charnames_act;
1291     }
1292
1293   /* Without a table we have done everything we are asked to do.  */
1294   if (table == NULL)
1295     return NULL;
1296
1297   if (pos >= *act)
1298     {
1299       if (pos >= *max)
1300         {
1301           /* Double the table until POS fits and clear the new part.  */
1302           const size_t old_max = *max;
1303
1304           do
1305             *max *= 2;
1306           while (*max <= pos);
1307           *table = (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1308           memset (*table + old_max, '\0',
1309                   (*max - old_max) * sizeof (uint32_t));
1310         }
1311       *act = pos + 1;
1312     }
1313   return *table + pos;
1314 }
1315
1316
     /* Determine the character denoted by the token NOW.  For tokens of
        the kinds tok_bsymbol, tok_ucs4, and tok_charcode the byte sequence
        found in CHARMAP (NULL if there is none) is stored in *SEQP, the
        wide character value in *WCHP, and 0 is returned.  For all other
        tokens 1 is returned and *SEQP and *WCHP are left untouched.  For
        a tok_ucs4 token which cannot be resolved a negative entry is
        added to the sequence table of REPERTOIRE, if there is one.  */
1317 static int
1318 get_character (struct token *now, struct charmap_t *charmap,
1319                struct repertoire_t *repertoire,
1320                struct charseq **seqp, uint32_t *wchp)
1321 {
1322   struct charseq *seq;
1323   char utmp[10];
1324
1325   if (now->tok == tok_charcode)
1326     {
1327       /* We must map from the byte code to UCS4.  */
1328       seq = charmap_find_symbol (charmap, now->val.str.startmb,
1329                                  now->val.str.lenmb);
1330       if (seq != NULL && seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1331         /* Value not yet determined: ask the repertoire.  */
1332         seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1333                                            strlen (seq->name));
1334
1335       *seqp = seq;
1336       *wchp = seq != NULL ? seq->ucs4 : ILLEGAL_CHAR_VALUE;
1337       return 0;
1338     }
1339
1340   if (now->tok == tok_bsymbol)
1341     {
1342       /* This will hopefully be the normal case.  */
1343       *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1344                                      now->val.str.lenmb);
1345       *seqp = charmap_find_value (charmap, now->val.str.startmb,
1346                                   now->val.str.lenmb);
1347       return 0;
1348     }
1349
1350   if (now->tok != tok_ucs4)
1351     /* Not a token which denotes a character.  */
1352     return 1;
1353
1354   /* Try the Uxxxxxxxx name in the charmap first, then the sequences
1355      recorded in the repertoire.  */
1356   snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1357   seq = charmap_find_value (charmap, utmp, 9);
1358   if (seq == NULL)
1359     seq = repertoire_find_seq (repertoire, now->val.ucs4);
1360
1361   if (seq != NULL)
1362     {
1363       /* Ignore a sequence which belongs to a different character.  */
1364       if (seq->ucs4 != now->val.ucs4)
1365         seq = NULL;
1366     }
1367   else
1368     {
1369       /* Compute the value in the charmap from the UCS value.  */
1370       const char *symbol = repertoire_find_symbol (repertoire,
1371                                                    now->val.ucs4);
1372
1373       if (symbol != NULL)
1374         seq = charmap_find_value (charmap, symbol, strlen (symbol));
1375
1376       if (seq != NULL)
1377         seq->ucs4 = now->val.ucs4;
1378       else if (repertoire != NULL)
1379         {
1380           /* Insert a negative entry.  */
1381           static const struct charseq negative
1382             = { .ucs4 = ILLEGAL_CHAR_VALUE };
1383           uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1384                                           sizeof (uint32_t));
1385           *newp = now->val.ucs4;
1386           insert_entry (&repertoire->seq_table, newp,
1387                         sizeof (uint32_t), (void *) &negative);
1388         }
1389     }
1390
1391   *seqp = seq;
1392   *wchp = now->val.ucs4;
1393   return 0;
1394 }
1395
1396
1397 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1398    the .(2). counterparts.
        LAST_STR is the symbolic name which starts the range, NOW the token
        with the name which ends it.  Both names must have the same length
        and differ only in a trailing number written in base BASE;
        otherwise an error is reported through LDFILE.  Unless
        IGNORE_CONTENT is nonzero every STEP-th name after LAST_STR up to
        the name in NOW is looked up: CLASS256_BIT is set for characters
        with a single-byte sequence and CLASS_BIT (if nonzero) for the wide
        character.  With HANDLE_DIGITS being 1 the character is also
        appended to the digit tables of CTYPE, with 2 to the outdigit
        tables (which take at most ten entries).  */
1399 static void
1400 charclass_symbolic_ellipsis (struct linereader *ldfile,
1401                              struct locale_ctype_t *ctype,
1402                              struct charmap_t *charmap,
1403                              struct repertoire_t *repertoire,
1404                              struct token *now,
1405                              const char *last_str,
1406                              unsigned long int class256_bit,
1407                              unsigned long int class_bit, int base,
1408                              int ignore_content, int handle_digits, int step)
1409 {
1410   const char *nowstr = now->val.str.startmb;
1411   char tmp[now->val.str.lenmb + 1];
1412   const char *cp;
1413   char *endp;
1414   unsigned long int from;
1415   unsigned long int to;
1416
1417   /* We have to compute the ellipsis values using the symbolic names.  */
1418   assert (last_str != NULL);
1419
1420   if (strlen (last_str) != now->val.str.lenmb)
1421     {
1422     invalid_range:
1423       lr_error (ldfile,
1424                 _("`%s' and `%.*s' are no valid names for symbolic range"),
1425                 last_str, (int) now->val.str.lenmb, nowstr);
1426       return;
1427     }
1428
1429   if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1430     /* Nothing to do, the names are the same.  */
1431     return;
1432
       /* Skip the common prefix.  The names differ, so this stops before
          the end of either name.  */
1433   for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1434     ;
1435
       /* The rest of both names must be a number.  strtoul signals an
          overflow by returning ULONG_MAX with errno set to ERANGE.  */
1436   errno = 0;
1437   from = strtoul (cp, &endp, base);
1438   if ((from == ULONG_MAX && errno == ERANGE) || *endp != '\0')
1439     goto invalid_range;
1440
1441   to = strtoul (nowstr + (cp - last_str), &endp, base);
1442   if ((to == ULONG_MAX && errno == ERANGE)
1443       || (endp - nowstr) != now->val.str.lenmb || from >= to)
1444     goto invalid_range;
1445
1446   /* OK, we have a range FROM - TO.  Now we can create the symbolic names.  */
1447   if (!ignore_content)
1448     {
           /* NOTE(review): NOW keeps pointing to the local buffer TMP when
              this function returns; confirm that no caller looks at
              NOW->val.str afterwards.  */
1449       now->val.str.startmb = tmp;
1450       while ((from += step) <= to)
1451         {
1452           struct charseq *seq;
1453           uint32_t wch;
1454
               /* Rebuild the name: the common prefix followed by the
                  zero-padded number.  The `*' arguments must be of type
                  int and FROM is an unsigned long.  */
1455           snprintf (tmp, sizeof (tmp),
1456                     (base == 10 ? "%.*s%0*lu" : "%.*s%0*lX"),
1457                     (int) (cp - last_str), last_str,
1458                     (int) (now->val.str.lenmb - (cp - last_str)), from);
1459           get_character (now, charmap, repertoire, &seq, &wch);
1460           if (seq != NULL && seq->nbytes == 1)
1461             /* Yep, we can store information about this byte sequence.  */
1462             ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1463
1464           if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1465             /* We have the UCS4 position.  */
1466             *find_idx (ctype, &ctype->class_collection,
1467                        &ctype->class_collection_max,
1468                        &ctype->class_collection_act, wch) |= class_bit;
1469
1470           if (handle_digits == 1)
1471             {
1472               /* We must store the digit values.  */
                   /* Both digit arrays are enlarged together.  */
1473               if (ctype->mbdigits_act == ctype->mbdigits_max)
1474                 {
1475                   ctype->mbdigits_max *= 2;
1476                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1477                                               (ctype->mbdigits_max
1478                                                * sizeof (char *)));
1479                   ctype->wcdigits_max *= 2;
1480                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1481                                               (ctype->wcdigits_max
1482                                                * sizeof (uint32_t)));
1483                 }
1484
1485               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1486               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1487             }
1488           else if (handle_digits == 2)
1489             {
1490               /* We must store the digit values.  */
1491               if (ctype->outdigits_act >= 10)
1492                 {
1493                   lr_error (ldfile, _("\
1494 %s: field `%s' does not contain exactly ten entries"),
1495                             "LC_CTYPE", "outdigit");
1496                   return;
1497                 }
1498
1499               ctype->mboutdigits[ctype->outdigits_act] = seq;
1500               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1501               ++ctype->outdigits_act;
1502             }
1503         }
1504     }
1505 }
1506
1507
1508 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'.
        LAST_WCH is the value which starts the range, NOW->val.ucs4 the
        value which ends it; an error is reported through LDFILE if the
        end is smaller than the start.  Unless IGNORE_CONTENT is nonzero
        every STEP-th value after LAST_WCH up to the end is processed:
        CLASS256_BIT is set if a single-byte sequence for the value is
        known and CLASS_BIT (if nonzero) is set for the wide character.
        With HANDLE_DIGITS being 1 the character is also appended to the
        digit tables of CTYPE, with 2 to the outdigit tables (which take
        at most ten entries).  */
1509 static void
1510 charclass_ucs4_ellipsis (struct linereader *ldfile,
1511                          struct locale_ctype_t *ctype,
1512                          struct charmap_t *charmap,
1513                          struct repertoire_t *repertoire,
1514                          struct token *now, uint32_t last_wch,
1515                          unsigned long int class256_bit,
1516                          unsigned long int class_bit, int ignore_content,
1517                          int handle_digits, int step)
1518 {
1519   if (last_wch > now->val.ucs4)
1520     {
1521       lr_error (ldfile, _("\
1522 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1523                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1524                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1525       return;
1526     }
1527
1528   if (!ignore_content)
1529     while ((last_wch += step) <= now->val.ucs4)
1530       {
1531         /* We have to find out whether there is a byte sequence corresponding
1532            to this UCS4 value.  */
1533         struct charseq *seq;
1534         char utmp[10];
1535
1536         snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1537         seq = charmap_find_value (charmap, utmp, 9);
             /* The short name has exactly five characters only for values
                below 0x10000.  For larger values `%04X' produces more
                digits and a lookup with length 5 would use the name of a
                different character.  */
1538         if (seq == NULL && last_wch < 0x10000)
1539           {
1540             snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1541             seq = charmap_find_value (charmap, utmp, 5);
1542           }
1543
1544         if (seq == NULL)
1545           /* Try looking in the repertoire map.  */
1546           seq = repertoire_find_seq (repertoire, last_wch);
1547
1548         /* If this is the first time we look for this sequence create a new
1549            entry.  */
1550         if (seq == NULL)
1551           {
1552             static const struct charseq negative
1553               = { .ucs4 = ILLEGAL_CHAR_VALUE };
1554
1555             /* Find the symbolic name for this UCS4 value.  */
1556             if (repertoire != NULL)
1557               {
1558                 const char *symbol = repertoire_find_symbol (repertoire,
1559                                                              last_wch);
1560                 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1561                                                 sizeof (uint32_t));
1562                 *newp = last_wch;
1563
1564                 if (symbol != NULL)
1565                   /* We have a name, now search the multibyte value.  */
1566                   seq = charmap_find_value (charmap, symbol, strlen (symbol));
1567
1568                 if (seq == NULL)
1569                   /* We have to create a fake entry.  */
1570                   seq = (struct charseq *) &negative;
1571                 else
1572                   seq->ucs4 = last_wch;
1573
                     /* Record the result, even a negative one, in the
                        sequence table of the repertoire.  */
1574                 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1575                               seq);
1576               }
1577             else
1578               /* We have to create a fake entry.  */
1579               seq = (struct charseq *) &negative;
1580           }
1581
1582         /* SEQ is never NULL here but it might not belong to LAST_WCH.  */
1583         if (seq->ucs4 == last_wch && seq->nbytes == 1)
1584           /* Yep, we can store information about this byte sequence.  */
1585           ctype->class256_collection[(size_t) seq->bytes[0]]
1586             |= class256_bit;
1587
1588         /* And of course we have the UCS4 position.  */
1589         if (class_bit != 0)
1590           *find_idx (ctype, &ctype->class_collection,
1591                      &ctype->class_collection_max,
1592                      &ctype->class_collection_act, last_wch) |= class_bit;
1593
1594         if (handle_digits == 1)
1595           {
1596             /* We must store the digit values.  */
                 /* Both digit arrays are enlarged together.  */
1597             if (ctype->mbdigits_act == ctype->mbdigits_max)
1598               {
1599                 ctype->mbdigits_max *= 2;
1600                 ctype->mbdigits = xrealloc (ctype->mbdigits,
1601                                             (ctype->mbdigits_max
1602                                              * sizeof (char *)));
1603                 ctype->wcdigits_max *= 2;
1604                 ctype->wcdigits = xrealloc (ctype->wcdigits,
1605                                             (ctype->wcdigits_max
1606                                              * sizeof (uint32_t)));
1607               }
1608
                 /* A sequence which does not belong to LAST_WCH is stored
                    as NULL.  */
1609             ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1610                                                       ? seq : NULL);
1611             ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1612           }
1613         else if (handle_digits == 2)
1614           {
1615             /* We must store the digit values.  */
1616             if (ctype->outdigits_act >= 10)
1617               {
1618                 lr_error (ldfile, _("\
1619 %s: field `%s' does not contain exactly ten entries"),
1620                           "LC_CTYPE", "outdigit");
1621                 return;
1622               }
1623
1624             ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1625                                                         ? seq : NULL);
1626             ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1627             ++ctype->outdigits_act;
1628           }
1629       }
1630 }
1631
1632
1633 /* Ellipsis as in `/xea/x12.../xea/x34'.  */
1634 static void
1635 charclass_charcode_ellipsis (struct linereader *ldfile,
1636                              struct locale_ctype_t *ctype,
1637                              struct charmap_t *charmap,
1638                              struct repertoire_t *repertoire,
1639                              struct token *now, char *last_charcode,
1640                              uint32_t last_charcode_len,
1641                              unsigned long int class256_bit,
1642                              unsigned long int class_bit, int ignore_content,
1643                              int handle_digits)
1644 {
1645   /* First check whether the to-value is larger.  */
1646   if (now->val.charcode.nbytes != last_charcode_len)
1647     {
1648       lr_error (ldfile, _("\
1649 start end end character sequence of range must have the same length"));
1650       return;
1651     }
1652
1653   if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1654     {
1655       lr_error (ldfile, _("\
1656 to-value character sequence is smaller than from-value sequence"));
1657       return;
1658     }
1659
1660   if (!ignore_content)
1661     {
1662       do
1663         {
1664           /* Increment the byte sequence value.  */
1665           struct charseq *seq;
1666           uint32_t wch;
1667           int i;
1668
1669           for (i = last_charcode_len - 1; i >= 0; --i)
1670             if (++last_charcode[i] != 0)
1671               break;
1672
1673           if (last_charcode_len == 1)
1674             /* Of course we have the charcode value.  */
1675             ctype->class256_collection[(size_t) last_charcode[0]]
1676               |= class256_bit;
1677
1678           /* Find the symbolic name.  */
1679           seq = charmap_find_symbol (charmap, last_charcode,
1680                                      last_charcode_len);
1681           if (seq != NULL)
1682             {
1683               if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1684                 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1685                                                    strlen (seq->name));
1686               wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1687
1688               if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1689                 *find_idx (ctype, &ctype->class_collection,
1690                            &ctype->class_collection_max,
1691                            &ctype->class_collection_act, wch) |= class_bit;
1692             }
1693           else
1694             wch = ILLEGAL_CHAR_VALUE;
1695
1696           if (handle_digits == 1)
1697             {
1698               /* We must store the digit values.  */
1699               if (ctype->mbdigits_act == ctype->mbdigits_max)
1700                 {
1701                   ctype->mbdigits_max *= 2;
1702                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1703                                               (ctype->mbdigits_max
1704                                                * sizeof (char *)));
1705                   ctype->wcdigits_max *= 2;
1706                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1707                                               (ctype->wcdigits_max
1708                                                * sizeof (uint32_t)));
1709                 }
1710
1711               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1712               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1713               seq->nbytes = last_charcode_len;
1714
1715               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1716               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1717             }
1718           else if (handle_digits == 2)
1719             {
1720               struct charseq *seq;
1721               /* We must store the digit values.  */
1722               if (ctype->outdigits_act >= 10)
1723                 {
1724                   lr_error (ldfile, _("\
1725 %s: field `%s' does not contain exactly ten entries"),
1726                             "LC_CTYPE", "outdigit");
1727                   return;
1728                 }
1729
1730               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1731               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1732               seq->nbytes = last_charcode_len;
1733
1734               ctype->mboutdigits[ctype->outdigits_act] = seq;
1735               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1736               ++ctype->outdigits_act;
1737             }
1738         }
1739       while (memcmp (last_charcode, now->val.charcode.bytes,
1740                      last_charcode_len) != 0);
1741     }
1742 }
1743
1744
1745 /* Read one transliteration entry.  */
1746 static uint32_t *
1747 read_widestring (struct linereader *ldfile, struct token *now,
1748                  struct charmap_t *charmap, struct repertoire_t *repertoire)
1749 {
1750   uint32_t *wstr;
1751
1752   if (now->tok == tok_default_missing)
1753     /* The special name "" will denote this case.  */
1754     wstr = ((uint32_t *) { 0 });
1755   else if (now->tok == tok_bsymbol)
1756     {
1757       /* Get the value from the repertoire.  */
1758       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1759       wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1760                                        now->val.str.lenmb);
1761       if (wstr[0] == ILLEGAL_CHAR_VALUE)
1762         {
1763           /* We cannot proceed, we don't know the UCS4 value.  */
1764           free (wstr);
1765           return NULL;
1766         }
1767
1768       wstr[1] = 0;
1769     }
1770   else if (now->tok == tok_ucs4)
1771     {
1772       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1773       wstr[0] = now->val.ucs4;
1774       wstr[1] = 0;
1775     }
1776   else if (now->tok == tok_charcode)
1777     {
1778       /* Argh, we have to convert to the symbol name first and then to the
1779          UCS4 value.  */
1780       struct charseq *seq = charmap_find_symbol (charmap,
1781                                                  now->val.str.startmb,
1782                                                  now->val.str.lenmb);
1783       if (seq == NULL)
1784         /* Cannot find the UCS4 value.  */
1785         return NULL;
1786
1787       if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1788         seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1789                                            strlen (seq->name));
1790       if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1791         /* We cannot proceed, we don't know the UCS4 value.  */
1792         return NULL;
1793
1794       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1795       wstr[0] = seq->ucs4;
1796       wstr[1] = 0;
1797     }
1798   else if (now->tok == tok_string)
1799     {
1800       wstr = now->val.str.startwc;
1801       if (wstr == NULL || wstr[0] == 0)
1802         return NULL;
1803     }
1804   else
1805     {
1806       if (now->tok != tok_eol && now->tok != tok_eof)
1807         lr_ignore_rest (ldfile, 0);
1808       SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1809       return (uint32_t *) -1l;
1810     }
1811
1812   return wstr;
1813 }
1814
1815
1816 static void
1817 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1818                      struct token *now, struct charmap_t *charmap,
1819                      struct repertoire_t *repertoire)
1820 {
1821   uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1822   struct translit_t *result;
1823   struct translit_to_t **top;
1824   struct obstack *ob = &ctype->mempool;
1825   int first;
1826   int ignore;
1827
1828   if (from_wstr == NULL)
1829     /* There is no valid from string.  */
1830     return;
1831
1832   result = (struct translit_t *) obstack_alloc (ob,
1833                                                 sizeof (struct translit_t));
1834   result->from = from_wstr;
1835   result->fname = ldfile->fname;
1836   result->lineno = ldfile->lineno;
1837   result->next = NULL;
1838   result->to = NULL;
1839   top = &result->to;
1840   first = 1;
1841   ignore = 0;
1842
1843   while (1)
1844     {
1845       uint32_t *to_wstr;
1846
1847       /* Next we have one or more transliterations.  They are
1848          separated by semicolons.  */
1849       now = lr_token (ldfile, charmap, repertoire);
1850
1851       if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1852         {
1853           /* One string read.  */
1854           const uint32_t zero = 0;
1855
1856           if (!ignore)
1857             {
1858               obstack_grow (ob, &zero, 4);
1859               to_wstr = obstack_finish (ob);
1860
1861               *top = obstack_alloc (ob, sizeof (struct translit_to_t));
1862               (*top)->str = to_wstr;
1863               (*top)->next = NULL;
1864             }
1865
1866           if (now->tok == tok_eol)
1867             {
1868               result->next = ctype->translit;
1869               ctype->translit = result;
1870               return;
1871             }
1872
1873           if (!ignore)
1874             top = &(*top)->next;
1875           ignore = 0;
1876         }
1877       else
1878         {
1879           to_wstr = read_widestring (ldfile, now, charmap, repertoire);
1880           if (to_wstr == (uint32_t *) -1l)
1881             {
1882               /* An error occurred.  */
1883               obstack_free (ob, result);
1884               return;
1885             }
1886
1887           if (to_wstr == NULL)
1888             ignore = 1;
1889           else
1890             /* This value is usable.  */
1891             obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
1892
1893           first = 0;
1894         }
1895     }
1896 }
1897
1898
1899 static void
1900 read_translit_ignore_entry (struct linereader *ldfile,
1901                             struct locale_ctype_t *ctype,
1902                             struct charmap_t *charmap,
1903                             struct repertoire_t *repertoire)
1904 {
1905   /* We expect a semicolon-separated list of characters we ignore.  We are
1906      only interested in the wide character definitions.  These must be
1907      single characters, possibly defining a range when an ellipsis is used.  */
1908   while (1)
1909     {
1910       struct token *now = lr_token (ldfile, charmap, repertoire);
1911       struct translit_ignore_t *newp;
1912       uint32_t from;
1913
1914       if (now->tok == tok_eol || now->tok == tok_eof)
1915         {
1916           lr_error (ldfile,
1917                     _("premature end of `translit_ignore' definition"));
1918           return;
1919         }
1920
1921       if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1922         {
1923           lr_error (ldfile, _("syntax error"));
1924           lr_ignore_rest (ldfile, 0);
1925           return;
1926         }
1927
1928       if (now->tok == tok_ucs4)
1929         from = now->val.ucs4;
1930       else
1931         /* Try to get the value.  */
1932         from = repertoire_find_value (repertoire, now->val.str.startmb,
1933                                       now->val.str.lenmb);
1934
1935       if (from == ILLEGAL_CHAR_VALUE)
1936         {
1937           lr_error (ldfile, "invalid character name");
1938           newp = NULL;
1939         }
1940       else
1941         {
1942           newp = (struct translit_ignore_t *)
1943             obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
1944           newp->from = from;
1945           newp->to = from;
1946           newp->step = 1;
1947
1948           newp->next = ctype->translit_ignore;
1949           ctype->translit_ignore = newp;
1950         }
1951
1952       /* Now we expect either a semicolon, an ellipsis, or the end of the
1953          line.  */
1954       now = lr_token (ldfile, charmap, repertoire);
1955
1956       if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
1957         {
1958           /* XXX Should we bother implementing `....'?  `...' certainly
1959              will not be implemented.  */
1960           uint32_t to;
1961           int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
1962
1963           now = lr_token (ldfile, charmap, repertoire);
1964
1965           if (now->tok == tok_eol || now->tok == tok_eof)
1966             {
1967               lr_error (ldfile,
1968                         _("premature end of `translit_ignore' definition"));
1969               return;
1970             }
1971
1972           if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1973             {
1974               lr_error (ldfile, _("syntax error"));
1975               lr_ignore_rest (ldfile, 0);
1976               return;
1977             }
1978
1979           if (now->tok == tok_ucs4)
1980             to = now->val.ucs4;
1981           else
1982             /* Try to get the value.  */
1983             to = repertoire_find_value (repertoire, now->val.str.startmb,
1984                                         now->val.str.lenmb);
1985
1986           if (to == ILLEGAL_CHAR_VALUE)
1987             lr_error (ldfile, "invalid character name");
1988           else
1989             {
1990               /* Make sure the `to'-value is larger.  */
1991               if (to >= from)
1992                 {
1993                   newp->to = to;
1994                   newp->step = step;
1995                 }
1996               else
1997                 lr_error (ldfile, _("\
1998 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1999                           (to | from) < 65536 ? 4 : 8, to,
2000                           (to | from) < 65536 ? 4 : 8, from);
2001             }
2002
2003           /* And the next token.  */
2004           now = lr_token (ldfile, charmap, repertoire);
2005         }
2006
2007       if (now->tok == tok_eol || now->tok == tok_eof)
2008         /* We are done.  */
2009         return;
2010
2011       if (now->tok == tok_semicolon)
2012         /* Next round.  */
2013         continue;
2014
2015       /* If we come here something is wrong.  */
2016       lr_error (ldfile, _("syntax error"));
2017       lr_ignore_rest (ldfile, 0);
2018       return;
2019     }
2020 }
2021
2022
2023 /* The parser for the LC_CTYPE section of the locale definition.  */
2024 void
2025 ctype_read (struct linereader *ldfile, struct localedef_t *result,
2026             struct charmap_t *charmap, const char *repertoire_name,
2027             int ignore_content)
2028 {
2029   struct repertoire_t *repertoire = NULL;
2030   struct locale_ctype_t *ctype;
2031   struct token *now;
2032   enum token_t nowtok;
2033   size_t cnt;
2034   struct charseq *last_seq;
2035   uint32_t last_wch = 0;
2036   enum token_t last_token;
2037   enum token_t ellipsis_token;
2038   int step;
2039   char last_charcode[16];
2040   size_t last_charcode_len = 0;
2041   const char *last_str = NULL;
2042   int mapidx;
2043
2044   /* Get the repertoire we have to use.  */
2045   if (repertoire_name != NULL)
2046     repertoire = repertoire_read (repertoire_name);
2047
2048   /* The rest of the line containing `LC_CTYPE' must be free.  */
2049   lr_ignore_rest (ldfile, 1);
2050
2051
2052   do
2053     {
2054       now = lr_token (ldfile, charmap, NULL);
2055       nowtok = now->tok;
2056     }
2057   while (nowtok == tok_eol);
2058
2059   /* If we see `copy' now we are almost done.  */
2060   if (nowtok == tok_copy)
2061     {
2062       handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_ctype,
2063                    LC_CTYPE, "LC_CTYPE", ignore_content);
2064       return;
2065     }
2066
2067   /* Prepare the data structures.  */
2068   ctype_startup (ldfile, result, charmap, ignore_content);
2069   ctype = result->categories[LC_CTYPE].ctype;
2070
2071   /* Remember the repertoire we use.  */
2072   if (!ignore_content)
2073     ctype->repertoire = repertoire;
2074
2075   while (1)
2076     {
2077       unsigned long int class_bit = 0;
2078       unsigned long int class256_bit = 0;
2079       int handle_digits = 0;
2080
2081       /* Of course we don't proceed beyond the end of file.  */
2082       if (nowtok == tok_eof)
2083         break;
2084
2085       /* Ingore empty lines.  */
2086       if (nowtok == tok_eol)
2087         {
2088           now = lr_token (ldfile, charmap, NULL);
2089           nowtok = now->tok;
2090           continue;
2091         }
2092
2093       switch (nowtok)
2094         {
2095         case tok_charclass:
2096           now = lr_token (ldfile, charmap, NULL);
2097           while (now->tok == tok_ident || now->tok == tok_string)
2098             {
2099               ctype_class_new (ldfile, ctype, now->val.str.startmb);
2100               now = lr_token (ldfile, charmap, NULL);
2101               if (now->tok != tok_semicolon)
2102                 break;
2103               now = lr_token (ldfile, charmap, NULL);
2104             }
2105           if (now->tok != tok_eol)
2106             SYNTAX_ERROR (_("\
2107 %s: syntax error in definition of new character class"), "LC_CTYPE");
2108           break;
2109
2110         case tok_charconv:
2111           now = lr_token (ldfile, charmap, NULL);
2112           while (now->tok == tok_ident || now->tok == tok_string)
2113             {
2114               ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2115               now = lr_token (ldfile, charmap, NULL);
2116               if (now->tok != tok_semicolon)
2117                 break;
2118               now = lr_token (ldfile, charmap, NULL);
2119             }
2120           if (now->tok != tok_eol)
2121             SYNTAX_ERROR (_("\
2122 %s: syntax error in definition of new character map"), "LC_CTYPE");
2123           break;
2124
2125         case tok_class:
2126           /* Ignore the rest of the line if we don't need the input of
2127              this line.  */
2128           if (ignore_content)
2129             {
2130               lr_ignore_rest (ldfile, 0);
2131               break;
2132             }
2133
2134           /* We simply forget the `class' keyword and use the following
2135              operand to determine the bit.  */
2136           now = lr_token (ldfile, charmap, NULL);
2137           if (now->tok == tok_ident || now->tok == tok_string)
2138             {
2139               /* Must can be one of the predefined class names.  */
2140               for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2141                 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2142                   break;
2143               if (cnt >= ctype->nr_charclass)
2144                 {
2145 #ifdef PREDEFINED_CLASSES
2146                   if (now->val.str.lenmb == 8
2147                       && memcmp ("special1", now->val.str.startmb, 8) == 0)
2148                     class_bit = _ISwspecial1;
2149                   else if (now->val.str.lenmb == 8
2150                       && memcmp ("special2", now->val.str.startmb, 8) == 0)
2151                     class_bit = _ISwspecial2;
2152                   else if (now->val.str.lenmb == 8
2153                       && memcmp ("special3", now->val.str.startmb, 8) == 0)
2154                     class_bit = _ISwspecial3;
2155                   else
2156 #endif
2157                     {
2158                       /* OK, it's a new class.  */
2159                       ctype_class_new (ldfile, ctype, now->val.str.startmb);
2160
2161                       class_bit = _ISwbit (ctype->nr_charclass - 1);
2162                     }
2163                 }
2164               else
2165                 {
2166                   class_bit = _ISwbit (cnt);
2167
2168                   free (now->val.str.startmb);
2169                 }
2170             }
2171           else if (now->tok == tok_digit)
2172             goto handle_tok_digit;
2173           else if (now->tok < tok_upper || now->tok > tok_blank)
2174             goto err_label;
2175           else
2176             {
2177               class_bit = BITw (now->tok);
2178               class256_bit = BIT (now->tok);
2179             }
2180
2181           /* The next character must be a semicolon.  */
2182           now = lr_token (ldfile, charmap, NULL);
2183           if (now->tok != tok_semicolon)
2184             goto err_label;
2185           goto read_charclass;
2186
2187         case tok_upper:
2188         case tok_lower:
2189         case tok_alpha:
2190         case tok_alnum:
2191         case tok_space:
2192         case tok_cntrl:
2193         case tok_punct:
2194         case tok_graph:
2195         case tok_print:
2196         case tok_xdigit:
2197         case tok_blank:
2198           /* Ignore the rest of the line if we don't need the input of
2199              this line.  */
2200           if (ignore_content)
2201             {
2202               lr_ignore_rest (ldfile, 0);
2203               break;
2204             }
2205
2206           class_bit = BITw (now->tok);
2207           class256_bit = BIT (now->tok);
2208           handle_digits = 0;
2209         read_charclass:
2210           ctype->class_done |= class_bit;
2211           last_token = tok_none;
2212           ellipsis_token = tok_none;
2213           step = 1;
2214           now = lr_token (ldfile, charmap, NULL);
2215           while (now->tok != tok_eol && now->tok != tok_eof)
2216             {
2217               uint32_t wch;
2218               struct charseq *seq;
2219
2220               if (ellipsis_token == tok_none)
2221                 {
2222                   if (get_character (now, charmap, repertoire, &seq, &wch))
2223                     goto err_label;
2224
2225                   if (!ignore_content && seq != NULL && seq->nbytes == 1)
2226                     /* Yep, we can store information about this byte
2227                        sequence.  */
2228                     ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2229
2230                   if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2231                       && class_bit != 0)
2232                     /* We have the UCS4 position.  */
2233                     *find_idx (ctype, &ctype->class_collection,
2234                                &ctype->class_collection_max,
2235                                &ctype->class_collection_act, wch) |= class_bit;
2236
2237                   last_token = now->tok;
2238                   /* Terminate the string.  */
2239                   if (last_token == tok_bsymbol)
2240                     {
2241                       now->val.str.startmb[now->val.str.lenmb] = '\0';
2242                       last_str = now->val.str.startmb;
2243                     }
2244                   else
2245                     last_str = NULL;
2246                   last_seq = seq;
2247                   last_wch = wch;
2248                   memcpy (last_charcode, now->val.charcode.bytes, 16);
2249                   last_charcode_len = now->val.charcode.nbytes;
2250
2251                   if (!ignore_content && handle_digits == 1)
2252                     {
2253                       /* We must store the digit values.  */
2254                       if (ctype->mbdigits_act == ctype->mbdigits_max)
2255                         {
2256                           ctype->mbdigits_max += 10;
2257                           ctype->mbdigits = xrealloc (ctype->mbdigits,
2258                                                       (ctype->mbdigits_max
2259                                                        * sizeof (char *)));
2260                           ctype->wcdigits_max += 10;
2261                           ctype->wcdigits = xrealloc (ctype->wcdigits,
2262                                                       (ctype->wcdigits_max
2263                                                        * sizeof (uint32_t)));
2264                         }
2265
2266                       ctype->mbdigits[ctype->mbdigits_act++] = seq;
2267                       ctype->wcdigits[ctype->wcdigits_act++] = wch;
2268                     }
2269                   else if (!ignore_content && handle_digits == 2)
2270                     {
2271                       /* We must store the digit values.  */
2272                       if (ctype->outdigits_act >= 10)
2273                         {
2274                           lr_error (ldfile, _("\
2275 %s: field `%s' does not contain exactly ten entries"),
2276                             "LC_CTYPE", "outdigit");
2277                           goto err_label;
2278                         }
2279
2280                       ctype->mboutdigits[ctype->outdigits_act] = seq;
2281                       ctype->wcoutdigits[ctype->outdigits_act] = wch;
2282                       ++ctype->outdigits_act;
2283                     }
2284                 }
2285               else
2286                 {
2287                   /* Now it gets complicated.  We have to resolve the
2288                      ellipsis problem.  First we must distinguish between
2289                      the different kind of ellipsis and this must match the
2290                      tokens we have seen.  */
2291                   assert (last_token != tok_none);
2292
2293                   if (last_token != now->tok)
2294                     {
2295                       lr_error (ldfile, _("\
2296 ellipsis range must be marked by two operands of same type"));
2297                       lr_ignore_rest (ldfile, 0);
2298                       break;
2299                     }
2300
2301                   if (last_token == tok_bsymbol)
2302                     {
2303                       if (ellipsis_token == tok_ellipsis3)
2304                         lr_error (ldfile, _("with symbolic name range values \
2305 the absolute ellipsis `...' must not be used"));
2306
2307                       charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2308                                                    repertoire, now, last_str,
2309                                                    class256_bit, class_bit,
2310                                                    (ellipsis_token
2311                                                     == tok_ellipsis4
2312                                                     ? 10 : 16),
2313                                                    ignore_content,
2314                                                    handle_digits, step);
2315                     }
2316                   else if (last_token == tok_ucs4)
2317                     {
2318                       if (ellipsis_token != tok_ellipsis2)
2319                         lr_error (ldfile, _("\
2320 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2321
2322                       charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2323                                                repertoire, now, last_wch,
2324                                                class256_bit, class_bit,
2325                                                ignore_content, handle_digits,
2326                                                step);
2327                     }
2328                   else
2329                     {
2330                       assert (last_token == tok_charcode);
2331
2332                       if (ellipsis_token != tok_ellipsis3)
2333                         lr_error (ldfile, _("\
2334 with character code range values one must use the absolute ellipsis `...'"));
2335
2336                       charclass_charcode_ellipsis (ldfile, ctype, charmap,
2337                                                    repertoire, now,
2338                                                    last_charcode,
2339                                                    last_charcode_len,
2340                                                    class256_bit, class_bit,
2341                                                    ignore_content,
2342                                                    handle_digits);
2343                     }
2344
2345                   /* Now we have used the last value.  */
2346                   last_token = tok_none;
2347                 }
2348
2349               /* Next we expect a semicolon or the end of the line.  */
2350               now = lr_token (ldfile, charmap, NULL);
2351               if (now->tok == tok_eol || now->tok == tok_eof)
2352                 break;
2353
2354               if (last_token != tok_none
2355                   && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2356                 {
2357                   if (now->tok == tok_ellipsis2_2)
2358                     {
2359                       now->tok = tok_ellipsis2;
2360                       step = 2;
2361                     }
2362                   else if (now->tok == tok_ellipsis4_2)
2363                     {
2364                       now->tok = tok_ellipsis4;
2365                       step = 2;
2366                     }
2367
2368                   ellipsis_token = now->tok;
2369
2370                   now = lr_token (ldfile, charmap, NULL);
2371                   continue;
2372                 }
2373
2374               if (now->tok != tok_semicolon)
2375                 goto err_label;
2376
2377               /* And get the next character.  */
2378               now = lr_token (ldfile, charmap, NULL);
2379
2380               ellipsis_token = tok_none;
2381               step = 1;
2382             }
2383           break;
2384
2385         case tok_digit:
2386           /* Ignore the rest of the line if we don't need the input of
2387              this line.  */
2388           if (ignore_content)
2389             {
2390               lr_ignore_rest (ldfile, 0);
2391               break;
2392             }
2393
2394         handle_tok_digit:
2395           class_bit = _ISwdigit;
2396           class256_bit = _ISdigit;
2397           handle_digits = 1;
2398           goto read_charclass;
2399
2400         case tok_outdigit:
2401           /* Ignore the rest of the line if we don't need the input of
2402              this line.  */
2403           if (ignore_content)
2404             {
2405               lr_ignore_rest (ldfile, 0);
2406               break;
2407             }
2408
2409           if (ctype->outdigits_act != 0)
2410             lr_error (ldfile, _("\
2411 %s: field `%s' declared more than once"),
2412                       "LC_CTYPE", "outdigit");
2413           class_bit = 0;
2414           class256_bit = 0;
2415           handle_digits = 2;
2416           goto read_charclass;
2417
2418         case tok_toupper:
2419           /* Ignore the rest of the line if we don't need the input of
2420              this line.  */
2421           if (ignore_content)
2422             {
2423               lr_ignore_rest (ldfile, 0);
2424               break;
2425             }
2426
2427           mapidx = 0;
2428           goto read_mapping;
2429
2430         case tok_tolower:
2431           /* Ignore the rest of the line if we don't need the input of
2432              this line.  */
2433           if (ignore_content)
2434             {
2435               lr_ignore_rest (ldfile, 0);
2436               break;
2437             }
2438
2439           mapidx = 1;
2440           goto read_mapping;
2441
2442         case tok_map:
2443           /* Ignore the rest of the line if we don't need the input of
2444              this line.  */
2445           if (ignore_content)
2446             {
2447               lr_ignore_rest (ldfile, 0);
2448               break;
2449             }
2450
2451           /* We simply forget the `map' keyword and use the following
2452              operand to determine the mapping.  */
2453           now = lr_token (ldfile, charmap, NULL);
2454           if (now->tok == tok_ident || now->tok == tok_string)
2455             {
2456               size_t cnt;
2457
2458               for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2459                 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2460                   break;
2461
2462               if (cnt < ctype->map_collection_nr)
2463                 free (now->val.str.startmb);
2464               else
2465                 /* OK, it's a new map.  */
2466                 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2467
2468               mapidx = cnt;
2469             }
2470           else if (now->tok < tok_toupper || now->tok > tok_tolower)
2471             goto err_label;
2472           else
2473             mapidx = now->tok - tok_toupper;
2474
2475           now = lr_token (ldfile, charmap, NULL);
2476           /* This better should be a semicolon.  */
2477           if (now->tok != tok_semicolon)
2478             goto err_label;
2479
2480         read_mapping:
2481           /* Test whether this mapping was already defined.  */
2482           if (ctype->tomap_done[mapidx])
2483             {
2484               lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2485                         ctype->mapnames[mapidx]);
2486               lr_ignore_rest (ldfile, 0);
2487               break;
2488             }
2489           ctype->tomap_done[mapidx] = 1;
2490
2491           now = lr_token (ldfile, charmap, NULL);
2492           while (now->tok != tok_eol && now->tok != tok_eof)
2493             {
2494               struct charseq *from_seq;
2495               uint32_t from_wch;
2496               struct charseq *to_seq;
2497               uint32_t to_wch;
2498
2499               /* Every pair starts with an opening brace.  */
2500               if (now->tok != tok_open_brace)
2501                 goto err_label;
2502
2503               /* Next comes the from-value.  */
2504               now = lr_token (ldfile, charmap, NULL);
2505               if (get_character (now, charmap, repertoire, &from_seq,
2506                                  &from_wch) != 0)
2507                 goto err_label;
2508
2509               /* The next is a comma.  */
2510               now = lr_token (ldfile, charmap, NULL);
2511               if (now->tok != tok_comma)
2512                 goto err_label;
2513
2514               /* And the other value.  */
2515               now = lr_token (ldfile, charmap, NULL);
2516               if (get_character (now, charmap, repertoire, &to_seq,
2517                                  &to_wch) != 0)
2518                 goto err_label;
2519
2520               /* And the last thing is the closing brace.  */
2521               now = lr_token (ldfile, charmap, NULL);
2522               if (now->tok != tok_close_brace)
2523                 goto err_label;
2524
2525               if (!ignore_content)
2526                 {
2527                   if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2528                       && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2529                     /* We can use this value.  */
2530                     ctype->map256_collection[mapidx][from_seq->bytes[0]]
2531                       = to_seq->bytes[0];
2532
2533                   if (from_wch != ILLEGAL_CHAR_VALUE
2534                       && to_wch != ILLEGAL_CHAR_VALUE)
2535                     /* Both correct values.  */
2536                     *find_idx (ctype, &ctype->map_collection[mapidx],
2537                                &ctype->map_collection_max[mapidx],
2538                                &ctype->map_collection_act[mapidx],
2539                                from_wch) = to_wch;
2540                 }
2541
2542               /* Now comes a semicolon or the end of the line/file.  */
2543               now = lr_token (ldfile, charmap, NULL);
2544               if (now->tok == tok_semicolon)
2545                 now = lr_token (ldfile, charmap, NULL);
2546             }
2547           break;
2548
2549         case tok_translit_start:
2550           /* Ignore the rest of the line if we don't need the input of
2551              this line.  */
2552           if (ignore_content)
2553             {
2554               lr_ignore_rest (ldfile, 0);
2555               break;
2556             }
2557
2558           /* The rest of the line better should be empty.  */
2559           lr_ignore_rest (ldfile, 1);
2560
2561           /* We count here the number of allocated entries in the `translit'
2562              array.  */
2563           cnt = 0;
2564
2565           /* We proceed until we see the `translit_end' token.  */
2566           while (now = lr_token (ldfile, charmap, repertoire),
2567                  now->tok != tok_translit_end && now->tok != tok_eof)
2568             {
2569               if (now->tok == tok_eol)
2570                 /* Ignore empty lines.  */
2571                 continue;
2572
2573               if (now->tok == tok_translit_end)
2574                 {
2575                   lr_ignore_rest (ldfile, 0);
2576                   break;
2577                 }
2578
2579               if (now->tok == tok_include)
2580                 {
2581                   /* We have to include locale.  */
2582                   const char *locale_name;
2583                   const char *repertoire_name;
2584
2585                   now = lr_token (ldfile, charmap, NULL);
2586                   /* This should be a string or an identifier.  In any
2587                      case something to name a locale.  */
2588                   if (now->tok != tok_string && now->tok != tok_ident)
2589                     {
2590                     translit_syntax:
2591                       lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2592                       lr_ignore_rest (ldfile, 0);
2593                       continue;
2594                     }
2595                   locale_name = now->val.str.startmb;
2596
2597                   /* Next should be a semicolon.  */
2598                   now = lr_token (ldfile, charmap, NULL);
2599                   if (now->tok != tok_semicolon)
2600                     goto translit_syntax;
2601
2602                   /* Now the repertoire name.  */
2603                   now = lr_token (ldfile, charmap, NULL);
2604                   if ((now->tok != tok_string && now->tok != tok_ident)
2605                       || now->val.str.startmb == NULL)
2606                     goto translit_syntax;
2607                   repertoire_name = now->val.str.startmb;
2608
2609                   /* We must not have more than one `include'.  */
2610                   if (ctype->translit_copy_locale != NULL)
2611                     {
2612                       lr_error (ldfile, _("\
2613 %s: only one `include' instruction allowed"), "LC_CTYPE");
2614                       lr_ignore_rest (ldfile, 0);
2615                       continue;
2616                     }
2617
2618                   ctype->translit_copy_locale = locale_name;
2619                   ctype->translit_copy_repertoire = repertoire_name;
2620
2621                   /* The rest of the line must be empty.  */
2622                   lr_ignore_rest (ldfile, 1);
2623
2624                   /* Make sure the locale is read.  */
2625                   add_to_readlist (LC_CTYPE, ctype->translit_copy_locale,
2626                                    repertoire_name, 1, NULL);
2627                   continue;
2628                 }
2629               else if (now->tok == tok_default_missing)
2630                 {
2631                   uint32_t *wstr;
2632
2633                   /* We expect a single character or string as the
2634                      argument.  */
2635                   now = lr_token (ldfile, charmap, NULL);
2636                   wstr = read_widestring (ldfile, now, charmap, repertoire);
2637
2638                   if (wstr != NULL)
2639                     {
2640                       if (ctype->default_missing != NULL)
2641                         {
2642                           lr_error (ldfile, _("\
2643 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2644                           error_at_line (0, 0, ctype->default_missing_file,
2645                                          ctype->default_missing_lineno,
2646                                          _("previous definition was here"));
2647                         }
2648                       else
2649                         {
2650                           ctype->default_missing = wstr;
2651                           ctype->default_missing_file = ldfile->fname;
2652                           ctype->default_missing_lineno = ldfile->lineno;
2653                         }
2654                     }
2655                   lr_ignore_rest (ldfile, 1);
2656                   continue;
2657                 }
2658               else if (now->tok == tok_translit_ignore)
2659                 {
2660                   read_translit_ignore_entry (ldfile, ctype, charmap,
2661                                               repertoire);
2662                   continue;
2663                 }
2664
2665               read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2666             }
2667           break;
2668
2669         case tok_ident:
2670           /* Ignore the rest of the line if we don't need the input of
2671              this line.  */
2672           if (ignore_content)
2673             {
2674               lr_ignore_rest (ldfile, 0);
2675               break;
2676             }
2677
2678           /* This could mean one of several things.  First test whether
2679              it's a character class name.  */
2680           for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2681             if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2682               break;
2683           if (cnt < ctype->nr_charclass)
2684             {
2685               class_bit = _ISwbit (cnt);
2686               class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2687               free (now->val.str.startmb);
2688               goto read_charclass;
2689             }
2690           for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2691             if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2692               break;
2693           if (cnt < ctype->map_collection_nr)
2694             {
2695               mapidx = cnt;
2696               free (now->val.str.startmb);
2697               goto read_mapping;
2698             }
2699 #ifdef PREDEFINED_CLASSES
2700           if (strcmp (now->val.str.startmb, "special1") == 0)
2701             {
2702               class_bit = _ISwspecial1;
2703               free (now->val.str.startmb);
2704               goto read_charclass;
2705             }
2706           if (strcmp (now->val.str.startmb, "special2") == 0)
2707             {
2708               class_bit = _ISwspecial2;
2709               free (now->val.str.startmb);
2710               goto read_charclass;
2711             }
2712           if (strcmp (now->val.str.startmb, "special3") == 0)
2713             {
2714               class_bit = _ISwspecial3;
2715               free (now->val.str.startmb);
2716               goto read_charclass;
2717             }
2718           if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2719             {
2720               mapidx = 2;
2721               goto read_mapping;
2722             }
2723 #endif
2724           break;
2725
2726         case tok_end:
2727           /* Next we assume `LC_CTYPE'.  */
2728           now = lr_token (ldfile, charmap, NULL);
2729           if (now->tok == tok_eof)
2730             break;
2731           if (now->tok == tok_eol)
2732             lr_error (ldfile, _("%s: incomplete `END' line"),
2733                       "LC_CTYPE");
2734           else if (now->tok != tok_lc_ctype)
2735             lr_error (ldfile, _("\
2736 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2737           lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2738           return;
2739
2740         default:
2741         err_label:
2742           if (now->tok != tok_eof)
2743             SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2744         }
2745
2746       /* Prepare for the next round.  */
2747       now = lr_token (ldfile, charmap, NULL);
2748       nowtok = now->tok;
2749     }
2750
2751   /* When we come here we reached the end of the file.  */
2752   lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2753 }
2754
2755
2756 static void
2757 set_class_defaults (struct locale_ctype_t *ctype, struct charmap_t *charmap,
2758                     struct repertoire_t *repertoire)
2759 {
2760   size_t cnt;
2761
2762   /* These function defines the default values for the classes and conversions
2763      according to POSIX.2 2.5.2.1.
2764      It may seem that the order of these if-blocks is arbitrary but it is NOT.
2765      Don't move them unless you know what you do!  */
2766
2767   void set_default (int bitpos, int from, int to)
2768     {
2769       char tmp[2];
2770       int ch;
2771       int bit = _ISbit (bitpos);
2772       int bitw = _ISwbit (bitpos);
2773       /* Define string.  */
2774       strcpy (tmp, "?");
2775
2776       for (ch = from; ch <= to; ++ch)
2777         {
2778           struct charseq *seq;
2779           tmp[0] = ch;
2780
2781           seq = charmap_find_value (charmap, tmp, 1);
2782           if (seq == NULL)
2783             {
2784               if (!be_quiet)
2785                 error (0, 0, _("\
2786 %s: character `%s' not defined in charmap while needed as default value"),
2787                        "LC_CTYPE", tmp);
2788             }
2789           else if (seq->nbytes != 1)
2790             error (0, 0, _("\
2791 %s: character `%s' in charmap not representable with one byte"),
2792                    "LC_CTYPE", tmp);
2793           else
2794             ctype->class256_collection[seq->bytes[0]] |= bit;
2795
2796           /* No need to search here, the ASCII value is also the Unicode
2797              value.  */
2798           ELEM (ctype, class_collection, , ch) |= bitw;
2799         }
2800     }
2801
2802   /* Set default values if keyword was not present.  */
2803   if ((ctype->class_done & BITw (tok_upper)) == 0)
2804     /* "If this keyword [lower] is not specified, the lowercase letters
2805         `A' through `Z', ..., shall automatically belong to this class,
2806         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
2807     set_default (BITPOS (tok_upper), 'A', 'Z');
2808
2809   if ((ctype->class_done & BITw (tok_lower)) == 0)
2810     /* "If this keyword [lower] is not specified, the lowercase letters
2811         `a' through `z', ..., shall automatically belong to this class,
2812         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
2813     set_default (BITPOS (tok_lower), 'a', 'z');
2814
2815   if ((ctype->class_done & BITw (tok_alpha)) == 0)
2816     {
2817       /* Table 2-6 in P1003.2 says that characters in class `upper' or
2818          class `lower' *must* be in class `alpha'.  */
2819       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
2820       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
2821
2822       for (cnt = 0; cnt < 256; ++cnt)
2823         if ((ctype->class256_collection[cnt] & mask) != 0)
2824           ctype->class256_collection[cnt] |= BIT (tok_alpha);
2825
2826       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2827         if ((ctype->class_collection[cnt] & maskw) != 0)
2828           ctype->class_collection[cnt] |= BITw (tok_alpha);
2829     }
2830
2831   if ((ctype->class_done & BITw (tok_digit)) == 0)
2832     /* "If this keyword [digit] is not specified, the digits `0' through
2833         `9', ..., shall automatically belong to this class, with
2834         implementation-defined character values."  [P1003.2, 2.5.2.1]  */
2835     set_default (BITPOS (tok_digit), '0', '9');
2836
2837   /* "Only characters specified for the `alpha' and `digit' keyword
2838      shall be specified.  Characters specified for the keyword `alpha'
2839      and `digit' are automatically included in this class.  */
2840   {
2841     unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
2842     unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
2843
2844     for (cnt = 0; cnt < 256; ++cnt)
2845       if ((ctype->class256_collection[cnt] & mask) != 0)
2846         ctype->class256_collection[cnt] |= BIT (tok_alnum);
2847
2848     for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2849       if ((ctype->class_collection[cnt] & maskw) != 0)
2850         ctype->class_collection[cnt] |= BITw (tok_alnum);
2851   }
2852
2853   if ((ctype->class_done & BITw (tok_space)) == 0)
2854     /* "If this keyword [space] is not specified, the characters <space>,
2855         <form-feed>, <newline>, <carriage-return>, <tab>, and
2856         <vertical-tab>, ..., shall automatically belong to this class,
2857         with implementation-defined character values."  [P1003.2, 2.5.2.1]  */
2858     {
2859       struct charseq *seq;
2860
2861       seq = charmap_find_value (charmap, "space", 5);
2862       if (seq == NULL)
2863         seq = charmap_find_value (charmap, "SP", 2);
2864       if (seq == NULL)
2865         seq = charmap_find_value (charmap, "U00000020", 9);
2866       if (seq == NULL)
2867         {
2868           if (!be_quiet)
2869             error (0, 0, _("\
2870 %s: character `%s' not defined while needed as default value"),
2871                    "LC_CTYPE", "<space>");
2872         }
2873       else if (seq->nbytes != 1)
2874         error (0, 0, _("\
2875 %s: character `%s' in charmap not representable with one byte"),
2876                "LC_CTYPE", "<space>");
2877       else
2878         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2879
2880       /* No need to search.  */
2881       ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
2882
2883       seq = charmap_find_value (charmap, "form-feed", 9);
2884       if (seq == NULL)
2885         seq = charmap_find_value (charmap, "U0000000C", 9);
2886       if (seq == NULL)
2887         {
2888           if (!be_quiet)
2889             error (0, 0, _("\
2890 %s: character `%s' not defined while needed as default value"),
2891                    "LC_CTYPE", "<form-feed>");
2892         }
2893       else if (seq->nbytes != 1)
2894         error (0, 0, _("\
2895 %s: character `%s' in charmap not representable with one byte"),
2896                "LC_CTYPE", "<form-feed>");
2897       else
2898         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2899
2900       /* No need to search.  */
2901       ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
2902
2903
2904       seq = charmap_find_value (charmap, "newline", 7);
2905       if (seq == NULL)
2906         seq = charmap_find_value (charmap, "U0000000A", 9);
2907       if (seq == NULL)
2908         {
2909           if (!be_quiet)
2910             error (0, 0, _("\
2911 character `%s' not defined while needed as default value"),
2912                    "<newline>");
2913         }
2914       else if (seq->nbytes != 1)
2915         error (0, 0, _("\
2916 %s: character `%s' in charmap not representable with one byte"),
2917                "LC_CTYPE", "<newline>");
2918       else
2919         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2920
2921       /* No need to search.  */
2922       ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
2923
2924
2925       seq = charmap_find_value (charmap, "carriage-return", 15);
2926       if (seq == NULL)
2927         seq = charmap_find_value (charmap, "U0000000D", 9);
2928       if (seq == NULL)
2929         {
2930           if (!be_quiet)
2931             error (0, 0, _("\
2932 %s: character `%s' not defined while needed as default value"),
2933                    "LC_CTYPE", "<carriage-return>");
2934         }
2935       else if (seq->nbytes != 1)
2936         error (0, 0, _("\
2937 %s: character `%s' in charmap not representable with one byte"),
2938                "LC_CTYPE", "<carriage-return>");
2939       else
2940         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2941
2942       /* No need to search.  */
2943       ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
2944
2945
2946       seq = charmap_find_value (charmap, "tab", 3);
2947       if (seq == NULL)
2948         seq = charmap_find_value (charmap, "U00000009", 9);
2949       if (seq == NULL)
2950         {
2951           if (!be_quiet)
2952             error (0, 0, _("\
2953 %s: character `%s' not defined while needed as default value"),
2954                    "LC_CTYPE", "<tab>");
2955         }
2956       else if (seq->nbytes != 1)
2957         error (0, 0, _("\
2958 %s: character `%s' in charmap not representable with one byte"),
2959                "LC_CTYPE", "<tab>");
2960       else
2961         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2962
2963       /* No need to search.  */
2964       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
2965
2966
2967       seq = charmap_find_value (charmap, "vertical-tab", 12);
2968       if (seq == NULL)
2969         seq = charmap_find_value (charmap, "U0000000B", 9);
2970       if (seq == NULL)
2971         {
2972           if (!be_quiet)
2973             error (0, 0, _("\
2974 %s: character `%s' not defined while needed as default value"),
2975                    "LC_CTYPE", "<vertical-tab>");
2976         }
2977       else if (seq->nbytes != 1)
2978         error (0, 0, _("\
2979 %s: character `%s' in charmap not representable with one byte"),
2980                "LC_CTYPE", "<vertical-tab>");
2981       else
2982         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2983
2984       /* No need to search.  */
2985       ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
2986     }
2987
2988   if ((ctype->class_done & BITw (tok_xdigit)) == 0)
2989     /* "If this keyword is not specified, the digits `0' to `9', the
2990         uppercase letters `A' through `F', and the lowercase letters `a'
2991         through `f', ..., shell automatically belong to this class, with
2992         implementation defined character values."  [P1003.2, 2.5.2.1]  */
2993     {
2994       set_default (BITPOS (tok_xdigit), '0', '9');
2995       set_default (BITPOS (tok_xdigit), 'A', 'F');
2996       set_default (BITPOS (tok_xdigit), 'a', 'f');
2997     }
2998
2999   if ((ctype->class_done & BITw (tok_blank)) == 0)
3000     /* "If this keyword [blank] is unspecified, the characters <space> and
3001        <tab> shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3002    {
3003       struct charseq *seq;
3004
3005       seq = charmap_find_value (charmap, "space", 5);
3006       if (seq == NULL)
3007         seq = charmap_find_value (charmap, "SP", 2);
3008       if (seq == NULL)
3009         seq = charmap_find_value (charmap, "U00000020", 9);
3010       if (seq == NULL)
3011         {
3012           if (!be_quiet)
3013             error (0, 0, _("\
3014 %s: character `%s' not defined while needed as default value"),
3015                    "LC_CTYPE", "<space>");
3016         }
3017       else if (seq->nbytes != 1)
3018         error (0, 0, _("\
3019 %s: character `%s' in charmap not representable with one byte"),
3020                "LC_CTYPE", "<space>");
3021       else
3022         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3023
3024       /* No need to search.  */
3025       ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3026
3027
3028       seq = charmap_find_value (charmap, "tab", 3);
3029       if (seq == NULL)
3030         seq = charmap_find_value (charmap, "U00000009", 9);
3031       if (seq == NULL)
3032         {
3033           if (!be_quiet)
3034             error (0, 0, _("\
3035 %s: character `%s' not defined while needed as default value"),
3036                    "LC_CTYPE", "<tab>");
3037         }
3038       else if (seq->nbytes != 1)
3039         error (0, 0, _("\
3040 %s: character `%s' in charmap not representable with one byte"),
3041                "LC_CTYPE", "<tab>");
3042       else
3043         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3044
3045       /* No need to search.  */
3046       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3047     }
3048
3049   if ((ctype->class_done & BITw (tok_graph)) == 0)
3050     /* "If this keyword [graph] is not specified, characters specified for
3051         the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3052         shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3053     {
3054       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3055         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3056       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3057         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3058         BITw (tok_punct);
3059       size_t cnt;
3060
3061       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3062         if ((ctype->class_collection[cnt] & maskw) != 0)
3063           ctype->class_collection[cnt] |= BITw (tok_graph);
3064
3065       for (cnt = 0; cnt < 256; ++cnt)
3066         if ((ctype->class256_collection[cnt] & mask) != 0)
3067           ctype->class256_collection[cnt] |= BIT (tok_graph);
3068     }
3069
3070   if ((ctype->class_done & BITw (tok_print)) == 0)
3071     /* "If this keyword [print] is not provided, characters specified for
3072         the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3073         and the <space> character shall belong to this character class."
3074         [P1003.2, 2.5.2.1]  */
3075     {
3076       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3077         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3078       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3079         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3080         BITw (tok_punct);
3081       size_t cnt;
3082       struct charseq *seq;
3083
3084       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3085         if ((ctype->class_collection[cnt] & maskw) != 0)
3086           ctype->class_collection[cnt] |= BITw (tok_print);
3087
3088       for (cnt = 0; cnt < 256; ++cnt)
3089         if ((ctype->class256_collection[cnt] & mask) != 0)
3090           ctype->class256_collection[cnt] |= BIT (tok_print);
3091
3092
3093       seq = charmap_find_value (charmap, "space", 5);
3094       if (seq == NULL)
3095         seq = charmap_find_value (charmap, "SP", 2);
3096       if (seq == NULL)
3097         seq = charmap_find_value (charmap, "U00000020", 9);
3098       if (seq == NULL)
3099         {
3100           if (!be_quiet)
3101             error (0, 0, _("\
3102 %s: character `%s' not defined while needed as default value"),
3103                    "LC_CTYPE", "<space>");
3104         }
3105       else if (seq->nbytes != 1)
3106         error (0, 0, _("\
3107 %s: character `%s' in charmap not representable with one byte"),
3108                "LC_CTYPE", "<space>");
3109       else
3110         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3111
3112       /* No need to search.  */
3113       ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3114     }
3115
3116   if (ctype->tomap_done[0] == 0)
3117     /* "If this keyword [toupper] is not specified, the lowercase letters
3118         `a' through `z', and their corresponding uppercase letters `A' to
3119         `Z', ..., shall automatically be included, with implementation-
3120         defined character values."  [P1003.2, 2.5.2.1]  */
3121     {
3122       char tmp[4];
3123       int ch;
3124
3125       strcpy (tmp, "<?>");
3126
3127       for (ch = 'a'; ch <= 'z'; ++ch)
3128         {
3129           struct charseq *seq_from, *seq_to;
3130
3131           tmp[1] = (char) ch;
3132
3133           seq_from = charmap_find_value (charmap, &tmp[1], 1);
3134           if (seq_from == NULL)
3135             {
3136               if (!be_quiet)
3137                 error (0, 0, _("\
3138 %s: character `%s' not defined while needed as default value"),
3139                        "LC_CTYPE", tmp);
3140             }
3141           else if (seq_from->nbytes != 1)
3142             {
3143               if (!be_quiet)
3144                 error (0, 0, _("\
3145 %s: character `%s' needed as default value not representable with one byte"),
3146                        "LC_CTYPE", tmp);
3147             }
3148           else
3149             {
3150               /* This conversion is implementation defined.  */
3151               tmp[1] = (char) (ch + ('A' - 'a'));
3152               seq_to = charmap_find_value (charmap, &tmp[1], 1);
3153               if (seq_to == NULL)
3154                 {
3155                   if (!be_quiet)
3156                     error (0, 0, _("\
3157 %s: character `%s' not defined while needed as default value"),
3158                            "LC_CTYPE", tmp);
3159                 }
3160               else if (seq_to->nbytes != 1)
3161                 {
3162                   if (!be_quiet)
3163                     error (0, 0, _("\
3164 %s: character `%s' needed as default value not representable with one byte"),
3165                            "LC_CTYPE", tmp);
3166                 }
3167               else
3168                 /* The index [0] is determined by the order of the
3169                    `ctype_map_newP' calls in `ctype_startup'.  */
3170                 ctype->map256_collection[0][seq_from->bytes[0]]
3171                   = seq_to->bytes[0];
3172             }
3173
3174           /* No need to search.  */
3175           ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3176         }
3177     }
3178
3179   if (ctype->tomap_done[1] == 0)
3180     /* "If this keyword [tolower] is not specified, the mapping shall be
3181        the reverse mapping of the one specified to `toupper'."  [P1003.2]  */
3182     {
3183       for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3184         if (ctype->map_collection[0][cnt] != 0)
3185           ELEM (ctype, map_collection, [1],
3186                 ctype->map_collection[0][cnt])
3187             = ctype->charnames[cnt];
3188
3189       for (cnt = 0; cnt < 256; ++cnt)
3190         if (ctype->map256_collection[0][cnt] != 0)
3191           ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3192     }
3193
3194   if (ctype->outdigits_act == 0)
3195     {
3196       for (cnt = 0; cnt < 10; ++cnt)
3197         {
3198           ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3199                                                          digits + cnt, 1);
3200
3201           if (ctype->mboutdigits[cnt] == NULL)
3202             ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3203                                                            longnames[cnt],
3204                                                            strlen (longnames[cnt]));
3205
3206           if (ctype->mboutdigits[cnt] == NULL)
3207             ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3208                                                            uninames[cnt], 9);
3209
3210           if (ctype->mboutdigits[cnt] == NULL)
3211             {
3212               /* Provide a replacement.  */
3213               error (0, 0, _("\
3214 no output digits defined and none of the standard names in the charmap"));
3215
3216               ctype->mboutdigits[cnt] = obstack_alloc (&charmap->mem_pool,
3217                                                        sizeof (struct charseq)
3218                                                        + 1);
3219
3220               /* This is better than nothing.  */
3221               ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3222               ctype->mboutdigits[cnt]->nbytes = 1;
3223             }
3224
3225           ctype->wcoutdigits[cnt] = L'0' + cnt;
3226         }
3227
3228       ctype->outdigits_act = 10;
3229     }
3230 }
3231
3232
3233 /* Construction of sparse 3-level tables.
3234    See wchar-lookup.h for their structure and the meaning of p and q.  */
3235
/* Builder state for a sparse 3-level bit table holding one bit per wide
   character.  Entries are added with wctype_table_add and the table is
   packed into RESULT by wctype_table_finalize.  */
struct wctype_table
{
  /* Parameters, to be set by the caller before wctype_table_init:
     a level-3 block holds 1 << P words, a level-2 block 1 << Q entries.  */
  unsigned int p, q;

  /* Working representation.  */

  /* Level 1: flat array indexed by the topmost bits of the character.
     Each entry is the number of a level-2 block, or ~0 if none exists.
     ALLOC and SIZE count entries.  */
  size_t level1_alloc, level1_size;
  uint32_t *level1;

  /* Level 2: blocks of 1 << Q entries.  Each entry is the number of a
     level-3 block, or ~0 if none exists.  ALLOC and SIZE count blocks.  */
  size_t level2_alloc, level2_size;
  uint32_t *level2;

  /* Level 3: blocks of 1 << P 32-bit words, one bit per character.
     ALLOC and SIZE count blocks.  */
  size_t level3_alloc, level3_size;
  uint32_t *level3;

  /* Compressed representation: a malloc'ed buffer and its length in
     bytes, filled in by wctype_table_finalize.  */
  size_t result_size;
  char *result;
};
3255
3256 /* Initialize.  Assumes t->p and t->q have already been set.  */
3257 static inline void
3258 wctype_table_init (struct wctype_table *t)
3259 {
3260   t->level1_alloc = t->level1_size = 0;
3261   t->level2_alloc = t->level2_size = 0;
3262   t->level3_alloc = t->level3_size = 0;
3263 }
3264
3265 /* Add one entry.  */
3266 static void
3267 wctype_table_add (struct wctype_table *t, uint32_t wc)
3268 {
3269   uint32_t index1 = wc >> (t->q + t->p + 5);
3270   uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3271   uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3272   uint32_t index4 = wc & 0x1f;
3273   size_t i, i1, i2;
3274
3275   if (index1 >= t->level1_size)
3276     {
3277       if (index1 >= t->level1_alloc)
3278         {
3279           size_t alloc = 2 * t->level1_alloc;
3280           if (alloc <= index1)
3281             alloc = index1 + 1;
3282           t->level1 = (t->level1_alloc > 0
3283                        ? (uint32_t *) xrealloc ((char *) t->level1,
3284                                                 alloc * sizeof (uint32_t))
3285                        : (uint32_t *) xmalloc (alloc * sizeof (uint32_t)));
3286           t->level1_alloc = alloc;
3287         }
3288       while (index1 >= t->level1_size)
3289         t->level1[t->level1_size++] = ~((uint32_t) 0);
3290     }
3291
3292   if (t->level1[index1] == ~((uint32_t) 0))
3293     {
3294       if (t->level2_size == t->level2_alloc)
3295         {
3296           size_t alloc = 2 * t->level2_alloc + 1;
3297           t->level2 = (t->level2_alloc > 0
3298                        ? (uint32_t *) xrealloc ((char *) t->level2,
3299                                                 (alloc << t->q) * sizeof (uint32_t))
3300                        : (uint32_t *) xmalloc ((alloc << t->q) * sizeof (uint32_t)));
3301           t->level2_alloc = alloc;
3302         }
3303       i1 = t->level2_size << t->q;
3304       i2 = (t->level2_size + 1) << t->q;
3305       for (i = i1; i < i2; i++)
3306         t->level2[i] = ~((uint32_t) 0);
3307       t->level1[index1] = t->level2_size++;
3308     }
3309
3310   index2 += t->level1[index1] << t->q;
3311
3312   if (t->level2[index2] == ~((uint32_t) 0))
3313     {
3314       if (t->level3_size == t->level3_alloc)
3315         {
3316           size_t alloc = 2 * t->level3_alloc + 1;
3317           t->level3 = (t->level3_alloc > 0
3318                        ? (uint32_t *) xrealloc ((char *) t->level3,
3319                                                 (alloc << t->p) * sizeof (uint32_t))
3320                        : (uint32_t *) xmalloc ((alloc << t->p) * sizeof (uint32_t)));
3321           t->level3_alloc = alloc;
3322         }
3323       i1 = t->level3_size << t->p;
3324       i2 = (t->level3_size + 1) << t->p;
3325       for (i = i1; i < i2; i++)
3326         t->level3[i] = 0;
3327       t->level2[index2] = t->level3_size++;
3328     }
3329
3330   index3 += t->level2[index2] << t->p;
3331
3332   t->level3[index3] |= (uint32_t)1 << index4;
3333 }
3334
3335 /* Finalize and shrink.  */
3336 static void
3337 wctype_table_finalize (struct wctype_table *t)
3338 {
3339   size_t i, j, k;
3340   uint32_t reorder3[t->level3_size];
3341   uint32_t reorder2[t->level2_size];
3342   uint32_t level1_offset, level2_offset, level3_offset;
3343
3344   /* Uniquify level3 blocks.  */
3345   k = 0;
3346   for (j = 0; j < t->level3_size; j++)
3347     {
3348       for (i = 0; i < k; i++)
3349         if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3350                     (1 << t->p) * sizeof (uint32_t)) == 0)
3351           break;
3352       /* Relocate block j to block i.  */
3353       reorder3[j] = i;
3354       if (i == k)
3355         {
3356           if (i != j)
3357             memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3358                     (1 << t->p) * sizeof (uint32_t));
3359           k++;
3360         }
3361     }
3362   t->level3_size = k;
3363
3364   for (i = 0; i < (t->level2_size << t->q); i++)
3365     if (t->level2[i] != ~((uint32_t) 0))
3366       t->level2[i] = reorder3[t->level2[i]];
3367
3368   /* Uniquify level2 blocks.  */
3369   k = 0;
3370   for (j = 0; j < t->level2_size; j++)
3371     {
3372       for (i = 0; i < k; i++)
3373         if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3374                     (1 << t->q) * sizeof (uint32_t)) == 0)
3375           break;
3376       /* Relocate block j to block i.  */
3377       reorder2[j] = i;
3378       if (i == k)
3379         {
3380           if (i != j)
3381             memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3382                     (1 << t->q) * sizeof (uint32_t));
3383           k++;
3384         }
3385     }
3386   t->level2_size = k;
3387
3388   for (i = 0; i < t->level1_size; i++)
3389     if (t->level1[i] != ~((uint32_t) 0))
3390       t->level1[i] = reorder2[t->level1[i]];
3391
3392   /* Create and fill the resulting compressed representation.  */
3393   t->result_size =
3394     5 * sizeof (uint32_t)
3395     + t->level1_size * sizeof (uint32_t)
3396     + (t->level2_size << t->q) * sizeof (uint32_t)
3397     + (t->level3_size << t->p) * sizeof (uint32_t);
3398   t->result = (char *) xmalloc (t->result_size);
3399
3400   level1_offset =
3401     5 * sizeof (uint32_t);
3402   level2_offset =
3403     5 * sizeof (uint32_t)
3404     + t->level1_size * sizeof (uint32_t);
3405   level3_offset =
3406     5 * sizeof (uint32_t)
3407     + t->level1_size * sizeof (uint32_t)
3408     + (t->level2_size << t->q) * sizeof (uint32_t);
3409
3410   ((uint32_t *) t->result)[0] = t->q + t->p + 5;
3411   ((uint32_t *) t->result)[1] = t->level1_size;
3412   ((uint32_t *) t->result)[2] = t->p + 5;
3413   ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3414   ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3415
3416   for (i = 0; i < t->level1_size; i++)
3417     ((uint32_t *) (t->result + level1_offset))[i] =
3418       (t->level1[i] == ~((uint32_t) 0)
3419        ? 0
3420        : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3421
3422   for (i = 0; i < (t->level2_size << t->q); i++)
3423     ((uint32_t *) (t->result + level2_offset))[i] =
3424       (t->level2[i] == ~((uint32_t) 0)
3425        ? 0
3426        : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3427
3428   for (i = 0; i < (t->level3_size << t->p); i++)
3429     ((uint32_t *) (t->result + level3_offset))[i] = t->level3[i];
3430
3431   if (t->level1_alloc > 0)
3432     free (t->level1);
3433   if (t->level2_alloc > 0)
3434     free (t->level2);
3435   if (t->level3_alloc > 0)
3436     free (t->level3);
3437 }
3438
/* Builder state for a sparse 3-level table holding one byte (the width)
   per wide character.  Entries are added with wcwidth_table_add and the
   table is packed into RESULT by wcwidth_table_finalize.  */
struct wcwidth_table
{
  /* Parameters, to be set by the caller before wcwidth_table_init:
     a level-3 block holds 1 << P bytes, a level-2 block 1 << Q entries.  */
  unsigned int p, q;

  /* Working representation.  */

  /* Level 1: flat array indexed by the topmost bits of the character.
     Each entry is the number of a level-2 block, or ~0 if none exists.
     ALLOC and SIZE count entries.  */
  size_t level1_alloc, level1_size;
  uint32_t *level1;

  /* Level 2: blocks of 1 << Q entries.  Each entry is the number of a
     level-3 block, or ~0 if none exists.  ALLOC and SIZE count blocks.  */
  size_t level2_alloc, level2_size;
  uint32_t *level2;

  /* Level 3: blocks of 1 << P width bytes; 0xff marks a character
     without entry.  ALLOC and SIZE count blocks.  */
  size_t level3_alloc, level3_size;
  uint8_t *level3;

  /* Compressed representation: a malloc'ed buffer and its length in
     bytes, filled in by wcwidth_table_finalize.  */
  size_t result_size;
  char *result;
};
3458
3459 /* Initialize.  Assumes t->p and t->q have already been set.  */
3460 static inline void
3461 wcwidth_table_init (struct wcwidth_table *t)
3462 {
3463   t->level1_alloc = t->level1_size = 0;
3464   t->level2_alloc = t->level2_size = 0;
3465   t->level3_alloc = t->level3_size = 0;
3466 }
3467
3468 /* Add one entry.  */
3469 static void
3470 wcwidth_table_add (struct wcwidth_table *t, uint32_t wc, uint8_t width)
3471 {
3472   uint32_t index1 = wc >> (t->q + t->p);
3473   uint32_t index2 = (wc >> t->p) & ((1 << t->q) - 1);
3474   uint32_t index3 = wc & ((1 << t->p) - 1);
3475   size_t i, i1, i2;
3476
3477   if (width == 0xff)
3478     return;
3479
3480   if (index1 >= t->level1_size)
3481     {
3482       if (index1 >= t->level1_alloc)
3483         {
3484           size_t alloc = 2 * t->level1_alloc;
3485           if (alloc <= index1)
3486             alloc = index1 + 1;
3487           t->level1 = (t->level1_alloc > 0
3488                        ? (uint32_t *) xrealloc ((char *) t->level1,
3489                                                 alloc * sizeof (uint32_t))
3490                        : (uint32_t *) xmalloc (alloc * sizeof (uint32_t)));
3491           t->level1_alloc = alloc;
3492         }
3493       while (index1 >= t->level1_size)
3494         t->level1[t->level1_size++] = ~((uint32_t) 0);
3495     }
3496
3497   if (t->level1[index1] == ~((uint32_t) 0))
3498     {
3499       if (t->level2_size == t->level2_alloc)
3500         {
3501           size_t alloc = 2 * t->level2_alloc + 1;
3502           t->level2 = (t->level2_alloc > 0
3503                        ? (uint32_t *) xrealloc ((char *) t->level2,
3504                                                 (alloc << t->q) * sizeof (uint32_t))
3505                        : (uint32_t *) xmalloc ((alloc << t->q) * sizeof (uint32_t)));
3506           t->level2_alloc = alloc;
3507         }
3508       i1 = t->level2_size << t->q;
3509       i2 = (t->level2_size + 1) << t->q;
3510       for (i = i1; i < i2; i++)
3511         t->level2[i] = ~((uint32_t) 0);
3512       t->level1[index1] = t->level2_size++;
3513     }
3514
3515   index2 += t->level1[index1] << t->q;
3516
3517   if (t->level2[index2] == ~((uint32_t) 0))
3518     {
3519       if (t->level3_size == t->level3_alloc)
3520         {
3521           size_t alloc = 2 * t->level3_alloc + 1;
3522           t->level3 = (t->level3_alloc > 0
3523                        ? (uint8_t *) xrealloc ((char *) t->level3,
3524                                                (alloc << t->p) * sizeof (uint8_t))
3525                        : (uint8_t *) xmalloc ((alloc << t->p) * sizeof (uint8_t)));
3526           t->level3_alloc = alloc;
3527         }
3528       i1 = t->level3_size << t->p;
3529       i2 = (t->level3_size + 1) << t->p;
3530       for (i = i1; i < i2; i++)
3531         t->level3[i] = 0xff;
3532       t->level2[index2] = t->level3_size++;
3533     }
3534
3535   index3 += t->level2[index2] << t->p;
3536
3537   t->level3[index3] = width;
3538 }
3539
3540 /* Finalize and shrink.  */
3541 static void
3542 wcwidth_table_finalize (struct wcwidth_table *t)
3543 {
3544   size_t i, j, k;
3545   uint32_t reorder3[t->level3_size];
3546   uint32_t reorder2[t->level2_size];
3547   uint32_t level1_offset, level2_offset, level3_offset, last_offset;
3548
3549   /* Uniquify level3 blocks.  */
3550   k = 0;
3551   for (j = 0; j < t->level3_size; j++)
3552     {
3553       for (i = 0; i < k; i++)
3554         if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3555                     (1 << t->p) * sizeof (uint8_t)) == 0)
3556           break;
3557       /* Relocate block j to block i.  */
3558       reorder3[j] = i;
3559       if (i == k)
3560         {
3561           if (i != j)
3562             memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3563                     (1 << t->p) * sizeof (uint8_t));
3564           k++;
3565         }
3566     }
3567   t->level3_size = k;
3568
3569   for (i = 0; i < (t->level2_size << t->q); i++)
3570     if (t->level2[i] != ~((uint32_t) 0))
3571       t->level2[i] = reorder3[t->level2[i]];
3572
3573   /* Uniquify level2 blocks.  */
3574   k = 0;
3575   for (j = 0; j < t->level2_size; j++)
3576     {
3577       for (i = 0; i < k; i++)
3578         if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3579                     (1 << t->q) * sizeof (uint32_t)) == 0)
3580           break;
3581       /* Relocate block j to block i.  */
3582       reorder2[j] = i;
3583       if (i == k)
3584         {
3585           if (i != j)
3586             memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3587                     (1 << t->q) * sizeof (uint32_t));
3588           k++;
3589         }
3590     }
3591   t->level2_size = k;
3592
3593   for (i = 0; i < t->level1_size; i++)
3594     if (t->level1[i] != ~((uint32_t) 0))
3595       t->level1[i] = reorder2[t->level1[i]];
3596
3597   /* Create and fill the resulting compressed representation.  */
3598   last_offset =
3599     5 * sizeof (uint32_t)
3600     + t->level1_size * sizeof (uint32_t)
3601     + (t->level2_size << t->q) * sizeof (uint32_t)
3602     + (t->level3_size << t->p) * sizeof (uint8_t);
3603   t->result_size = (last_offset + 3) & ~3ul;
3604   t->result = (char *) xmalloc (t->result_size);
3605
3606   level1_offset =
3607     5 * sizeof (uint32_t);
3608   level2_offset =
3609     5 * sizeof (uint32_t)
3610     + t->level1_size * sizeof (uint32_t);
3611   level3_offset =
3612     5 * sizeof (uint32_t)
3613     + t->level1_size * sizeof (uint32_t)
3614     + (t->level2_size << t->q) * sizeof (uint32_t);
3615
3616   ((uint32_t *) t->result)[0] = t->q + t->p;
3617   ((uint32_t *) t->result)[1] = t->level1_size;
3618   ((uint32_t *) t->result)[2] = t->p;
3619   ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3620   ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3621
3622   for (i = 0; i < t->level1_size; i++)
3623     ((uint32_t *) (t->result + level1_offset))[i] =
3624       (t->level1[i] == ~((uint32_t) 0)
3625        ? 0
3626        : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3627
3628   for (i = 0; i < (t->level2_size << t->q); i++)
3629     ((uint32_t *) (t->result + level2_offset))[i] =
3630       (t->level2[i] == ~((uint32_t) 0)
3631        ? 0
3632        : (t->level2[i] << t->p) * sizeof (uint8_t) + level3_offset);
3633
3634   for (i = 0; i < (t->level3_size << t->p); i++)
3635     ((uint8_t *) (t->result + level3_offset))[i] = t->level3[i];
3636
3637   if (last_offset < t->result_size)
3638     memset (t->result + last_offset, 0, t->result_size - last_offset);
3639
3640   if (t->level1_alloc > 0)
3641     free (t->level1);
3642   if (t->level2_alloc > 0)
3643     free (t->level2);
3644   if (t->level3_alloc > 0)
3645     free (t->level3);
3646 }
3647
/* Builder state for a sparse 3-level table holding one signed 32-bit
   value per wide character.  Entries are added with wctrans_table_add
   and the table is packed into RESULT by wctrans_table_finalize.  */
struct wctrans_table
{
  /* Parameters, to be set by the caller before wctrans_table_init:
     a level-3 block holds 1 << P values, a level-2 block 1 << Q
     entries.  */
  unsigned int p, q;

  /* Working representation.  */

  /* Level 1: flat array indexed by the topmost bits of the character.
     Each entry is the number of a level-2 block, or ~0 if none exists.
     ALLOC and SIZE count entries.  */
  size_t level1_alloc, level1_size;
  uint32_t *level1;

  /* Level 2: blocks of 1 << Q entries.  Each entry is the number of a
     level-3 block, or ~0 if none exists.  ALLOC and SIZE count blocks.  */
  size_t level2_alloc, level2_size;
  uint32_t *level2;

  /* Level 3: blocks of 1 << P values, each the difference between the
     mapped character and the character itself; 0 means unmapped.
     ALLOC and SIZE count blocks.  */
  size_t level3_alloc, level3_size;
  int32_t *level3;

  /* Compressed representation: a malloc'ed buffer and its length in
     bytes, filled in by wctrans_table_finalize.  */
  size_t result_size;
  char *result;
};
3667
3668 /* Initialize.  Assumes t->p and t->q have already been set.  */
3669 static inline void
3670 wctrans_table_init (struct wctrans_table *t)
3671 {
3672   t->level1_alloc = t->level1_size = 0;
3673   t->level2_alloc = t->level2_size = 0;
3674   t->level3_alloc = t->level3_size = 0;
3675 }
3676
3677 /* Add one entry.  */
3678 static void
3679 wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
3680 {
3681   uint32_t index1 = wc >> (t->q + t->p);
3682   uint32_t index2 = (wc >> t->p) & ((1 << t->q) - 1);
3683   uint32_t index3 = wc & ((1 << t->p) - 1);
3684   int32_t value = (int32_t) mapped_wc - (int32_t) wc;
3685   size_t i, i1, i2;
3686
3687   if (value == 0)
3688     return;
3689
3690   if (index1 >= t->level1_size)
3691     {
3692       if (index1 >= t->level1_alloc)
3693         {
3694           size_t alloc = 2 * t->level1_alloc;
3695           if (alloc <= index1)
3696             alloc = index1 + 1;
3697           t->level1 = (t->level1_alloc > 0
3698                        ? (uint32_t *) xrealloc ((char *) t->level1,
3699                                                 alloc * sizeof (uint32_t))
3700                        : (uint32_t *) xmalloc (alloc * sizeof (uint32_t)));
3701           t->level1_alloc = alloc;
3702         }
3703       while (index1 >= t->level1_size)
3704         t->level1[t->level1_size++] = ~((uint32_t) 0);
3705     }
3706
3707   if (t->level1[index1] == ~((uint32_t) 0))
3708     {
3709       if (t->level2_size == t->level2_alloc)
3710         {
3711           size_t alloc = 2 * t->level2_alloc + 1;
3712           t->level2 = (t->level2_alloc > 0
3713                        ? (uint32_t *) xrealloc ((char *) t->level2,
3714                                                 (alloc << t->q) * sizeof (uint32_t))
3715                        : (uint32_t *) xmalloc ((alloc << t->q) * sizeof (uint32_t)));
3716           t->level2_alloc = alloc;
3717         }
3718       i1 = t->level2_size << t->q;
3719       i2 = (t->level2_size + 1) << t->q;
3720       for (i = i1; i < i2; i++)
3721         t->level2[i] = ~((uint32_t) 0);
3722       t->level1[index1] = t->level2_size++;
3723     }
3724
3725   index2 += t->level1[index1] << t->q;
3726
3727   if (t->level2[index2] == ~((uint32_t) 0))
3728     {
3729       if (t->level3_size == t->level3_alloc)
3730         {
3731           size_t alloc = 2 * t->level3_alloc + 1;
3732           t->level3 = (t->level3_alloc > 0
3733                        ? (int32_t *) xrealloc ((char *) t->level3,
3734                                                (alloc << t->p) * sizeof (int32_t))
3735                        : (int32_t *) xmalloc ((alloc << t->p) * sizeof (int32_t)));
3736           t->level3_alloc = alloc;
3737         }
3738       i1 = t->level3_size << t->p;
3739       i2 = (t->level3_size + 1) << t->p;
3740       for (i = i1; i < i2; i++)
3741         t->level3[i] = 0;
3742       t->level2[index2] = t->level3_size++;
3743     }
3744
3745   index3 += t->level2[index2] << t->p;
3746
3747   t->level3[index3] = value;
3748 }
3749
3750 /* Finalize and shrink.  */
3751 static void
3752 wctrans_table_finalize (struct wctrans_table *t)
3753 {
3754   size_t i, j, k;
3755   uint32_t reorder3[t->level3_size];
3756   uint32_t reorder2[t->level2_size];
3757   uint32_t level1_offset, level2_offset, level3_offset;
3758
3759   /* Uniquify level3 blocks.  */
3760   k = 0;
3761   for (j = 0; j < t->level3_size; j++)
3762     {
3763       for (i = 0; i < k; i++)
3764         if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3765                     (1 << t->p) * sizeof (int32_t)) == 0)
3766           break;
3767       /* Relocate block j to block i.  */
3768       reorder3[j] = i;
3769       if (i == k)
3770         {
3771           if (i != j)
3772             memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3773                     (1 << t->p) * sizeof (int32_t));
3774           k++;
3775         }
3776     }
3777   t->level3_size = k;
3778
3779   for (i = 0; i < (t->level2_size << t->q); i++)
3780     if (t->level2[i] != ~((uint32_t) 0))
3781       t->level2[i] = reorder3[t->level2[i]];
3782
3783   /* Uniquify level2 blocks.  */
3784   k = 0;
3785   for (j = 0; j < t->level2_size; j++)
3786     {
3787       for (i = 0; i < k; i++)
3788         if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3789                     (1 << t->q) * sizeof (uint32_t)) == 0)
3790           break;
3791       /* Relocate block j to block i.  */
3792       reorder2[j] = i;
3793       if (i == k)
3794         {
3795           if (i != j)
3796             memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3797                     (1 << t->q) * sizeof (uint32_t));
3798           k++;
3799         }
3800     }
3801   t->level2_size = k;
3802
3803   for (i = 0; i < t->level1_size; i++)
3804     if (t->level1[i] != ~((uint32_t) 0))
3805       t->level1[i] = reorder2[t->level1[i]];
3806
3807   /* Create and fill the resulting compressed representation.  */
3808   t->result_size =
3809     5 * sizeof (uint32_t)
3810     + t->level1_size * sizeof (uint32_t)
3811     + (t->level2_size << t->q) * sizeof (uint32_t)
3812     + (t->level3_size << t->p) * sizeof (int32_t);
3813   t->result = (char *) xmalloc (t->result_size);
3814
3815   level1_offset =
3816     5 * sizeof (uint32_t);
3817   level2_offset =
3818     5 * sizeof (uint32_t)
3819     + t->level1_size * sizeof (uint32_t);
3820   level3_offset =
3821     5 * sizeof (uint32_t)
3822     + t->level1_size * sizeof (uint32_t)
3823     + (t->level2_size << t->q) * sizeof (uint32_t);
3824
3825   ((uint32_t *) t->result)[0] = t->q + t->p;
3826   ((uint32_t *) t->result)[1] = t->level1_size;
3827   ((uint32_t *) t->result)[2] = t->p;
3828   ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3829   ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3830
3831   for (i = 0; i < t->level1_size; i++)
3832     ((uint32_t *) (t->result + level1_offset))[i] =
3833       (t->level1[i] == ~((uint32_t) 0)
3834        ? 0
3835        : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3836
3837   for (i = 0; i < (t->level2_size << t->q); i++)
3838     ((uint32_t *) (t->result + level2_offset))[i] =
3839       (t->level2[i] == ~((uint32_t) 0)
3840        ? 0
3841        : (t->level2[i] << t->p) * sizeof (int32_t) + level3_offset);
3842
3843   for (i = 0; i < (t->level3_size << t->p); i++)
3844     ((int32_t *) (t->result + level3_offset))[i] = t->level3[i];
3845
3846   if (t->level1_alloc > 0)
3847     free (t->level1);
3848   if (t->level2_alloc > 0)
3849     free (t->level2);
3850   if (t->level3_alloc > 0)
3851     free (t->level3);
3852 }
3853
3854
3855 static void
3856 allocate_arrays (struct locale_ctype_t *ctype, struct charmap_t *charmap,
3857                  struct repertoire_t *repertoire)
3858 {
3859   size_t idx;
3860   size_t width_table_size;
3861   const void *key;
3862   size_t len;
3863   void *vdata;
3864   void *curs;
3865
3866   /* First we have to decide how we organize the arrays.  It is easy
3867      for a one-byte character set.  But multi-byte character set
3868      cannot be stored flat because the chars might be sparsely used.
3869      So we determine an optimal hashing function for the used
3870      characters.
3871
3872      We use a very trivial hashing function to store the sparse
3873      table.  CH % TABSIZE is used as an index.  To solve multiple hits
3874      we have N planes.  This guarantees a fixed search time for a
3875      character [N / 2].  In the following code we determine the minimum
3876      value for TABSIZE * N, where TABSIZE >= 256.
3877
3878      Some people complained that this algorithm takes too long.  Well,
3879      go on, improve it.  But changing the step size is *not* an
3880      option.  Some people changed this to use only sizes of prime
3881      numbers.  Think again, do some math.  We are looking for the
3882      optimal solution, not something which works in general.  Unless
3883      somebody can provide a dynamic programming solution I think this
3884      implementation is as good as it can get.  */
3885   size_t min_total = UINT_MAX;
3886   size_t act_size = 256;
3887
3888   if (oldstyle_tables)
3889     {
3890       if (!be_quiet && ctype->charnames_act > 512)
3891         fputs (_("\
3892 Computing table size for character classes might take a while..."),
3893                stderr);
3894
3895       /* While we want to have a small total size we are willing to use a
3896          little bit larger table if this reduces the number of layers.
3897          Therefore we add a little penalty to the number of planes.
3898          Maybe this constant has to be adjusted a bit.  */
3899 #define PENALTY 128
3900       do
3901         {
3902           size_t cnt[act_size];
3903           size_t act_planes = 1;
3904
3905           memset (cnt, '\0', sizeof cnt);
3906
3907           for (idx = 0; idx < 256; ++idx)
3908             cnt[idx] = 1;
3909
3910           for (idx = 0; idx < ctype->charnames_act; ++idx)
3911             if (ctype->charnames[idx] >= 256)
3912               {
3913                 size_t nr = ctype->charnames[idx] % act_size;
3914
3915                 if (++cnt[nr] > act_planes)
3916                   {
3917                     act_planes = cnt[nr];
3918                     if ((act_size + PENALTY) * act_planes >= min_total)
3919                       break;
3920                   }
3921               }
3922
3923           if ((act_size + PENALTY) * act_planes < min_total)
3924             {
3925               min_total = (act_size + PENALTY) * act_planes;
3926               ctype->plane_size = act_size;
3927               ctype->plane_cnt = act_planes;
3928             }
3929
3930           ++act_size;
3931         }
3932       while (act_size < min_total);
3933
3934       if (!be_quiet && ctype->charnames_act > 512)
3935         fputs (_(" done\n"), stderr);
3936
3937
3938       ctype->names = (uint32_t *) xcalloc (ctype->plane_size
3939                                            * ctype->plane_cnt,
3940                                            sizeof (uint32_t));
3941
3942       for (idx = 1; idx < 256; ++idx)
3943         ctype->names[idx] = idx;
3944
3945       /* Trick: change the 0th entry's name to 1 to mark the cell occupied.  */
3946       ctype->names[0] = 1;
3947
3948       for (idx = 256; idx < ctype->charnames_act; ++idx)
3949         {
3950           size_t nr = (ctype->charnames[idx] % ctype->plane_size);
3951           size_t depth = 0;
3952
3953           while (ctype->names[nr + depth * ctype->plane_size])
3954             ++depth;
3955           assert (depth < ctype->plane_cnt);
3956
3957           ctype->names[nr + depth * ctype->plane_size] = ctype->charnames[idx];
3958
3959           /* Now for faster access remember the index in the NAMES_B array.  */
3960           ctype->charnames[idx] = nr + depth * ctype->plane_size;
3961         }
3962       ctype->names[0] = 0;
3963     }
3964   else
3965     {
3966       ctype->plane_size = 0;
3967       ctype->plane_cnt = 0;
3968       ctype->names = NULL;
3969     }
3970
3971   /* You wonder about this amount of memory?  This is only because some
3972      users do not manage to address the array with unsigned values or
3973      data types with range >= 256.  '\200' would result in the array
3974      index -128.  To help these poor people we duplicate the entries for
3975      128 up to 255 below the entry for \0.  */
3976   ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
3977                                              sizeof (char_class_t));
3978   ctype->ctype32_b = (char_class32_t *)
3979     xcalloc ((oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256),
3980              sizeof (char_class32_t));
3981   if (!oldstyle_tables)
3982     ctype->class_3level = (struct iovec *)
3983       xmalloc (ctype->nr_charclass * sizeof (struct iovec));
3984
3985   /* This is the array accessed using the multibyte string elements.  */
3986   for (idx = 0; idx < 256; ++idx)
3987     ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3988
3989   /* Mirror first 127 entries.  We must take care that entry -1 is not
3990      mirrored because EOF == -1.  */
3991   for (idx = 0; idx < 127; ++idx)
3992     ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3993
3994   if (oldstyle_tables)
3995     {
3996       /* The 32 bit array contains all characters.  */
3997       for (idx = 0; idx < ctype->class_collection_act; ++idx)
3998         ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3999     }
4000   else
4001     {
4002       /* The 32 bit array contains all characters < 0x100.  */
4003       for (idx = 0; idx < ctype->class_collection_act; ++idx)
4004         if (ctype->charnames[idx] < 0x100)
4005           ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
4006     }
4007
4008   if (!oldstyle_tables)
4009     {
4010       size_t nr;
4011
4012       for (nr = 0; nr < ctype->nr_charclass; nr++)
4013         {
4014           struct wctype_table t;
4015
4016           t.p = 4; /* or: 5 */
4017           t.q = 7; /* or: 6 */
4018           wctype_table_init (&t);
4019
4020           for (idx = 0; idx < ctype->class_collection_act; ++idx)
4021             if (ctype->class_collection[idx] & _ISwbit (nr))
4022               wctype_table_add (&t, ctype->charnames[idx]);
4023
4024           wctype_table_finalize (&t);
4025
4026           if (verbose)
4027             fprintf (stderr, _("%s: table for class \"%s\": %lu bytes\n"),
4028                      "LC_CTYPE", ctype->classnames[nr],
4029                      (unsigned long int) t.result_size);
4030
4031           ctype->class_3level[nr].iov_base = t.result;
4032           ctype->class_3level[nr].iov_len = t.result_size;
4033         }
4034     }
4035
4036   /* Room for table of mappings.  */
4037   ctype->map = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
4038   ctype->map32 = (uint32_t **) xmalloc (ctype->map_collection_nr
4039                                         * sizeof (uint32_t *));
4040   if (!oldstyle_tables)
4041     ctype->map_3level = (struct iovec *)
4042       xmalloc (ctype->map_collection_nr * sizeof (struct iovec));
4043
4044   /* Fill in all mappings.  */
4045   for (idx = 0; idx < 2; ++idx)
4046     {
4047       unsigned int idx2;
4048
4049       /* Allocate table.  */
4050       ctype->map[idx] = (uint32_t *) xmalloc ((256 + 128) * sizeof (uint32_t));
4051
4052       /* Copy values from collection.  */
4053       for (idx2 = 0; idx2 < 256; ++idx2)
4054         ctype->map[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
4055
4056       /* Mirror first 127 entries.  We must take care not to map entry
4057          -1 because EOF == -1.  */
4058       for (idx2 = 0; idx2 < 127; ++idx2)
4059         ctype->map[idx][idx2] = ctype->map[idx][256 + idx2];
4060
4061       /* EOF must map to EOF.  */
4062       ctype->map[idx][127] = EOF;
4063     }
4064
4065   for (idx = 0; idx < ctype->map_collection_nr; ++idx)
4066     {
4067       unsigned int idx2;
4068
4069       /* Allocate table.  */
4070       ctype->map32[idx] = (uint32_t *)
4071         xmalloc ((oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256)
4072                  * sizeof (uint32_t));
4073
4074       /* Copy default value (identity mapping).  */
4075       if (oldstyle_tables)
4076         memcpy (ctype->map32[idx], ctype->names,
4077                 ctype->plane_size * ctype->plane_cnt * sizeof (uint32_t));
4078       else
4079         for (idx2 = 0; idx2 < 256; ++idx2)
4080           ctype->map32[idx][idx2] = idx2;
4081
4082       /* Copy values from collection.  */
4083       for (idx2 = 0; idx2 < 256; ++idx2)
4084         if (ctype->map_collection[idx][idx2] != 0)
4085           ctype->map32[idx][idx2] = ctype->map_collection[idx][idx2];
4086
4087       if (oldstyle_tables)
4088         while (idx2 < ctype->map_collection_act[idx])
4089           {
4090             if (ctype->map_collection[idx][idx2] != 0)
4091               ctype->map32[idx][ctype->charnames[idx2]] =
4092                 ctype->map_collection[idx][idx2];
4093             ++idx2;
4094           }
4095     }
4096
4097   if (!oldstyle_tables)
4098     {
4099       size_t nr;
4100
4101       for (nr = 0; nr < ctype->map_collection_nr; nr++)
4102         {
4103           struct wctrans_table t;
4104
4105           t.p = 7;
4106           t.q = 9;
4107           wctrans_table_init (&t);
4108
4109           for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
4110             if (ctype->map_collection[nr][idx] != 0)
4111               wctrans_table_add (&t, ctype->charnames[idx],
4112                                  ctype->map_collection[nr][idx]);
4113
4114           wctrans_table_finalize (&t);
4115
4116           if (verbose)
4117             fprintf (stderr, _("%s: table for map \"%s\": %lu bytes\n"),
4118                      "LC_CTYPE", ctype->mapnames[nr],
4119                      (unsigned long int) t.result_size);
4120
4121           ctype->map_3level[nr].iov_base = t.result;
4122           ctype->map_3level[nr].iov_len = t.result_size;
4123         }
4124     }
4125
4126   /* Extra array for class and map names.  */
4127   ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
4128                                                 * sizeof (uint32_t));
4129   ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
4130                                               * sizeof (uint32_t));
4131
4132   if (oldstyle_tables)
4133     {
4134       ctype->class_offset = 0; /* not really used */
4135       ctype->map_offset = 0; /* not really used */
4136     }
4137   else
4138     {
4139       ctype->class_offset = _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE);
4140       ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
4141     }
4142
4143   /* Array for width information.  Because the expected widths are very
4144      small we use only a single byte per entry.  This saves space.  */
4145   if (oldstyle_tables)
4146     {
4147       width_table_size = (ctype->plane_size * ctype->plane_cnt + 3) & ~3ul;
4148       ctype->width = (unsigned char *) xmalloc (width_table_size);
4149
4150       /* Initialize with -1.  */
4151       memset (ctype->width, '\xff', width_table_size);
4152       if (charmap->width_rules != NULL)
4153         {
4154           size_t cnt;
4155
4156           for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
4157             {
4158               unsigned char bytes[charmap->mb_cur_max];
4159               int nbytes = charmap->width_rules[cnt].from->nbytes;
4160
4161               /* The range of characters for which the width is specified
4162                  is described using byte sequences of the multibyte
4163                  charset.  We have to convert this to UCS4 now.  We
4164                  cannot simply convert the beginning and the end of the
4165                  range; we have to iterate over the byte sequences and
4166                  convert every single character.  */
4167               memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
4168
4169               while (nbytes < charmap->width_rules[cnt].to->nbytes
4170                      || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
4171                                 nbytes) <= 0)
4172                 {
4173                   /* Find the UCS value for `bytes'.  */
4174                   int inner;
4175                   uint32_t wch;
4176                   struct charseq *seq =
4177                     charmap_find_symbol (charmap, bytes, nbytes);
4178
4179                   if (seq == NULL)
4180                     wch = ILLEGAL_CHAR_VALUE;
4181                   else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
4182                     wch = seq->ucs4;
4183                   else
4184                     wch = repertoire_find_value (ctype->repertoire, seq->name,
4185                                                  strlen (seq->name));
4186
4187                   if (wch != ILLEGAL_CHAR_VALUE)
4188                     {
4189                       /* Store the value.  */
4190                       size_t nr = wch % ctype->plane_size;
4191                       size_t depth = 0;
4192
4193                       while (ctype->names[nr + depth * ctype->plane_size] != wch)
4194                         {
4195                           ++depth;
4196                           assert (depth < ctype->plane_cnt);
4197                         }
4198
4199                       ctype->width[nr + depth * ctype->plane_size]
4200                         = charmap->width_rules[cnt].width;
4201                     }
4202
4203                   /* "Increment" the bytes sequence.  */
4204                   inner = nbytes - 1;
4205                   while (inner >= 0 && bytes[inner] == 0xff)
4206                     --inner;
4207
4208                   if (inner < 0)
4209                     {
4210                       /* We have to extend the byte sequence.  */
4211                       if (nbytes >= charmap->width_rules[cnt].to->nbytes)
4212                         break;
4213
4214                       bytes[0] = 1;
4215                       memset (&bytes[1], 0, nbytes);
4216                       ++nbytes;
4217                     }
4218                   else
4219                     {
4220                       ++bytes[inner];
4221                       while (++inner < nbytes)
4222                         bytes[inner] = 0;
4223                     }
4224                 }
4225             }
4226         }
4227
4228       /* Now set all the other characters of the character set to the
4229          default width.  */
4230       curs = NULL;
4231       while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
4232         {
4233           struct charseq *data = (struct charseq *) vdata;
4234           size_t nr;
4235           size_t depth;
4236
4237           if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
4238             data->ucs4 = repertoire_find_value (ctype->repertoire,
4239                                                 data->name, len);
4240
4241           if (data->ucs4 != ILLEGAL_CHAR_VALUE)
4242             {
4243               nr = data->ucs4 % ctype->plane_size;
4244               depth = 0;
4245
4246               while (ctype->names[nr + depth * ctype->plane_size] != data->ucs4)
4247                 {
4248                   ++depth;
4249                   assert (depth < ctype->plane_cnt);
4250                 }
4251
4252               if (ctype->width[nr + depth * ctype->plane_size]
4253                   == (unsigned char) '\xff')
4254                 ctype->width[nr + depth * ctype->plane_size] =
4255                   charmap->width_default;
4256             }
4257         }
4258     }
4259   else
4260     {
4261       struct wcwidth_table t;
4262
4263       t.p = 7;
4264       t.q = 9;
4265       wcwidth_table_init (&t);
4266
4267       /* First set all the characters of the character set to the default width.  */
4268       curs = NULL;
4269       while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
4270         {
4271           struct charseq *data = (struct charseq *) vdata;
4272
4273           if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
4274             data->ucs4 = repertoire_find_value (ctype->repertoire,
4275                                                 data->name, len);
4276
4277           if (data->ucs4 != ILLEGAL_CHAR_VALUE)
4278             wcwidth_table_add (&t, data->ucs4, charmap->width_default);
4279         }
4280
4281       /* Now add the explicitly specified widths.  */
4282       if (charmap->width_rules != NULL)
4283         {
4284           size_t cnt;
4285
4286           for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
4287             {
4288               unsigned char bytes[charmap->mb_cur_max];
4289               int nbytes = charmap->width_rules[cnt].from->nbytes;
4290
4291               /* The range of characters for which the width is specified
4292                  is described using byte sequences of the multibyte
4293                  charset.  We have to convert this to UCS4 now.  We
4294                  cannot simply convert the beginning and the end of the
4295                  range; we have to iterate over the byte sequences and
4296                  convert every single character.  */
4297               memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
4298
4299               while (nbytes < charmap->width_rules[cnt].to->nbytes
4300                      || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
4301                                 nbytes) <= 0)
4302                 {
4303                   /* Find the UCS value for `bytes'.  */
4304                   int inner;
4305                   uint32_t wch;
4306                   struct charseq *seq =
4307                     charmap_find_symbol (charmap, bytes, nbytes);
4308
4309                   if (seq == NULL)
4310                     wch = ILLEGAL_CHAR_VALUE;
4311                   else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
4312                     wch = seq->ucs4;
4313                   else
4314                     wch = repertoire_find_value (ctype->repertoire, seq->name,
4315                                                  strlen (seq->name));
4316
4317                   if (wch != ILLEGAL_CHAR_VALUE)
4318                     /* Store the value.  */
4319                     wcwidth_table_add (&t, wch, charmap->width_rules[cnt].width);
4320
4321                   /* "Increment" the bytes sequence.  */
4322                   inner = nbytes - 1;
4323                   while (inner >= 0 && bytes[inner] == 0xff)
4324                     --inner;
4325
4326                   if (inner < 0)
4327                     {
4328                       /* We have to extend the byte sequence.  */
4329                       if (nbytes >= charmap->width_rules[cnt].to->nbytes)
4330                         break;
4331
4332                       bytes[0] = 1;
4333                       memset (&bytes[1], 0, nbytes);
4334                       ++nbytes;
4335                     }
4336                   else
4337                     {
4338                       ++bytes[inner];
4339                       while (++inner < nbytes)
4340                         bytes[inner] = 0;
4341                     }
4342                 }
4343             }
4344         }
4345
4346       wcwidth_table_finalize (&t);
4347
4348       if (verbose)
4349         fprintf (stderr, _("%s: table for width: %lu bytes\n"),
4350                  "LC_CTYPE", (unsigned long int) t.result_size);
4351
4352       ctype->width_3level.iov_base = t.result;
4353       ctype->width_3level.iov_len = t.result_size;
4354     }
4355
4356   /* Set MB_CUR_MAX.  */
4357   ctype->mb_cur_max = charmap->mb_cur_max;
4358
4359   /* Now determine the table for the transliteration information.
4360
4361      XXX It is not yet clear to me whether it is worth implementing a
4362      complicated algorithm which uses a hash table to locate the entries.
4363      For now I'll use a simple array which can be searched using binary
4364      search.  */
4365   if (ctype->translit_copy_locale != NULL)
4366     {
4367       /* Fold in the transliteration information from the locale mentioned
4368          in the `include' statement.  */
4369       struct locale_ctype_t *here = ctype;
4370
4371       do
4372         {
4373           struct localedef_t *other = find_locale (LC_CTYPE,
4374                                                    here->translit_copy_locale,
4375                                                    repertoire->name, charmap);
4376
4377           if (other == NULL)
4378             {
4379               error (0, 0, _("\
4380 %s: transliteration data from locale `%s' not available"),
4381                      "LC_CTYPE", here->translit_copy_locale);
4382               break;
4383             }
4384
4385           here = other->categories[LC_CTYPE].ctype;
4386
4387           /* Enqueue the information if necessary.  */
4388           if (here->translit != NULL)
4389             {
4390               struct translit_t *endp = here->translit;
4391               while (endp->next != NULL)
4392                 endp = endp->next;
4393
4394               endp->next = ctype->translit;
4395               ctype->translit = here->translit;
4396             }
4397         }
4398       while (here->translit_copy_locale != NULL);
4399     }
4400
4401   if (ctype->translit != NULL)
4402     {
4403       /* First count how many entries we have.  This is the upper limit
4404          since some entries from the included files might be overwritten.  */
4405       size_t number = 0;
4406       size_t cnt;
4407       struct translit_t *runp = ctype->translit;
4408       struct translit_t **sorted;
4409       size_t from_len, to_len;
4410
4411       while (runp != NULL)
4412         {
4413           ++number;
4414           runp = runp->next;
4415         }
4416
4417       /* Next we allocate an array large enough and fill in the values.  */
4418       sorted = (struct translit_t **) alloca (number
4419                                               * sizeof (struct translit_t **));
4420       runp = ctype->translit;
4421       number = 0;
4422       do
4423         {
4424           /* Search for the place where to insert this string.
4425              XXX Better use a real sorting algorithm later.  */
4426           size_t idx = 0;
4427           int replace = 0;
4428
4429           while (idx < number)
4430             {
4431               int res = wcscmp ((const wchar_t *) sorted[idx]->from,
4432                                 (const wchar_t *) runp->from);
4433               if (res == 0)
4434                 {
4435                   replace = 1;
4436                   break;
4437                 }
4438               if (res > 0)
4439                 break;
4440               ++idx;
4441             }
4442
4443           if (replace)
4444             sorted[idx] = runp;
4445           else
4446             {
4447               memmove (&sorted[idx + 1], &sorted[idx],
4448                        (number - idx) * sizeof (struct translit_t *));
4449               sorted[idx] = runp;
4450               ++number;
4451             }
4452
4453           runp = runp->next;
4454         }
4455       while (runp != NULL);
4456
4457       /* The next step is putting all the possible transliteration
4458          strings in one memory block so that we can write it out.
4459          We need several different blocks:
4460          - index to the from-string array
4461          - from-string array
4462          - index to the to-string array
4463          - to-string array.
4464       */
4465       from_len = to_len = 0;
4466       for (cnt = 0; cnt < number; ++cnt)
4467         {
4468           struct translit_to_t *srunp;
4469           from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4470           srunp = sorted[cnt]->to;
4471           while (srunp != NULL)
4472             {
4473               to_len += wcslen ((const wchar_t *) srunp->str) + 1;
4474               srunp = srunp->next;
4475             }
4476           /* Plus one for the extra NUL character marking the end of
4477              the list for the current entry.  */
4478           ++to_len;
4479         }
4480
4481       /* We can allocate the arrays for the results.  */
4482       ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
4483       ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
4484       ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
4485       ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
4486
4487       from_len = 0;
4488       to_len = 0;
4489       for (cnt = 0; cnt < number; ++cnt)
4490         {
4491           size_t len;
4492           struct translit_to_t *srunp;
4493
4494           ctype->translit_from_idx[cnt] = from_len;
4495           ctype->translit_to_idx[cnt] = to_len;
4496
4497           len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4498           wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4499                    (const wchar_t *) sorted[cnt]->from, len);
4500           from_len += len;
4501
4502           ctype->translit_to_idx[cnt] = to_len;
4503           srunp = sorted[cnt]->to;
4504           while (srunp != NULL)
4505             {
4506               len = wcslen ((const wchar_t *) srunp->str) + 1;
4507               wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4508                        (const wchar_t *) srunp->str, len);
4509               to_len += len;
4510               srunp = srunp->next;
4511             }
4512           ctype->translit_to_tbl[to_len++] = L'\0';
4513         }
4514
4515       /* Store the information about the length.  */
4516       ctype->translit_idx_size = number;
4517       ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
4518       ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
4519     }
4520   else
4521     {
4522       /* Provide some dummy pointers since we have nothing to write out.  */
4523       static uint32_t no_str = { 0 };
4524
4525       ctype->translit_from_idx = &no_str;
4526       ctype->translit_from_tbl = &no_str;
4527       ctype->translit_to_tbl = &no_str;
4528       ctype->translit_idx_size = 0;
4529       ctype->translit_from_tbl_size = 0;
4530       ctype->translit_to_tbl_size = 0;
4531     }
4532 }