locale/programs/ld-ctype.c

   1 /* Copyright (C) 1995-1999, 2000 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Library General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Library General Public License for more details.
  14
  15    You should have received a copy of the GNU Library General Public
  16    License along with the GNU C Library; see the file COPYING.LIB.  If not,
  17    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  18    Boston, MA 02111-1307, USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23
  24 #include <alloca.h>
  25 #include <byteswap.h>
  26 #include <endian.h>
  27 #include <errno.h>
  28 #include <limits.h>
  29 #include <obstack.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32 #include <wchar.h>
  33 #include <wctype.h>
  34 #include <sys/uio.h>
  35
  36 #include "charmap.h"
  37 #include "localeinfo.h"
  38 #include "langinfo.h"
  39 #include "linereader.h"
  40 #include "locfile-token.h"
  41 #include "locfile.h"
  42 #include "localedef.h"
  43
  44 #include <assert.h>
  45
  46
  47 #ifdef PREDEFINED_CLASSES
  48 /* These are the extra bits not in wctype.h since these are not preallocated
  49    classes.  */
  50 # define _ISwspecial1   (1 << 29)
  51 # define _ISwspecial2   (1 << 30)
  52 # define _ISwspecial3   (1 << 31)
  53 #endif
  54
  55
  56 /* The bit used for representing a special class.  */
  57 #define BITPOS(class) ((class) - tok_upper)
  58 #define BIT(class) (_ISbit (BITPOS (class)))
  59 #define BITw(class) (_ISwbit (BITPOS (class)))
  60
  61 #define ELEM(ctype, collection, idx, value)                                   \
  62   *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
  63              &ctype->collection##_act idx, value)
  64
  65
  66 /* To be compatible with former implementations we for now restrict
  67    the number of bits for character classes to 16.  When compatibility
  68    is not necessary anymore increase the number to 32.  */
  69 #define char_class_t uint16_t
  70 #define char_class32_t uint32_t
  71
  72
  73 /* Type to describe a transliteration action.  We have a possibly
  74    multiple character from-string and a set of multiple character
  75    to-strings.  All are 32bit values since this is what is used in
  76    the gconv functions.  */
  77 struct translit_to_t
  78 {
  79   uint32_t *str;
  80
  81   struct translit_to_t *next;
  82 };
  83
  84 struct translit_t
  85 {
  86   uint32_t *from;
  87
  88   const char *fname;
  89   size_t lineno;
  90
  91   struct translit_to_t *to;
  92
  93   struct translit_t *next;
  94 };
  95
  96 struct translit_ignore_t
  97 {
  98   uint32_t from;
  99   uint32_t to;
 100   uint32_t step;
 101
 102   const char *fname;
 103   size_t lineno;
 104
 105   struct translit_ignore_t *next;
 106 };
 107
 108
 109 /* The real definition of the struct for the LC_CTYPE locale.  */
 110 struct locale_ctype_t
 111 {
 112   uint32_t *charnames;
 113   size_t charnames_max;
 114   size_t charnames_act;
 115   /* An index lookup table, to speedup find_idx.  */
 116 #define MAX_CHARNAMES_IDX 0x10000
 117   uint32_t *charnames_idx;
 118
 119   struct repertoire_t *repertoire;
 120
 121   /* We will allow up to 8 * sizeof (uint32_t) character classes.  */
 122 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
 123   size_t nr_charclass;
 124   const char *classnames[MAX_NR_CHARCLASS];
 125   uint32_t last_class_char;
 126   uint32_t class256_collection[256];
 127   uint32_t *class_collection;
 128   size_t class_collection_max;
 129   size_t class_collection_act;
 130   uint32_t class_done;
 131   uint32_t class_offset;
 132
 133   struct charseq **mbdigits;
 134   size_t mbdigits_act;
 135   size_t mbdigits_max;
 136   uint32_t *wcdigits;
 137   size_t wcdigits_act;
 138   size_t wcdigits_max;
 139
 140   struct charseq *mboutdigits[10];
 141   uint32_t wcoutdigits[10];
 142   size_t outdigits_act;
 143
 144   /* If the following number ever turns out to be too small simply
 145      increase it.  But I doubt it will.  --drepper@gnu */
 146 #define MAX_NR_CHARMAP 16
 147   const char *mapnames[MAX_NR_CHARMAP];
 148   uint32_t *map_collection[MAX_NR_CHARMAP];
 149   uint32_t map256_collection[2][256];
 150   size_t map_collection_max[MAX_NR_CHARMAP];
 151   size_t map_collection_act[MAX_NR_CHARMAP];
 152   size_t map_collection_nr;
 153   size_t last_map_idx;
 154   int tomap_done[MAX_NR_CHARMAP];
 155   uint32_t map_offset;
 156
 157   /* Transliteration information.  */
 158   const char *translit_copy_locale;
 159   const char *translit_copy_repertoire;
 160   struct translit_t *translit;
 161   struct translit_ignore_t *translit_ignore;
 162   uint32_t ntranslit_ignore;
 163
 164   uint32_t *default_missing;
 165   const char *default_missing_file;
 166   size_t default_missing_lineno;
 167
 168   /* The arrays for the binary representation.  */
 169   char_class_t *ctype_b;
 170   char_class32_t *ctype32_b;
 171   uint32_t **map_b;
 172   uint32_t **map32_b;
 173   uint32_t **class_b;
 174   struct iovec *class_3level;
 175   struct iovec *map_3level;
 176   uint32_t *class_name_ptr;
 177   uint32_t *map_name_ptr;
 178   struct iovec width;
 179   uint32_t mb_cur_max;
 180   const char *codeset_name;
 181   uint32_t *translit_from_idx;
 182   uint32_t *translit_from_tbl;
 183   uint32_t *translit_to_idx;
 184   uint32_t *translit_to_tbl;
 185   uint32_t translit_idx_size;
 186   size_t translit_from_tbl_size;
 187   size_t translit_to_tbl_size;
 188
 189   struct obstack mempool;
 190 };
 191
 192
 193 #define obstack_chunk_alloc xmalloc
 194 #define obstack_chunk_free free
 195
 196
 197 /* Prototypes for local functions.  */
 198 static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
 199                            struct charmap_t *charmap,
 200                            struct localedef_t *copy_locale,
 201                            int ignore_content);
 202 static void ctype_class_new (struct linereader *lr,
 203                              struct locale_ctype_t *ctype, const char *name);
 204 static void ctype_map_new (struct linereader *lr,
 205                            struct locale_ctype_t *ctype,
 206                            const char *name, struct charmap_t *charmap);
 207 static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
 208                            size_t *max, size_t *act, unsigned int idx);
 209 static void set_class_defaults (struct locale_ctype_t *ctype,
 210                                 struct charmap_t *charmap,
 211                                 struct repertoire_t *repertoire);
 212 static void allocate_arrays (struct locale_ctype_t *ctype,
 213                              struct charmap_t *charmap,
 214                              struct repertoire_t *repertoire);
 215
 216
 217 static const char *longnames[] =
 218 {
 219   "zero", "one", "two", "three", "four",
 220   "five", "six", "seven", "eight", "nine"
 221 };
 222 static const char *uninames[] =
 223 {
 224   "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
 225   "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
 226 };
 227 static const unsigned char digits[] = "0123456789";
 228
 229
 230 static void
 231 ctype_startup (struct linereader *lr, struct localedef_t *locale,
 232                struct charmap_t *charmap, struct localedef_t *copy_locale,
 233                int ignore_content)
 234 {
 235   unsigned int cnt;
 236   struct locale_ctype_t *ctype;
 237
 238   if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
 239     {
 240       if (copy_locale == NULL)
 241         {
 242           /* Allocate the needed room.  */
 243           locale->categories[LC_CTYPE].ctype = ctype =
 244             (struct locale_ctype_t *) xcalloc (1,
 245                                                sizeof (struct locale_ctype_t));
 246
 247           /* We have seen no names yet.  */
 248           ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
 249           ctype->charnames =
 250             (unsigned int *) xmalloc (ctype->charnames_max
 251                                       * sizeof (unsigned int));
 252           for (cnt = 0; cnt < 256; ++cnt)
 253             ctype->charnames[cnt] = cnt;
 254           ctype->charnames_act = 256;
 255           ctype->charnames_idx =
 256             (uint32_t *) xmalloc (MAX_CHARNAMES_IDX * sizeof (uint32_t));
 257           for (cnt = 0; cnt < MAX_CHARNAMES_IDX; ++cnt)
 258             ctype->charnames_idx[cnt] = ~((uint32_t) 0);
 259
 260           /* Fill character class information.  */
 261           ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 262           /* The order of the following instructions determines the bit
 263              positions!  */
 264           ctype_class_new (lr, ctype, "upper");
 265           ctype_class_new (lr, ctype, "lower");
 266           ctype_class_new (lr, ctype, "alpha");
 267           ctype_class_new (lr, ctype, "digit");
 268           ctype_class_new (lr, ctype, "xdigit");
 269           ctype_class_new (lr, ctype, "space");
 270           ctype_class_new (lr, ctype, "print");
 271           ctype_class_new (lr, ctype, "graph");
 272           ctype_class_new (lr, ctype, "blank");
 273           ctype_class_new (lr, ctype, "cntrl");
 274           ctype_class_new (lr, ctype, "punct");
 275           ctype_class_new (lr, ctype, "alnum");
 276 #ifdef PREDEFINED_CLASSES
 277           /* The following are extensions from ISO 14652.  */
 278           ctype_class_new (lr, ctype, "left_to_right");
 279           ctype_class_new (lr, ctype, "right_to_left");
 280           ctype_class_new (lr, ctype, "num_terminator");
 281           ctype_class_new (lr, ctype, "num_separator");
 282           ctype_class_new (lr, ctype, "segment_separator");
 283           ctype_class_new (lr, ctype, "block_separator");
 284           ctype_class_new (lr, ctype, "direction_control");
 285           ctype_class_new (lr, ctype, "sym_swap_layout");
 286           ctype_class_new (lr, ctype, "char_shape_selector");
 287           ctype_class_new (lr, ctype, "num_shape_selector");
 288           ctype_class_new (lr, ctype, "non_spacing");
 289           ctype_class_new (lr, ctype, "non_spacing_level3");
 290           ctype_class_new (lr, ctype, "normal_connect");
 291           ctype_class_new (lr, ctype, "r_connect");
 292           ctype_class_new (lr, ctype, "no_connect");
 293           ctype_class_new (lr, ctype, "no_connect-space");
 294           ctype_class_new (lr, ctype, "vowel_connect");
 295 #endif
 296
 297           ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
 298           ctype->class_collection
 299             = (uint32_t *) xcalloc (sizeof (unsigned long int),
 300                                     ctype->class_collection_max);
 301           ctype->class_collection_act = 256;
 302
 303           /* Fill character map information.  */
 304           ctype->last_map_idx = MAX_NR_CHARMAP;
 305           ctype_map_new (lr, ctype, "toupper", charmap);
 306           ctype_map_new (lr, ctype, "tolower", charmap);
 307 #ifdef PREDEFINED_CLASSES
 308           ctype_map_new (lr, ctype, "tosymmetric", charmap);
 309 #endif
 310
 311           /* Fill first 256 entries in `toXXX' arrays.  */
 312           for (cnt = 0; cnt < 256; ++cnt)
 313             {
 314               ctype->map_collection[0][cnt] = cnt;
 315               ctype->map_collection[1][cnt] = cnt;
 316 #ifdef PREDEFINED_CLASSES
 317               ctype->map_collection[2][cnt] = cnt;
 318 #endif
 319               ctype->map256_collection[0][cnt] = cnt;
 320               ctype->map256_collection[1][cnt] = cnt;
 321             }
 322
 323           obstack_init (&ctype->mempool);
 324         }
 325       else
 326         ctype = locale->categories[LC_CTYPE].ctype =
 327           copy_locale->categories[LC_CTYPE].ctype;
 328     }
 329 }
 330
 331
 332 void
 333 ctype_finish (struct localedef_t *locale, struct charmap_t *charmap)
 334 {
 335   /* See POSIX.2, table 2-6 for the meaning of the following table.  */
 336 #define NCLASS 12
 337   static const struct
 338   {
 339     const char *name;
 340     const char allow[NCLASS];
 341   }
 342   valid_table[NCLASS] =
 343   {
 344     /* The order is important.  See token.h for more information.
 345        M = Always, D = Default, - = Permitted, X = Mutually exclusive  */
 346     { "upper",  "--MX-XDDXXX-" },
 347     { "lower",  "--MX-XDDXXX-" },
 348     { "alpha",  "---X-XDDXXX-" },
 349     { "digit",  "XXX--XDDXXX-" },
 350     { "xdigit", "-----XDDXXX-" },
 351     { "space",  "XXXXX------X" },
 352     { "print",  "---------X--" },
 353     { "graph",  "---------X--" },
 354     { "blank",  "XXXXXM-----X" },
 355     { "cntrl",  "XXXXX-XX--XX" },
 356     { "punct",  "XXXXX-DD-X-X" },
 357     { "alnum",  "-----XDDXXX-" }
 358   };
 359   size_t cnt;
 360   int cls1, cls2;
 361   uint32_t space_value;
 362   struct charseq *space_seq;
 363   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 364   int warned;
 365   const void *key;
 366   size_t len;
 367   void *vdata;
 368   void *curs;
 369
 370   /* Now resolve copying and also handle completely missing definitions.  */
 371   if (ctype == NULL)
 372     {
 373       const char *repertoire_name;
 374
 375       /* First see whether we were supposed to copy.  If yes, find the
 376          actual definition.  */
 377       if (locale->copy_name[LC_CTYPE] != NULL)
 378         {
 379           /* Find the copying locale.  This has to happen transitively since
 380              the locale we are copying from might also copying another one.  */
 381           struct localedef_t *from = locale;
 382
 383           do
 384             from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
 385                                 from->repertoire_name, charmap);
 386           while (from->categories[LC_CTYPE].ctype == NULL
 387                  && from->copy_name[LC_CTYPE] != NULL);
 388
 389           ctype = locale->categories[LC_CTYPE].ctype
 390             = from->categories[LC_CTYPE].ctype;
 391         }
 392
 393       /* If there is still no definition issue an warning and create an
 394          empty one.  */
 395       if (ctype == NULL)
 396         {
 397           if (! be_quiet)
 398             error (0, 0, _("No definition for %s category found"), "LC_CTYPE");
 399           ctype_startup (NULL, locale, charmap, NULL, 0);
 400           ctype = locale->categories[LC_CTYPE].ctype;
 401         }
 402
 403       /* Get the repertoire we have to use.  */
 404       repertoire_name = locale->repertoire_name ?: repertoire_global;
 405       if (repertoire_name != NULL)
 406         ctype->repertoire = repertoire_read (repertoire_name);
 407     }
 408
 409   /* We need the name of the currently used 8-bit character set to
 410      make correct conversion between this 8-bit representation and the
 411      ISO 10646 character set used internally for wide characters.  */
 412   ctype->codeset_name = charmap->code_set_name;
 413   if (ctype->codeset_name == NULL)
 414     {
 415       if (! be_quiet)
 416         error (0, 0, _("No character set name specified in charmap"));
 417       ctype->codeset_name = "//UNKNOWN//";
 418     }
 419
 420   /* Set default value for classes not specified.  */
 421   set_class_defaults (ctype, charmap, ctype->repertoire);
 422
 423   /* Check according to table.  */
 424   for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
 425     {
 426       uint32_t tmp = ctype->class_collection[cnt];
 427
 428       if (tmp != 0)
 429         {
 430           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 431             if ((tmp & _ISwbit (cls1)) != 0)
 432               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 433                 if (valid_table[cls1].allow[cls2] != '-')
 434                   {
 435                     int eq = (tmp & _ISwbit (cls2)) != 0;
 436                     switch (valid_table[cls1].allow[cls2])
 437                       {
 438                       case 'M':
 439                         if (!eq)
 440                           {
 441                             uint32_t value = ctype->charnames[cnt];
 442
 443                             if (!be_quiet)
 444                               error (0, 0, _("\
 445 character L'\\u%0*x' in class `%s' must be in class `%s'"),
 446                                      value > 0xffff ? 8 : 4, value,
 447                                      valid_table[cls1].name,
 448                                      valid_table[cls2].name);
 449                           }
 450                         break;
 451
 452                       case 'X':
 453                         if (eq)
 454                           {
 455                             uint32_t value = ctype->charnames[cnt];
 456
 457                             if (!be_quiet)
 458                               error (0, 0, _("\
 459 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
 460                                      value > 0xffff ? 8 : 4, value,
 461                                      valid_table[cls1].name,
 462                                      valid_table[cls2].name);
 463                           }
 464                         break;
 465
 466                       case 'D':
 467                         ctype->class_collection[cnt] |= _ISwbit (cls2);
 468                         break;
 469
 470                       default:
 471                         error (5, 0, _("internal error in %s, line %u"),
 472                                __FUNCTION__, __LINE__);
 473                       }
 474                   }
 475         }
 476     }
 477
 478   for (cnt = 0; cnt < 256; ++cnt)
 479     {
 480       uint32_t tmp = ctype->class256_collection[cnt];
 481
 482       if (tmp != 0)
 483         {
 484           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 485             if ((tmp & _ISbit (cls1)) != 0)
 486               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 487                 if (valid_table[cls1].allow[cls2] != '-')
 488                   {
 489                     int eq = (tmp & _ISbit (cls2)) != 0;
 490                     switch (valid_table[cls1].allow[cls2])
 491                       {
 492                       case 'M':
 493                         if (!eq)
 494                           {
 495                             char buf[17];
 496
 497                             snprintf (buf, sizeof buf, "\\%Zo", cnt);
 498
 499                             if (!be_quiet)
 500                               error (0, 0, _("\
 501 character '%s' in class `%s' must be in class `%s'"),
 502                                      buf, valid_table[cls1].name,
 503                                      valid_table[cls2].name);
 504                           }
 505                         break;
 506
 507                       case 'X':
 508                         if (eq)
 509                           {
 510                             char buf[17];
 511
 512                             snprintf (buf, sizeof buf, "\\%Zo", cnt);
 513
 514                             if (!be_quiet)
 515                               error (0, 0, _("\
 516 character '%s' in class `%s' must not be in class `%s'"),
 517                                      buf, valid_table[cls1].name,
 518                                      valid_table[cls2].name);
 519                           }
 520                         break;
 521
 522                       case 'D':
 523                         ctype->class256_collection[cnt] |= _ISbit (cls2);
 524                         break;
 525
 526                       default:
 527                         error (5, 0, _("internal error in %s, line %u"),
 528                                __FUNCTION__, __LINE__);
 529                       }
 530                   }
 531         }
 532     }
 533
 534   /* ... and now test <SP> as a special case.  */
 535   space_value = 32;
 536   if (((cnt = BITPOS (tok_space),
 537         (ELEM (ctype, class_collection, , space_value)
 538          & BITw (tok_space)) == 0)
 539        || (cnt = BITPOS (tok_blank),
 540            (ELEM (ctype, class_collection, , space_value)
 541             & BITw (tok_blank)) == 0)))
 542     {
 543       if (!be_quiet)
 544         error (0, 0, _("<SP> character not in class `%s'"),
 545                valid_table[cnt].name);
 546     }
 547   else if (((cnt = BITPOS (tok_punct),
 548              (ELEM (ctype, class_collection, , space_value)
 549               & BITw (tok_punct)) != 0)
 550             || (cnt = BITPOS (tok_graph),
 551                 (ELEM (ctype, class_collection, , space_value)
 552                  & BITw (tok_graph))
 553                 != 0)))
 554     {
 555       if (!be_quiet)
 556         error (0, 0, _("<SP> character must not be in class `%s'"),
 557                valid_table[cnt].name);
 558     }
 559   else
 560     ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
 561
 562   space_seq = charmap_find_value (charmap, "SP", 2);
 563   if (space_seq == NULL)
 564     space_seq = charmap_find_value (charmap, "space", 5);
 565   if (space_seq == NULL)
 566     space_seq = charmap_find_value (charmap, "U00000020", 9);
 567   if (space_seq == NULL || space_seq->nbytes != 1)
 568     {
 569       if (!be_quiet)
 570         error (0, 0, _("character <SP> not defined in character map"));
 571     }
 572   else if (((cnt = BITPOS (tok_space),
 573              (ctype->class256_collection[space_seq->bytes[0]]
 574               & BIT (tok_space)) == 0)
 575             || (cnt = BITPOS (tok_blank),
 576                 (ctype->class256_collection[space_seq->bytes[0]]
 577                  & BIT (tok_blank)) == 0)))
 578     {
 579       if (!be_quiet)
 580         error (0, 0, _("<SP> character not in class `%s'"),
 581                valid_table[cnt].name);
 582     }
 583   else if (((cnt = BITPOS (tok_punct),
 584              (ctype->class256_collection[space_seq->bytes[0]]
 585               & BIT (tok_punct)) != 0)
 586             || (cnt = BITPOS (tok_graph),
 587                 (ctype->class256_collection[space_seq->bytes[0]]
 588                  & BIT (tok_graph)) != 0)))
 589     {
 590       if (!be_quiet)
 591         error (0, 0, _("<SP> character must not be in class `%s'"),
 592                valid_table[cnt].name);
 593     }
 594   else
 595     ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
 596
 597   /* Now that the tests are done make sure the name array contains all
 598      characters which are handled in the WIDTH section of the
 599      character set definition file.  */
 600   if (charmap->width_rules != NULL)
 601     for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
 602       {
 603         unsigned char bytes[charmap->mb_cur_max];
 604         int nbytes = charmap->width_rules[cnt].from->nbytes;
 605
 606         /* We have the range of character for which the width is
 607            specified described using byte sequences of the multibyte
 608            charset.  We have to convert this to UCS4 now.  And we
 609            cannot simply convert the beginning and the end of the
 610            sequence, we have to iterate over the byte sequence and
 611            convert it for every single character.  */
 612         memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
 613
 614         while (nbytes < charmap->width_rules[cnt].to->nbytes
 615                || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
 616                           nbytes) <= 0)
 617           {
 618             /* Find the UCS value for `bytes'.  */
 619             int inner;
 620             uint32_t wch;
 621             struct charseq *seq = charmap_find_symbol (charmap, bytes, nbytes);
 622
 623             if (seq == NULL)
 624               wch = ILLEGAL_CHAR_VALUE;
 625             else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
 626               wch = seq->ucs4;
 627             else
 628               wch = repertoire_find_value (ctype->repertoire, seq->name,
 629                                            strlen (seq->name));
 630
 631             if (wch != ILLEGAL_CHAR_VALUE)
 632               /* We are only interested in the side-effects of the
 633                  `find_idx' call.  It will add appropriate entries in
 634                  the name array if this is necessary.  */
 635               (void) find_idx (ctype, NULL, NULL, NULL, wch);
 636
 637             /* "Increment" the bytes sequence.  */
 638             inner = nbytes - 1;
 639             while (inner >= 0 && bytes[inner] == 0xff)
 640               --inner;
 641
 642             if (inner < 0)
 643               {
 644                 /* We have to extend the byte sequence.  */
 645                 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
 646                   break;
 647
 648                 bytes[0] = 1;
 649                 memset (&bytes[1], 0, nbytes);
 650                 ++nbytes;
 651               }
 652             else
 653               {
 654                 ++bytes[inner];
 655                 while (++inner < nbytes)
 656                   bytes[inner] = 0;
 657               }
 658           }
 659       }
 660
 661   /* Now set all the other characters of the character set to the
 662      default width.  */
 663   curs = NULL;
 664   while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
 665     {
 666       struct charseq *data = (struct charseq *) vdata;
 667
 668       if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
 669         data->ucs4 = repertoire_find_value (ctype->repertoire,
 670                                             data->name, len);
 671
 672       if (data->ucs4 != ILLEGAL_CHAR_VALUE)
 673         (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
 674     }
 675
 676   /* There must be a multiple of 10 digits.  */
 677   if (ctype->mbdigits_act % 10 != 0)
 678     {
 679       assert (ctype->mbdigits_act == ctype->wcdigits_act);
 680       ctype->wcdigits_act -= ctype->mbdigits_act % 10;
 681       ctype->mbdigits_act -= ctype->mbdigits_act % 10;
 682       error (0, 0, _("`digit' category has not entries in groups of ten"));
 683     }
 684
 685   /* Check the input digits.  There must be a multiple of ten available.
 686      In each group it could be that one or the other character is missing.
 687      In this case the whole group must be removed.  */
 688   cnt = 0;
 689   while (cnt < ctype->mbdigits_act)
 690     {
 691       size_t inner;
 692       for (inner = 0; inner < 10; ++inner)
 693         if (ctype->mbdigits[cnt + inner] == NULL)
 694           break;
 695
 696       if (inner == 10)
 697         cnt += 10;
 698       else
 699         {
 700           /* Remove the group.  */
 701           memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
 702                    ((ctype->wcdigits_act - cnt - 10)
 703                     * sizeof (ctype->mbdigits[0])));
 704           ctype->mbdigits_act -= 10;
 705         }
 706     }
 707
 708   /* If no input digits are given use the default.  */
 709   if (ctype->mbdigits_act == 0)
 710     {
 711       if (ctype->mbdigits_max == 0)
 712         {
 713           ctype->mbdigits = obstack_alloc (&charmap->mem_pool,
 714                                            10 * sizeof (struct charseq *));
 715           ctype->mbdigits_max = 10;
 716         }
 717
 718       for (cnt = 0; cnt < 10; ++cnt)
 719         {
 720           ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 721                                                       digits + cnt, 1);
 722           if (ctype->mbdigits[cnt] == NULL)
 723             {
 724               ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 725                                                           longnames[cnt],
 726                                                           strlen (longnames[cnt]));
 727               if (ctype->mbdigits[cnt] == NULL)
 728                 {
 729                   /* Hum, this ain't good.  */
 730                   error (0, 0, _("\
 731 no input digits defined and none of the standard names in the charmap"));
 732
 733                   ctype->mbdigits[cnt] = obstack_alloc (&charmap->mem_pool,
 734                                                         sizeof (struct charseq) + 1);
 735
 736                   /* This is better than nothing.  */
 737                   ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
 738                   ctype->mbdigits[cnt]->nbytes = 1;
 739                 }
 740             }
 741         }
 742
 743       ctype->mbdigits_act = 10;
 744     }
 745
 746   /* Check the wide character input digits.  There must be a multiple
 747      of ten available.  In each group it could be that one or the other
 748      character is missing.  In this case the whole group must be
 749      removed.  */
 750   cnt = 0;
 751   while (cnt < ctype->wcdigits_act)
 752     {
 753       size_t inner;
 754       for (inner = 0; inner < 10; ++inner)
 755         if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
 756           break;
 757
 758       if (inner == 10)
 759         cnt += 10;
 760       else
 761         {
 762           /* Remove the group.  */
 763           memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
 764                    ((ctype->wcdigits_act - cnt - 10)
 765                     * sizeof (ctype->wcdigits[0])));
 766           ctype->wcdigits_act -= 10;
 767         }
 768     }
 769
 770   /* If no input digits are given use the default.  */
 771   if (ctype->wcdigits_act == 0)
 772     {
 773       if (ctype->wcdigits_max == 0)
 774         {
 775           ctype->wcdigits = obstack_alloc (&charmap->mem_pool,
 776                                            10 * sizeof (uint32_t));
 777           ctype->wcdigits_max = 10;
 778         }
 779
 780       for (cnt = 0; cnt < 10; ++cnt)
 781         ctype->wcdigits[cnt] = L'0' + cnt;
 782
 783       ctype->mbdigits_act = 10;
 784     }
 785
 786   /* Check the outdigits.  */
 787   warned = 0;
 788   for (cnt = 0; cnt < 10; ++cnt)
 789     if (ctype->mboutdigits[cnt] == NULL)
 790       {
 791         static struct charseq replace[2];
 792
 793         if (!warned)
 794           {
 795             error (0, 0, _("\
 796 not all characters used in `outdigit' are available in the charmap"));
 797             warned = 1;
 798           }
 799
 800         replace[0].nbytes = 1;
 801         replace[0].bytes[0] = '?';
 802         replace[0].bytes[1] = '\0';
 803         ctype->mboutdigits[cnt] = &replace[0];
 804       }
 805
 806   warned = 0;
 807   for (cnt = 0; cnt < 10; ++cnt)
 808     if (ctype->wcoutdigits[cnt] == 0)
 809       {
 810         if (!warned)
 811           {
 812             error (0, 0, _("\
 813 not all characters used in `outdigit' are available in the repertoire"));
 814             warned = 1;
 815           }
 816
 817         ctype->wcoutdigits[cnt] = L'?';
 818       }
 819
 820   /* Sort the entries in the translit_ignore list.  */
 821   if (ctype->translit_ignore != NULL)
 822     {
 823       struct translit_ignore_t *firstp = ctype->translit_ignore;
 824       struct translit_ignore_t *runp;
 825
 826       ctype->ntranslit_ignore = 1;
 827
 828       for (runp = firstp->next; runp != NULL; runp = runp->next)
 829         {
 830           struct translit_ignore_t *lastp = NULL;
 831           struct translit_ignore_t *cmpp;
 832
 833           ++ctype->ntranslit_ignore;
 834
 835           for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
 836             if (runp->from < cmpp->from)
 837               break;
 838
 839           runp->next = lastp;
 840           if (lastp == NULL)
 841             firstp = runp;
 842         }
 843
 844       ctype->translit_ignore = firstp;
 845     }
 846 }
 847
 848
 849 void
 850 ctype_output (struct localedef_t *locale, struct charmap_t *charmap,
 851               const char *output_path)
 852 {
 853   static const char nulbytes[4] = { 0, 0, 0, 0 };
 854   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 855   const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
 856                          + ctype->nr_charclass + ctype->map_collection_nr);
 857   struct iovec iov[2 + nelems + 2 * ctype->nr_charclass
 858                   + ctype->map_collection_nr + 4];
 859   struct locale_file data;
 860   uint32_t idx[nelems + 1];
 861   uint32_t default_missing_len;
 862   size_t elem, cnt, offset, total;
 863   char *cp;
 864
 865   /* Now prepare the output: Find the sizes of the table we can use.  */
 866   allocate_arrays (ctype, charmap, ctype->repertoire);
 867
 868   data.magic = LIMAGIC (LC_CTYPE);
 869   data.n = nelems;
 870   iov[0].iov_base = (void *) &data;
 871   iov[0].iov_len = sizeof (data);
 872
 873   iov[1].iov_base = (void *) idx;
 874   iov[1].iov_len = nelems * sizeof (uint32_t);
 875
 876   idx[0] = iov[0].iov_len + iov[1].iov_len;
 877   offset = 0;
 878
 879   for (elem = 0; elem < nelems; ++elem)
 880     {
 881       if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
 882         switch (elem)
 883           {
 884 #define CTYPE_EMPTY(name) \
 885           case name:                                                          \
 886             iov[2 + elem + offset].iov_base = NULL;                           \
 887             iov[2 + elem + offset].iov_len = 0;                               \
 888             idx[elem + 1] = idx[elem];                                        \
 889             break
 890
 891           CTYPE_EMPTY(_NL_CTYPE_GAP1);
 892           CTYPE_EMPTY(_NL_CTYPE_GAP2);
 893           CTYPE_EMPTY(_NL_CTYPE_GAP3);
 894           CTYPE_EMPTY(_NL_CTYPE_GAP4);
 895           CTYPE_EMPTY(_NL_CTYPE_GAP5);
 896           CTYPE_EMPTY(_NL_CTYPE_GAP6);
 897
 898 #define CTYPE_DATA(name, base, len)                                           \
 899           case _NL_ITEM_INDEX (name):                                         \
 900             iov[2 + elem + offset].iov_base = (base);                         \
 901             iov[2 + elem + offset].iov_len = (len);                           \
 902             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;       \
 903             break
 904
 905           CTYPE_DATA (_NL_CTYPE_CLASS,
 906                       ctype->ctype_b,
 907                       (256 + 128) * sizeof (char_class_t));
 908
 909           CTYPE_DATA (_NL_CTYPE_TOUPPER,
 910                       ctype->map_b[0],
 911                       (256 + 128) * sizeof (uint32_t));
 912           CTYPE_DATA (_NL_CTYPE_TOLOWER,
 913                       ctype->map_b[1],
 914                       (256 + 128) * sizeof (uint32_t));
 915
 916           CTYPE_DATA (_NL_CTYPE_TOUPPER32,
 917                       ctype->map32_b[0],
 918                       256 * sizeof (uint32_t));
 919           CTYPE_DATA (_NL_CTYPE_TOLOWER32,
 920                       ctype->map32_b[1],
 921                       256 * sizeof (uint32_t));
 922
 923           CTYPE_DATA (_NL_CTYPE_CLASS32,
 924                       ctype->ctype32_b,
 925                       256 * sizeof (char_class32_t));
 926
 927           CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET,
 928                       &ctype->class_offset, sizeof (uint32_t));
 929
 930           CTYPE_DATA (_NL_CTYPE_MAP_OFFSET,
 931                       &ctype->map_offset, sizeof (uint32_t));
 932
 933           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
 934                       &ctype->translit_idx_size, sizeof (uint32_t));
 935
 936           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
 937                       ctype->translit_from_idx,
 938                       ctype->translit_idx_size * sizeof (uint32_t));
 939
 940           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
 941                       ctype->translit_from_tbl,
 942                       ctype->translit_from_tbl_size);
 943
 944           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
 945                       ctype->translit_to_idx,
 946                       ctype->translit_idx_size * sizeof (uint32_t));
 947
 948           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
 949                       ctype->translit_to_tbl, ctype->translit_to_tbl_size);
 950
 951           case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
 952             /* The class name array.  */
 953             total = 0;
 954             for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
 955               {
 956                 iov[2 + elem + offset].iov_base
 957                   = (void *) ctype->classnames[cnt];
 958                 iov[2 + elem + offset].iov_len
 959                   = strlen (ctype->classnames[cnt]) + 1;
 960                 total += iov[2 + elem + offset].iov_len;
 961               }
 962             iov[2 + elem + offset].iov_base = (void *) nulbytes;
 963             iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
 964             total += 1 + (4 - ((total + 1) % 4));
 965
 966             idx[elem + 1] = idx[elem] + total;
 967             break;
 968
 969           case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
 970             /* The class name array.  */
 971             total = 0;
 972             for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
 973               {
 974                 iov[2 + elem + offset].iov_base
 975                   = (void *) ctype->mapnames[cnt];
 976                 iov[2 + elem + offset].iov_len
 977                   = strlen (ctype->mapnames[cnt]) + 1;
 978                 total += iov[2 + elem + offset].iov_len;
 979               }
 980             iov[2 + elem + offset].iov_base = (void *) nulbytes;
 981             iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
 982             total += 1 + (4 - ((total + 1) % 4));
 983
 984             idx[elem + 1] = idx[elem] + total;
 985             break;
 986
 987           CTYPE_DATA (_NL_CTYPE_WIDTH,
 988                       ctype->width.iov_base,
 989                       ctype->width.iov_len);
 990
 991           CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
 992                       &ctype->mb_cur_max, sizeof (uint32_t));
 993
 994           case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
 995             total = strlen (ctype->codeset_name) + 1;
 996             if (total % 4 == 0)
 997               iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
 998             else
 999               {
1000                 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
1001                 memset (mempcpy (iov[2 + elem + offset].iov_base,
1002                                  ctype->codeset_name, total),
1003                         '\0', 4 - (total & 3));
1004                 total = (total + 3) & ~3;
1005               }
1006             iov[2 + elem + offset].iov_len = total;
1007             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1008             break;
1009
1010           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1011             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1012             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1013             *(uint32_t *) iov[2 + elem + offset].iov_base =
1014               ctype->mbdigits_act / 10;
1015             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1016             break;
1017
1018           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1019             /* Align entries.  */
1020             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1021             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1022             idx[elem] += iov[2 + elem + offset].iov_len;
1023             ++offset;
1024
1025             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1026             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1027             *(uint32_t *) iov[2 + elem + offset].iov_base =
1028               ctype->wcdigits_act / 10;
1029             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1030             break;
1031
1032           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1033             /* Compute the length of all possible characters.  For INDIGITS
1034                there might be more than one.  We simply concatenate all of
1035                them with a NUL byte following.  The NUL byte wouldn't be
1036                necessary but it makes it easier for the user.  */
1037             total = 0;
1038
1039             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1040                  cnt < ctype->mbdigits_act; cnt += 10)
1041               total += ctype->mbdigits[cnt]->nbytes + 1;
1042             iov[2 + elem + offset].iov_base = (char *) alloca (total);
1043             iov[2 + elem + offset].iov_len = total;
1044
1045             cp = iov[2 + elem + offset].iov_base;
1046             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1047                  cnt < ctype->mbdigits_act; cnt += 10)
1048               {
1049                 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
1050                               ctype->mbdigits[cnt]->nbytes);
1051                 *cp++ = '\0';
1052               }
1053             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1054             break;
1055
1056           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1057             /* Compute the length of all possible characters.  For INDIGITS
1058                there might be more than one.  We simply concatenate all of
1059                them with a NUL byte following.  The NUL byte wouldn't be
1060                necessary but it makes it easier for the user.  */
1061             cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1062             total = ctype->mboutdigits[cnt]->nbytes + 1;
1063             iov[2 + elem + offset].iov_base = (char *) alloca (total);
1064             iov[2 + elem + offset].iov_len = total;
1065
1066             *(char *) mempcpy (iov[2 + elem + offset].iov_base,
1067                                ctype->mboutdigits[cnt]->bytes,
1068                                ctype->mboutdigits[cnt]->nbytes) = '\0';
1069             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1070             break;
1071
1072           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1073             total = ctype->wcdigits_act / 10;
1074
1075             iov[2 + elem + offset].iov_base =
1076               (uint32_t *) alloca (total * sizeof (uint32_t));
1077             iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
1078
1079             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1080                  cnt < ctype->wcdigits_act; cnt += 10)
1081               ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
1082                 = ctype->wcdigits[cnt];
1083             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1084             break;
1085
1086           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC):
1087             /* Align entries.  */
1088             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1089             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1090             idx[elem] += iov[2 + elem + offset].iov_len;
1091             ++offset;
1092             /* FALLTRHOUGH */
1093
1094           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1095             cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1096             iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
1097             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1098             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1099             break;
1100
1101           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1102             /* Align entries.  */
1103             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1104             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1105             idx[elem] += iov[2 + elem + offset].iov_len;
1106             ++offset;
1107
1108             default_missing_len = (ctype->default_missing
1109                                    ? wcslen ((wchar_t *)ctype->default_missing)
1110                                    : 0);
1111             iov[2 + elem + offset].iov_base = &default_missing_len;
1112             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1113             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1114             break;
1115
1116           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1117             iov[2 + elem + offset].iov_base =
1118               ctype->default_missing ?: (uint32_t *) L"";
1119             iov[2 + elem + offset].iov_len =
1120               wcslen (iov[2 + elem + offset].iov_base);
1121             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1122             break;
1123
1124           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1125             /* Align entries.  */
1126             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1127             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1128             idx[elem] += iov[2 + elem + offset].iov_len;
1129             ++offset;
1130
1131             iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore;
1132             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1133             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1134             break;
1135
1136           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1137             {
1138               uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore
1139                                                       * 3 * sizeof (uint32_t));
1140               struct translit_ignore_t *runp;
1141
1142               iov[2 + elem + offset].iov_base = ranges;
1143               iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore
1144                                                 * 3 * sizeof (uint32_t));
1145
1146               for (runp = ctype->translit_ignore; runp != NULL;
1147                    runp = runp->next)
1148                 {
1149                   *ranges++ = runp->from;
1150                   *ranges++ = runp->to;
1151                   *ranges++ = runp->step;
1152                 }
1153             }
1154             /* Remove the following line in case a new entry is added
1155                after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN.  */
1156             if (elem < nelems)
1157               idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1158             break;
1159
1160           default:
1161             assert (! "unknown CTYPE element");
1162           }
1163       else
1164         {
1165           /* Handle extra maps.  */
1166           size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1167           if (nr < ctype->nr_charclass)
1168             {
1169               iov[2 + elem + offset].iov_base = ctype->class_b[nr];
1170               iov[2 + elem + offset].iov_len = 256 / 32 * sizeof (uint32_t);
1171               idx[elem] += iov[2 + elem + offset].iov_len;
1172               ++offset;
1173
1174               iov[2 + elem + offset] = ctype->class_3level[nr];
1175             }
1176           else
1177             {
1178               nr -= ctype->nr_charclass;
1179               assert (nr < ctype->map_collection_nr);
1180               iov[2 + elem + offset] = ctype->map_3level[nr];
1181             }
1182           idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1183         }
1184     }
1185
1186   assert (2 + elem + offset == (nelems + 2 * ctype->nr_charclass
1187                                 + ctype->map_collection_nr + 4 + 2));
1188
1189   write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
1190 }
1191
1192
1193 /* Local functions.  */
1194 static void
1195 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1196                  const char *name)
1197 {
1198   size_t cnt;
1199
1200   for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1201     if (strcmp (ctype->classnames[cnt], name) == 0)
1202       break;
1203
1204   if (cnt < ctype->nr_charclass)
1205     {
1206       lr_error (lr, _("character class `%s' already defined"), name);
1207       return;
1208     }
1209
1210   if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1211     /* Exit code 2 is prescribed in P1003.2b.  */
1212     error (2, 0, _("\
1213 implementation limit: no more than %Zd character classes allowed"),
1214            MAX_NR_CHARCLASS);
1215
1216   ctype->classnames[ctype->nr_charclass++] = name;
1217 }
1218
1219
1220 static void
1221 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1222                const char *name, struct charmap_t *charmap)
1223 {
1224   size_t max_chars = 0;
1225   size_t cnt;
1226
1227   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1228     {
1229       if (strcmp (ctype->mapnames[cnt], name) == 0)
1230         break;
1231
1232       if (max_chars < ctype->map_collection_max[cnt])
1233         max_chars = ctype->map_collection_max[cnt];
1234     }
1235
1236   if (cnt < ctype->map_collection_nr)
1237     {
1238       lr_error (lr, _("character map `%s' already defined"), name);
1239       return;
1240     }
1241
1242   if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1243     /* Exit code 2 is prescribed in P1003.2b.  */
1244     error (2, 0, _("\
1245 implementation limit: no more than %d character maps allowed"),
1246            MAX_NR_CHARMAP);
1247
1248   ctype->mapnames[cnt] = name;
1249
1250   if (max_chars == 0)
1251     ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1252   else
1253     ctype->map_collection_max[cnt] = max_chars;
1254
1255   ctype->map_collection[cnt] = (uint32_t *)
1256     xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1257   ctype->map_collection_act[cnt] = 256;
1258
1259   ++ctype->map_collection_nr;
1260 }
1261
1262
1263 /* We have to be prepared that TABLE, MAX, and ACT can be NULL.  This
1264    is possible if we only want to extend the name array.  */
1265 static uint32_t *
1266 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1267           size_t *act, uint32_t idx)
1268 {
1269   size_t cnt;
1270
1271   if (idx < 256)
1272     return table == NULL ? NULL : &(*table)[idx];
1273
1274   /* If idx is in the usual range, use the charnames_idx lookup table
1275      instead of the slow search loop.  */
1276   if (idx < MAX_CHARNAMES_IDX)
1277     {
1278       if (ctype->charnames_idx[idx] != ~((uint32_t) 0))
1279         /* Found.  */
1280         cnt = ctype->charnames_idx[idx];
1281       else
1282         /* Not found.  */
1283         cnt = ctype->charnames_act;
1284     }
1285   else
1286     {
1287       for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1288         if (ctype->charnames[cnt] == idx)
1289           break;
1290     }
1291
1292   /* We have to distinguish two cases: the name is found or not.  */
1293   if (cnt == ctype->charnames_act)
1294     {
1295       /* Extend the name array.  */
1296       if (ctype->charnames_act == ctype->charnames_max)
1297         {
1298           ctype->charnames_max *= 2;
1299           ctype->charnames = (uint32_t *)
1300             xrealloc (ctype->charnames,
1301                       sizeof (uint32_t) * ctype->charnames_max);
1302         }
1303       ctype->charnames[ctype->charnames_act++] = idx;
1304       if (idx < MAX_CHARNAMES_IDX)
1305         ctype->charnames_idx[idx] = cnt;
1306     }
1307
1308   if (table == NULL)
1309     /* We have done everything we are asked to do.  */
1310     return NULL;
1311
1312   if (cnt >= *act)
1313     {
1314       if (cnt >= *max)
1315         {
1316           size_t old_max = *max;
1317           do
1318             *max *= 2;
1319           while (*max <= cnt);
1320
1321           *table =
1322             (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1323           memset (&(*table)[old_max], '\0',
1324                   (*max - old_max) * sizeof (uint32_t));
1325         }
1326
1327       *act = cnt + 1;
1328     }
1329
1330   return &(*table)[cnt];
1331 }
1332
1333
1334 static int
1335 get_character (struct token *now, struct charmap_t *charmap,
1336                struct repertoire_t *repertoire,
1337                struct charseq **seqp, uint32_t *wchp)
1338 {
1339   if (now->tok == tok_bsymbol)
1340     {
1341       /* This will hopefully be the normal case.  */
1342       *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1343                                      now->val.str.lenmb);
1344       *seqp = charmap_find_value (charmap, now->val.str.startmb,
1345                                   now->val.str.lenmb);
1346     }
1347   else if (now->tok == tok_ucs4)
1348     {
1349       char utmp[10];
1350
1351       snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1352       *seqp = charmap_find_value (charmap, utmp, 9);
1353
1354       if (*seqp == NULL)
1355         *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1356
1357       if (*seqp == NULL)
1358         {
1359           /* Compute the value in the charmap from the UCS value.  */
1360           const char *symbol = repertoire_find_symbol (repertoire,
1361                                                        now->val.ucs4);
1362
1363           if (symbol == NULL)
1364             *seqp = NULL;
1365           else
1366             *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1367
1368           if (*seqp == NULL)
1369             {
1370               if (repertoire != NULL)
1371                 {
1372                   /* Insert a negative entry.  */
1373                   static const struct charseq negative
1374                     = { .ucs4 = ILLEGAL_CHAR_VALUE };
1375                   uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1376                                                   sizeof (uint32_t));
1377                   *newp = now->val.ucs4;
1378
1379                   insert_entry (&repertoire->seq_table, newp,
1380                                 sizeof (uint32_t), (void *) &negative);
1381                 }
1382             }
1383           else
1384             (*seqp)->ucs4 = now->val.ucs4;
1385         }
1386       else if ((*seqp)->ucs4 != now->val.ucs4)
1387         *seqp = NULL;
1388
1389       *wchp = now->val.ucs4;
1390     }
1391   else if (now->tok == tok_charcode)
1392     {
1393       /* We must map from the byte code to UCS4.  */
1394       *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1395                                    now->val.str.lenmb);
1396
1397       if (*seqp == NULL)
1398         *wchp = ILLEGAL_CHAR_VALUE;
1399       else
1400         {
1401           if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1402             (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1403                                                    strlen ((*seqp)->name));
1404           *wchp = (*seqp)->ucs4;
1405         }
1406     }
1407   else
1408     return 1;
1409
1410   return 0;
1411 }
1412
1413
1414 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1415    the .(2). counterparts.  */
1416 static void
1417 charclass_symbolic_ellipsis (struct linereader *ldfile,
1418                              struct locale_ctype_t *ctype,
1419                              struct charmap_t *charmap,
1420                              struct repertoire_t *repertoire,
1421                              struct token *now,
1422                              const char *last_str,
1423                              unsigned long int class256_bit,
1424                              unsigned long int class_bit, int base,
1425                              int ignore_content, int handle_digits, int step)
1426 {
1427   const char *nowstr = now->val.str.startmb;
1428   char tmp[now->val.str.lenmb + 1];
1429   const char *cp;
1430   char *endp;
1431   unsigned long int from;
1432   unsigned long int to;
1433
1434   /* We have to compute the ellipsis values using the symbolic names.  */
1435   assert (last_str != NULL);
1436
1437   if (strlen (last_str) != now->val.str.lenmb)
1438     {
1439     invalid_range:
1440       lr_error (ldfile,
1441                 _("`%s' and `%.*s' are no valid names for symbolic range"),
1442                 last_str, (int) now->val.str.lenmb, nowstr);
1443       return;
1444     }
1445
1446   if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1447     /* Nothing to do, the names are the same.  */
1448     return;
1449
1450   for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1451     ;
1452
1453   errno = 0;
1454   from = strtoul (cp, &endp, base);
1455   if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1456     goto invalid_range;
1457
1458   to = strtoul (nowstr + (cp - last_str), &endp, base);
1459   if ((to == UINT_MAX && errno == ERANGE)
1460       || (endp - nowstr) != now->val.str.lenmb || from >= to)
1461     goto invalid_range;
1462
1463   /* OK, we have a range FROM - TO.  Now we can create the symbolic names.  */
1464   if (!ignore_content)
1465     {
1466       now->val.str.startmb = tmp;
1467       while ((from += step) <= to)
1468         {
1469           struct charseq *seq;
1470           uint32_t wch;
1471
1472           sprintf (tmp, (base == 10 ? "%.*s%0*d" : "%.*s%0*X"), cp - last_str,
1473                    last_str, now->val.str.lenmb - (cp - last_str), from);
1474
1475           get_character (now, charmap, repertoire, &seq, &wch);
1476
1477           if (seq != NULL && seq->nbytes == 1)
1478             /* Yep, we can store information about this byte sequence.  */
1479             ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1480
1481           if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1482             /* We have the UCS4 position.  */
1483             *find_idx (ctype, &ctype->class_collection,
1484                        &ctype->class_collection_max,
1485                        &ctype->class_collection_act, wch) |= class_bit;
1486
1487           if (handle_digits == 1)
1488             {
1489               /* We must store the digit values.  */
1490               if (ctype->mbdigits_act == ctype->mbdigits_max)
1491                 {
1492                   ctype->mbdigits_max *= 2;
1493                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1494                                               (ctype->mbdigits_max
1495                                                * sizeof (char *)));
1496                   ctype->wcdigits_max *= 2;
1497                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1498                                               (ctype->wcdigits_max
1499                                                * sizeof (uint32_t)));
1500                 }
1501
1502               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1503               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1504             }
1505           else if (handle_digits == 2)
1506             {
1507               /* We must store the digit values.  */
1508               if (ctype->outdigits_act >= 10)
1509                 {
1510                   lr_error (ldfile, _("\
1511 %s: field `%s' does not contain exactly ten entries"),
1512                             "LC_CTYPE", "outdigit");
1513                   return;
1514                 }
1515
1516               ctype->mboutdigits[ctype->outdigits_act] = seq;
1517               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1518               ++ctype->outdigits_act;
1519             }
1520         }
1521     }
1522 }
1523
1524
1525 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'.  */
1526 static void
1527 charclass_ucs4_ellipsis (struct linereader *ldfile,
1528                          struct locale_ctype_t *ctype,
1529                          struct charmap_t *charmap,
1530                          struct repertoire_t *repertoire,
1531                          struct token *now, uint32_t last_wch,
1532                          unsigned long int class256_bit,
1533                          unsigned long int class_bit, int ignore_content,
1534                          int handle_digits, int step)
1535 {
1536   if (last_wch > now->val.ucs4)
1537     {
1538       lr_error (ldfile, _("\
1539 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1540                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1541                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1542       return;
1543     }
1544
1545   if (!ignore_content)
1546     while ((last_wch += step) <= now->val.ucs4)
1547       {
1548         /* We have to find out whether there is a byte sequence corresponding
1549            to this UCS4 value.  */
1550         struct charseq *seq;
1551         char utmp[10];
1552
1553         snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1554         seq = charmap_find_value (charmap, utmp, 9);
1555         if (seq == NULL)
1556           {
1557             snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1558             seq = charmap_find_value (charmap, utmp, 5);
1559           }
1560
1561         if (seq == NULL)
1562           /* Try looking in the repertoire map.  */
1563           seq = repertoire_find_seq (repertoire, last_wch);
1564
1565         /* If this is the first time we look for this sequence create a new
1566            entry.  */
1567         if (seq == NULL)
1568           {
1569             static const struct charseq negative
1570               = { .ucs4 = ILLEGAL_CHAR_VALUE };
1571
1572             /* Find the symbolic name for this UCS4 value.  */
1573             if (repertoire != NULL)
1574               {
1575                 const char *symbol = repertoire_find_symbol (repertoire,
1576                                                              last_wch);
1577                 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1578                                                 sizeof (uint32_t));
1579                 *newp = last_wch;
1580
1581                 if (symbol != NULL)
1582                   /* We have a name, now search the multibyte value.  */
1583                   seq = charmap_find_value (charmap, symbol, strlen (symbol));
1584
1585                 if (seq == NULL)
1586                   /* We have to create a fake entry.  */
1587                   seq = (struct charseq *) &negative;
1588                 else
1589                   seq->ucs4 = last_wch;
1590
1591                 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1592                               seq);
1593               }
1594             else
1595               /* We have to create a fake entry.  */
1596               seq = (struct charseq *) &negative;
1597           }
1598
1599         /* We have a name, now search the multibyte value.  */
1600         if (seq->ucs4 == last_wch && seq->nbytes == 1)
1601           /* Yep, we can store information about this byte sequence.  */
1602           ctype->class256_collection[(size_t) seq->bytes[0]]
1603             |= class256_bit;
1604
1605         /* And of course we have the UCS4 position.  */
1606         if (class_bit != 0)
1607           *find_idx (ctype, &ctype->class_collection,
1608                      &ctype->class_collection_max,
1609                      &ctype->class_collection_act, last_wch) |= class_bit;
1610
1611         if (handle_digits == 1)
1612           {
1613             /* We must store the digit values.  */
1614             if (ctype->mbdigits_act == ctype->mbdigits_max)
1615               {
1616                 ctype->mbdigits_max *= 2;
1617                 ctype->mbdigits = xrealloc (ctype->mbdigits,
1618                                             (ctype->mbdigits_max
1619                                              * sizeof (char *)));
1620                 ctype->wcdigits_max *= 2;
1621                 ctype->wcdigits = xrealloc (ctype->wcdigits,
1622                                             (ctype->wcdigits_max
1623                                              * sizeof (uint32_t)));
1624               }
1625
1626             ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1627                                                       ? seq : NULL);
1628             ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1629           }
1630         else if (handle_digits == 2)
1631           {
1632             /* We must store the digit values.  */
1633             if (ctype->outdigits_act >= 10)
1634               {
1635                 lr_error (ldfile, _("\
1636 %s: field `%s' does not contain exactly ten entries"),
1637                           "LC_CTYPE", "outdigit");
1638                 return;
1639               }
1640
1641             ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1642                                                         ? seq : NULL);
1643             ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1644             ++ctype->outdigits_act;
1645           }
1646       }
1647 }
1648
1649
1650 /* Ellipsis as in `/xea/x12.../xea/x34'.  */
1651 static void
1652 charclass_charcode_ellipsis (struct linereader *ldfile,
1653                              struct locale_ctype_t *ctype,
1654                              struct charmap_t *charmap,
1655                              struct repertoire_t *repertoire,
1656                              struct token *now, char *last_charcode,
1657                              uint32_t last_charcode_len,
1658                              unsigned long int class256_bit,
1659                              unsigned long int class_bit, int ignore_content,
1660                              int handle_digits)
1661 {
1662   /* First check whether the to-value is larger.  */
1663   if (now->val.charcode.nbytes != last_charcode_len)
1664     {
1665       lr_error (ldfile, _("\
1666 start end end character sequence of range must have the same length"));
1667       return;
1668     }
1669
1670   if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1671     {
1672       lr_error (ldfile, _("\
1673 to-value character sequence is smaller than from-value sequence"));
1674       return;
1675     }
1676
1677   if (!ignore_content)
1678     {
1679       do
1680         {
1681           /* Increment the byte sequence value.  */
1682           struct charseq *seq;
1683           uint32_t wch;
1684           int i;
1685
1686           for (i = last_charcode_len - 1; i >= 0; --i)
1687             if (++last_charcode[i] != 0)
1688               break;
1689
1690           if (last_charcode_len == 1)
1691             /* Of course we have the charcode value.  */
1692             ctype->class256_collection[(size_t) last_charcode[0]]
1693               |= class256_bit;
1694
1695           /* Find the symbolic name.  */
1696           seq = charmap_find_symbol (charmap, last_charcode,
1697                                      last_charcode_len);
1698           if (seq != NULL)
1699             {
1700               if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1701                 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1702                                                    strlen (seq->name));
1703               wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1704
1705               if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1706                 *find_idx (ctype, &ctype->class_collection,
1707                            &ctype->class_collection_max,
1708                            &ctype->class_collection_act, wch) |= class_bit;
1709             }
1710           else
1711             wch = ILLEGAL_CHAR_VALUE;
1712
1713           if (handle_digits == 1)
1714             {
1715               /* We must store the digit values.  */
1716               if (ctype->mbdigits_act == ctype->mbdigits_max)
1717                 {
1718                   ctype->mbdigits_max *= 2;
1719                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1720                                               (ctype->mbdigits_max
1721                                                * sizeof (char *)));
1722                   ctype->wcdigits_max *= 2;
1723                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1724                                               (ctype->wcdigits_max
1725                                                * sizeof (uint32_t)));
1726                 }
1727
1728               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1729               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1730               seq->nbytes = last_charcode_len;
1731
1732               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1733               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1734             }
1735           else if (handle_digits == 2)
1736             {
1737               struct charseq *seq;
1738               /* We must store the digit values.  */
1739               if (ctype->outdigits_act >= 10)
1740                 {
1741                   lr_error (ldfile, _("\
1742 %s: field `%s' does not contain exactly ten entries"),
1743                             "LC_CTYPE", "outdigit");
1744                   return;
1745                 }
1746
1747               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1748               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1749               seq->nbytes = last_charcode_len;
1750
1751               ctype->mboutdigits[ctype->outdigits_act] = seq;
1752               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1753               ++ctype->outdigits_act;
1754             }
1755         }
1756       while (memcmp (last_charcode, now->val.charcode.bytes,
1757                      last_charcode_len) != 0);
1758     }
1759 }
1760
1761
1762 /* Read one transliteration entry.  */
1763 static uint32_t *
1764 read_widestring (struct linereader *ldfile, struct token *now,
1765                  struct charmap_t *charmap, struct repertoire_t *repertoire)
1766 {
1767   uint32_t *wstr;
1768
1769   if (now->tok == tok_default_missing)
1770     /* The special name "" will denote this case.  */
1771     wstr = ((uint32_t *) { 0 });
1772   else if (now->tok == tok_bsymbol)
1773     {
1774       /* Get the value from the repertoire.  */
1775       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1776       wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1777                                        now->val.str.lenmb);
1778       if (wstr[0] == ILLEGAL_CHAR_VALUE)
1779         {
1780           /* We cannot proceed, we don't know the UCS4 value.  */
1781           free (wstr);
1782           return NULL;
1783         }
1784
1785       wstr[1] = 0;
1786     }
1787   else if (now->tok == tok_ucs4)
1788     {
1789       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1790       wstr[0] = now->val.ucs4;
1791       wstr[1] = 0;
1792     }
1793   else if (now->tok == tok_charcode)
1794     {
1795       /* Argh, we have to convert to the symbol name first and then to the
1796          UCS4 value.  */
1797       struct charseq *seq = charmap_find_symbol (charmap,
1798                                                  now->val.str.startmb,
1799                                                  now->val.str.lenmb);
1800       if (seq == NULL)
1801         /* Cannot find the UCS4 value.  */
1802         return NULL;
1803
1804       if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1805         seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1806                                            strlen (seq->name));
1807       if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1808         /* We cannot proceed, we don't know the UCS4 value.  */
1809         return NULL;
1810
1811       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1812       wstr[0] = seq->ucs4;
1813       wstr[1] = 0;
1814     }
1815   else if (now->tok == tok_string)
1816     {
1817       wstr = now->val.str.startwc;
1818       if (wstr == NULL || wstr[0] == 0)
1819         return NULL;
1820     }
1821   else
1822     {
1823       if (now->tok != tok_eol && now->tok != tok_eof)
1824         lr_ignore_rest (ldfile, 0);
1825       SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1826       return (uint32_t *) -1l;
1827     }
1828
1829   return wstr;
1830 }
1831
1832
1833 static void
1834 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1835                      struct token *now, struct charmap_t *charmap,
1836                      struct repertoire_t *repertoire)
1837 {
1838   uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1839   struct translit_t *result;
1840   struct translit_to_t **top;
1841   struct obstack *ob = &ctype->mempool;
1842   int first;
1843   int ignore;
1844
1845   if (from_wstr == NULL)
1846     /* There is no valid from string.  */
1847     return;
1848
1849   result = (struct translit_t *) obstack_alloc (ob,
1850                                                 sizeof (struct translit_t));
1851   result->from = from_wstr;
1852   result->fname = ldfile->fname;
1853   result->lineno = ldfile->lineno;
1854   result->next = NULL;
1855   result->to = NULL;
1856   top = &result->to;
1857   first = 1;
1858   ignore = 0;
1859
1860   while (1)
1861     {
1862       uint32_t *to_wstr;
1863
1864       /* Next we have one or more transliterations.  They are
1865          separated by semicolons.  */
1866       now = lr_token (ldfile, charmap, repertoire);
1867
1868       if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1869         {
1870           /* One string read.  */
1871           const uint32_t zero = 0;
1872
1873           if (!ignore)
1874             {
1875               obstack_grow (ob, &zero, 4);
1876               to_wstr = obstack_finish (ob);
1877
1878               *top = obstack_alloc (ob, sizeof (struct translit_to_t));
1879               (*top)->str = to_wstr;
1880               (*top)->next = NULL;
1881             }
1882
1883           if (now->tok == tok_eol)
1884             {
1885               result->next = ctype->translit;
1886               ctype->translit = result;
1887               return;
1888             }
1889
1890           if (!ignore)
1891             top = &(*top)->next;
1892           ignore = 0;
1893         }
1894       else
1895         {
1896           to_wstr = read_widestring (ldfile, now, charmap, repertoire);
1897           if (to_wstr == (uint32_t *) -1l)
1898             {
1899               /* An error occurred.  */
1900               obstack_free (ob, result);
1901               return;
1902             }
1903
1904           if (to_wstr == NULL)
1905             ignore = 1;
1906           else
1907             /* This value is usable.  */
1908             obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
1909
1910           first = 0;
1911         }
1912     }
1913 }
1914
1915
1916 static void
1917 read_translit_ignore_entry (struct linereader *ldfile,
1918                             struct locale_ctype_t *ctype,
1919                             struct charmap_t *charmap,
1920                             struct repertoire_t *repertoire)
1921 {
1922   /* We expect a semicolon-separated list of characters we ignore.  We are
1923      only interested in the wide character definitions.  These must be
1924      single characters, possibly defining a range when an ellipsis is used.  */
1925   while (1)
1926     {
1927       struct token *now = lr_token (ldfile, charmap, repertoire);
1928       struct translit_ignore_t *newp;
1929       uint32_t from;
1930
1931       if (now->tok == tok_eol || now->tok == tok_eof)
1932         {
1933           lr_error (ldfile,
1934                     _("premature end of `translit_ignore' definition"));
1935           return;
1936         }
1937
1938       if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1939         {
1940           lr_error (ldfile, _("syntax error"));
1941           lr_ignore_rest (ldfile, 0);
1942           return;
1943         }
1944
1945       if (now->tok == tok_ucs4)
1946         from = now->val.ucs4;
1947       else
1948         /* Try to get the value.  */
1949         from = repertoire_find_value (repertoire, now->val.str.startmb,
1950                                       now->val.str.lenmb);
1951
1952       if (from == ILLEGAL_CHAR_VALUE)
1953         {
1954           lr_error (ldfile, "invalid character name");
1955           newp = NULL;
1956         }
1957       else
1958         {
1959           newp = (struct translit_ignore_t *)
1960             obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
1961           newp->from = from;
1962           newp->to = from;
1963           newp->step = 1;
1964
1965           newp->next = ctype->translit_ignore;
1966           ctype->translit_ignore = newp;
1967         }
1968
1969       /* Now we expect either a semicolon, an ellipsis, or the end of the
1970          line.  */
1971       now = lr_token (ldfile, charmap, repertoire);
1972
1973       if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
1974         {
1975           /* XXX Should we bother implementing `....'?  `...' certainly
1976              will not be implemented.  */
1977           uint32_t to;
1978           int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
1979
1980           now = lr_token (ldfile, charmap, repertoire);
1981
1982           if (now->tok == tok_eol || now->tok == tok_eof)
1983             {
1984               lr_error (ldfile,
1985                         _("premature end of `translit_ignore' definition"));
1986               return;
1987             }
1988
1989           if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1990             {
1991               lr_error (ldfile, _("syntax error"));
1992               lr_ignore_rest (ldfile, 0);
1993               return;
1994             }
1995
1996           if (now->tok == tok_ucs4)
1997             to = now->val.ucs4;
1998           else
1999             /* Try to get the value.  */
2000             to = repertoire_find_value (repertoire, now->val.str.startmb,
2001                                         now->val.str.lenmb);
2002
2003           if (to == ILLEGAL_CHAR_VALUE)
2004             lr_error (ldfile, "invalid character name");
2005           else
2006             {
2007               /* Make sure the `to'-value is larger.  */
2008               if (to >= from)
2009                 {
2010                   newp->to = to;
2011                   newp->step = step;
2012                 }
2013               else
2014                 lr_error (ldfile, _("\
2015 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2016                           (to | from) < 65536 ? 4 : 8, to,
2017                           (to | from) < 65536 ? 4 : 8, from);
2018             }
2019
2020           /* And the next token.  */
2021           now = lr_token (ldfile, charmap, repertoire);
2022         }
2023
2024       if (now->tok == tok_eol || now->tok == tok_eof)
2025         /* We are done.  */
2026         return;
2027
2028       if (now->tok == tok_semicolon)
2029         /* Next round.  */
2030         continue;
2031
2032       /* If we come here something is wrong.  */
2033       lr_error (ldfile, _("syntax error"));
2034       lr_ignore_rest (ldfile, 0);
2035       return;
2036     }
2037 }
2038
2039
2040 /* The parser for the LC_CTYPE section of the locale definition.  */
2041 void
2042 ctype_read (struct linereader *ldfile, struct localedef_t *result,
2043             struct charmap_t *charmap, const char *repertoire_name,
2044             int ignore_content)
2045 {
2046   struct repertoire_t *repertoire = NULL;
2047   struct locale_ctype_t *ctype;
2048   struct token *now;
2049   enum token_t nowtok;
2050   size_t cnt;
2051   struct charseq *last_seq;
2052   uint32_t last_wch = 0;
2053   enum token_t last_token;
2054   enum token_t ellipsis_token;
2055   int step;
2056   char last_charcode[16];
2057   size_t last_charcode_len = 0;
2058   const char *last_str = NULL;
2059   int mapidx;
2060   struct localedef_t *copy_locale = NULL;
2061
2062   /* Get the repertoire we have to use.  */
2063   if (repertoire_name != NULL)
2064     repertoire = repertoire_read (repertoire_name);
2065
2066   /* The rest of the line containing `LC_CTYPE' must be free.  */
2067   lr_ignore_rest (ldfile, 1);
2068
2069
2070   do
2071     {
2072       now = lr_token (ldfile, charmap, NULL);
2073       nowtok = now->tok;
2074     }
2075   while (nowtok == tok_eol);
2076
2077   /* If we see `copy' now we are almost done.  */
2078   if (nowtok == tok_copy)
2079     {
2080       now = lr_token (ldfile, charmap, NULL);
2081       if (now->tok != tok_string)
2082         {
2083           SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2084
2085         skip_category:
2086           do
2087             now = lr_token (ldfile, charmap, NULL);
2088           while (now->tok != tok_eof && now->tok != tok_end);
2089
2090           if (now->tok != tok_eof
2091               || (now = lr_token (ldfile, charmap, NULL), now->tok == tok_eof))
2092             lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2093           else if (now->tok != tok_lc_ctype)
2094             {
2095               lr_error (ldfile, _("\
2096 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2097               lr_ignore_rest (ldfile, 0);
2098             }
2099           else
2100             lr_ignore_rest (ldfile, 1);
2101
2102           return;
2103         }
2104
2105       if (! ignore_content)
2106         {
2107           /* Get the locale definition.  */
2108           copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2109                                      repertoire_name, charmap, NULL);
2110           if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2111             {
2112               /* Not yet loaded.  So do it now.  */
2113               if (locfile_read (copy_locale, charmap) != 0)
2114                 goto skip_category;
2115             }
2116         }
2117
2118       lr_ignore_rest (ldfile, 1);
2119
2120       now = lr_token (ldfile, charmap, NULL);
2121       nowtok = now->tok;
2122     }
2123
2124   /* Prepare the data structures.  */
2125   ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2126   ctype = result->categories[LC_CTYPE].ctype;
2127
2128   /* Remember the repertoire we use.  */
2129   if (!ignore_content)
2130     ctype->repertoire = repertoire;
2131
2132   while (1)
2133     {
2134       unsigned long int class_bit = 0;
2135       unsigned long int class256_bit = 0;
2136       int handle_digits = 0;
2137
2138       /* Of course we don't proceed beyond the end of file.  */
2139       if (nowtok == tok_eof)
2140         break;
2141
2142       /* Ingore empty lines.  */
2143       if (nowtok == tok_eol)
2144         {
2145           now = lr_token (ldfile, charmap, NULL);
2146           nowtok = now->tok;
2147           continue;
2148         }
2149
2150       switch (nowtok)
2151         {
2152         case tok_charclass:
2153           now = lr_token (ldfile, charmap, NULL);
2154           while (now->tok == tok_ident || now->tok == tok_string)
2155             {
2156               ctype_class_new (ldfile, ctype, now->val.str.startmb);
2157               now = lr_token (ldfile, charmap, NULL);
2158               if (now->tok != tok_semicolon)
2159                 break;
2160               now = lr_token (ldfile, charmap, NULL);
2161             }
2162           if (now->tok != tok_eol)
2163             SYNTAX_ERROR (_("\
2164 %s: syntax error in definition of new character class"), "LC_CTYPE");
2165           break;
2166
2167         case tok_charconv:
2168           now = lr_token (ldfile, charmap, NULL);
2169           while (now->tok == tok_ident || now->tok == tok_string)
2170             {
2171               ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2172               now = lr_token (ldfile, charmap, NULL);
2173               if (now->tok != tok_semicolon)
2174                 break;
2175               now = lr_token (ldfile, charmap, NULL);
2176             }
2177           if (now->tok != tok_eol)
2178             SYNTAX_ERROR (_("\
2179 %s: syntax error in definition of new character map"), "LC_CTYPE");
2180           break;
2181
2182         case tok_class:
2183           /* Ignore the rest of the line if we don't need the input of
2184              this line.  */
2185           if (ignore_content)
2186             {
2187               lr_ignore_rest (ldfile, 0);
2188               break;
2189             }
2190
2191           /* We simply forget the `class' keyword and use the following
2192              operand to determine the bit.  */
2193           now = lr_token (ldfile, charmap, NULL);
2194           if (now->tok == tok_ident || now->tok == tok_string)
2195             {
2196               /* Must can be one of the predefined class names.  */
2197               for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2198                 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2199                   break;
2200               if (cnt >= ctype->nr_charclass)
2201                 {
2202 #ifdef PREDEFINED_CLASSES
2203                   if (now->val.str.lenmb == 8
2204                       && memcmp ("special1", now->val.str.startmb, 8) == 0)
2205                     class_bit = _ISwspecial1;
2206                   else if (now->val.str.lenmb == 8
2207                       && memcmp ("special2", now->val.str.startmb, 8) == 0)
2208                     class_bit = _ISwspecial2;
2209                   else if (now->val.str.lenmb == 8
2210                       && memcmp ("special3", now->val.str.startmb, 8) == 0)
2211                     class_bit = _ISwspecial3;
2212                   else
2213 #endif
2214                     {
2215                       /* OK, it's a new class.  */
2216                       ctype_class_new (ldfile, ctype, now->val.str.startmb);
2217
2218                       class_bit = _ISwbit (ctype->nr_charclass - 1);
2219                     }
2220                 }
2221               else
2222                 {
2223                   class_bit = _ISwbit (cnt);
2224
2225                   free (now->val.str.startmb);
2226                 }
2227             }
2228           else if (now->tok == tok_digit)
2229             goto handle_tok_digit;
2230           else if (now->tok < tok_upper || now->tok > tok_blank)
2231             goto err_label;
2232           else
2233             {
2234               class_bit = BITw (now->tok);
2235               class256_bit = BIT (now->tok);
2236             }
2237
2238           /* The next character must be a semicolon.  */
2239           now = lr_token (ldfile, charmap, NULL);
2240           if (now->tok != tok_semicolon)
2241             goto err_label;
2242           goto read_charclass;
2243
2244         case tok_upper:
2245         case tok_lower:
2246         case tok_alpha:
2247         case tok_alnum:
2248         case tok_space:
2249         case tok_cntrl:
2250         case tok_punct:
2251         case tok_graph:
2252         case tok_print:
2253         case tok_xdigit:
2254         case tok_blank:
2255           /* Ignore the rest of the line if we don't need the input of
2256              this line.  */
2257           if (ignore_content)
2258             {
2259               lr_ignore_rest (ldfile, 0);
2260               break;
2261             }
2262
2263           class_bit = BITw (now->tok);
2264           class256_bit = BIT (now->tok);
2265           handle_digits = 0;
2266         read_charclass:
2267           ctype->class_done |= class_bit;
2268           last_token = tok_none;
2269           ellipsis_token = tok_none;
2270           step = 1;
2271           now = lr_token (ldfile, charmap, NULL);
2272           while (now->tok != tok_eol && now->tok != tok_eof)
2273             {
2274               uint32_t wch;
2275               struct charseq *seq;
2276
2277               if (ellipsis_token == tok_none)
2278                 {
2279                   if (get_character (now, charmap, repertoire, &seq, &wch))
2280                     goto err_label;
2281
2282                   if (!ignore_content && seq != NULL && seq->nbytes == 1)
2283                     /* Yep, we can store information about this byte
2284                        sequence.  */
2285                     ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2286
2287                   if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2288                       && class_bit != 0)
2289                     /* We have the UCS4 position.  */
2290                     *find_idx (ctype, &ctype->class_collection,
2291                                &ctype->class_collection_max,
2292                                &ctype->class_collection_act, wch) |= class_bit;
2293
2294                   last_token = now->tok;
2295                   /* Terminate the string.  */
2296                   if (last_token == tok_bsymbol)
2297                     {
2298                       now->val.str.startmb[now->val.str.lenmb] = '\0';
2299                       last_str = now->val.str.startmb;
2300                     }
2301                   else
2302                     last_str = NULL;
2303                   last_seq = seq;
2304                   last_wch = wch;
2305                   memcpy (last_charcode, now->val.charcode.bytes, 16);
2306                   last_charcode_len = now->val.charcode.nbytes;
2307
2308                   if (!ignore_content && handle_digits == 1)
2309                     {
2310                       /* We must store the digit values.  */
2311                       if (ctype->mbdigits_act == ctype->mbdigits_max)
2312                         {
2313                           ctype->mbdigits_max += 10;
2314                           ctype->mbdigits = xrealloc (ctype->mbdigits,
2315                                                       (ctype->mbdigits_max
2316                                                        * sizeof (char *)));
2317                           ctype->wcdigits_max += 10;
2318                           ctype->wcdigits = xrealloc (ctype->wcdigits,
2319                                                       (ctype->wcdigits_max
2320                                                        * sizeof (uint32_t)));
2321                         }
2322
2323                       ctype->mbdigits[ctype->mbdigits_act++] = seq;
2324                       ctype->wcdigits[ctype->wcdigits_act++] = wch;
2325                     }
2326                   else if (!ignore_content && handle_digits == 2)
2327                     {
2328                       /* We must store the digit values.  */
2329                       if (ctype->outdigits_act >= 10)
2330                         {
2331                           lr_error (ldfile, _("\
2332 %s: field `%s' does not contain exactly ten entries"),
2333                             "LC_CTYPE", "outdigit");
2334                           lr_ignore_rest (ldfile, 0);
2335                           break;
2336                         }
2337
2338                       ctype->mboutdigits[ctype->outdigits_act] = seq;
2339                       ctype->wcoutdigits[ctype->outdigits_act] = wch;
2340                       ++ctype->outdigits_act;
2341                     }
2342                 }
2343               else
2344                 {
2345                   /* Now it gets complicated.  We have to resolve the
2346                      ellipsis problem.  First we must distinguish between
2347                      the different kind of ellipsis and this must match the
2348                      tokens we have seen.  */
2349                   assert (last_token != tok_none);
2350
2351                   if (last_token != now->tok)
2352                     {
2353                       lr_error (ldfile, _("\
2354 ellipsis range must be marked by two operands of same type"));
2355                       lr_ignore_rest (ldfile, 0);
2356                       break;
2357                     }
2358
2359                   if (last_token == tok_bsymbol)
2360                     {
2361                       if (ellipsis_token == tok_ellipsis3)
2362                         lr_error (ldfile, _("with symbolic name range values \
2363 the absolute ellipsis `...' must not be used"));
2364
2365                       charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2366                                                    repertoire, now, last_str,
2367                                                    class256_bit, class_bit,
2368                                                    (ellipsis_token
2369                                                     == tok_ellipsis4
2370                                                     ? 10 : 16),
2371                                                    ignore_content,
2372                                                    handle_digits, step);
2373                     }
2374                   else if (last_token == tok_ucs4)
2375                     {
2376                       if (ellipsis_token != tok_ellipsis2)
2377                         lr_error (ldfile, _("\
2378 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2379
2380                       charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2381                                                repertoire, now, last_wch,
2382                                                class256_bit, class_bit,
2383                                                ignore_content, handle_digits,
2384                                                step);
2385                     }
2386                   else
2387                     {
2388                       assert (last_token == tok_charcode);
2389
2390                       if (ellipsis_token != tok_ellipsis3)
2391                         lr_error (ldfile, _("\
2392 with character code range values one must use the absolute ellipsis `...'"));
2393
2394                       charclass_charcode_ellipsis (ldfile, ctype, charmap,
2395                                                    repertoire, now,
2396                                                    last_charcode,
2397                                                    last_charcode_len,
2398                                                    class256_bit, class_bit,
2399                                                    ignore_content,
2400                                                    handle_digits);
2401                     }
2402
2403                   /* Now we have used the last value.  */
2404                   last_token = tok_none;
2405                 }
2406
2407               /* Next we expect a semicolon or the end of the line.  */
2408               now = lr_token (ldfile, charmap, NULL);
2409               if (now->tok == tok_eol || now->tok == tok_eof)
2410                 break;
2411
2412               if (last_token != tok_none
2413                   && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2414                 {
2415                   if (now->tok == tok_ellipsis2_2)
2416                     {
2417                       now->tok = tok_ellipsis2;
2418                       step = 2;
2419                     }
2420                   else if (now->tok == tok_ellipsis4_2)
2421                     {
2422                       now->tok = tok_ellipsis4;
2423                       step = 2;
2424                     }
2425
2426                   ellipsis_token = now->tok;
2427
2428                   now = lr_token (ldfile, charmap, NULL);
2429                   continue;
2430                 }
2431
2432               if (now->tok != tok_semicolon)
2433                 goto err_label;
2434
2435               /* And get the next character.  */
2436               now = lr_token (ldfile, charmap, NULL);
2437
2438               ellipsis_token = tok_none;
2439               step = 1;
2440             }
2441           break;
2442
2443         case tok_digit:
2444           /* Ignore the rest of the line if we don't need the input of
2445              this line.  */
2446           if (ignore_content)
2447             {
2448               lr_ignore_rest (ldfile, 0);
2449               break;
2450             }
2451
2452         handle_tok_digit:
2453           class_bit = _ISwdigit;
2454           class256_bit = _ISdigit;
2455           handle_digits = 1;
2456           goto read_charclass;
2457
2458         case tok_outdigit:
2459           /* Ignore the rest of the line if we don't need the input of
2460              this line.  */
2461           if (ignore_content)
2462             {
2463               lr_ignore_rest (ldfile, 0);
2464               break;
2465             }
2466
2467           if (ctype->outdigits_act != 0)
2468             lr_error (ldfile, _("\
2469 %s: field `%s' declared more than once"),
2470                       "LC_CTYPE", "outdigit");
2471           class_bit = 0;
2472           class256_bit = 0;
2473           handle_digits = 2;
2474           goto read_charclass;
2475
2476         case tok_toupper:
2477           /* Ignore the rest of the line if we don't need the input of
2478              this line.  */
2479           if (ignore_content)
2480             {
2481               lr_ignore_rest (ldfile, 0);
2482               break;
2483             }
2484
2485           mapidx = 0;
2486           goto read_mapping;
2487
2488         case tok_tolower:
2489           /* Ignore the rest of the line if we don't need the input of
2490              this line.  */
2491           if (ignore_content)
2492             {
2493               lr_ignore_rest (ldfile, 0);
2494               break;
2495             }
2496
2497           mapidx = 1;
2498           goto read_mapping;
2499
2500         case tok_map:
2501           /* Ignore the rest of the line if we don't need the input of
2502              this line.  */
2503           if (ignore_content)
2504             {
2505               lr_ignore_rest (ldfile, 0);
2506               break;
2507             }
2508
2509           /* We simply forget the `map' keyword and use the following
2510              operand to determine the mapping.  */
2511           now = lr_token (ldfile, charmap, NULL);
2512           if (now->tok == tok_ident || now->tok == tok_string)
2513             {
2514               size_t cnt;
2515
2516               for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2517                 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2518                   break;
2519
2520               if (cnt < ctype->map_collection_nr)
2521                 free (now->val.str.startmb);
2522               else
2523                 /* OK, it's a new map.  */
2524                 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2525
2526               mapidx = cnt;
2527             }
2528           else if (now->tok < tok_toupper || now->tok > tok_tolower)
2529             goto err_label;
2530           else
2531             mapidx = now->tok - tok_toupper;
2532
2533           now = lr_token (ldfile, charmap, NULL);
2534           /* This had better be a semicolon.  */
2535           if (now->tok != tok_semicolon)
2536             goto err_label;
2537
2538         read_mapping:
2539           /* Test whether this mapping was already defined.  */
2540           if (ctype->tomap_done[mapidx])
2541             {
2542               lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2543                         ctype->mapnames[mapidx]);
2544               lr_ignore_rest (ldfile, 0);
2545               break;
2546             }
2547           ctype->tomap_done[mapidx] = 1;
2548
2549           now = lr_token (ldfile, charmap, NULL);
2550           while (now->tok != tok_eol && now->tok != tok_eof)
2551             {
2552               struct charseq *from_seq;
2553               uint32_t from_wch;
2554               struct charseq *to_seq;
2555               uint32_t to_wch;
2556
2557               /* Every pair starts with an opening brace.  */
2558               if (now->tok != tok_open_brace)
2559                 goto err_label;
2560
2561               /* Next comes the from-value.  */
2562               now = lr_token (ldfile, charmap, NULL);
2563               if (get_character (now, charmap, repertoire, &from_seq,
2564                                  &from_wch) != 0)
2565                 goto err_label;
2566
2567               /* The next is a comma.  */
2568               now = lr_token (ldfile, charmap, NULL);
2569               if (now->tok != tok_comma)
2570                 goto err_label;
2571
2572               /* And the other value.  */
2573               now = lr_token (ldfile, charmap, NULL);
2574               if (get_character (now, charmap, repertoire, &to_seq,
2575                                  &to_wch) != 0)
2576                 goto err_label;
2577
2578               /* And the last thing is the closing brace.  */
2579               now = lr_token (ldfile, charmap, NULL);
2580               if (now->tok != tok_close_brace)
2581                 goto err_label;
2582
2583               if (!ignore_content)
2584                 {
2585                   if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2586                       && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2587                     /* We can use this value.  */
2588                     ctype->map256_collection[mapidx][from_seq->bytes[0]]
2589                       = to_seq->bytes[0];
2590
2591                   if (from_wch != ILLEGAL_CHAR_VALUE
2592                       && to_wch != ILLEGAL_CHAR_VALUE)
2593                     /* Both correct values.  */
2594                     *find_idx (ctype, &ctype->map_collection[mapidx],
2595                                &ctype->map_collection_max[mapidx],
2596                                &ctype->map_collection_act[mapidx],
2597                                from_wch) = to_wch;
2598                 }
2599
2600               /* Now comes a semicolon or the end of the line/file.  */
2601               now = lr_token (ldfile, charmap, NULL);
2602               if (now->tok == tok_semicolon)
2603                 now = lr_token (ldfile, charmap, NULL);
2604             }
2605           break;
2606
2607         case tok_translit_start:
2608           /* Ignore the rest of the line if we don't need the input of
2609              this line.  */
2610           if (ignore_content)
2611             {
2612               lr_ignore_rest (ldfile, 0);
2613               break;
2614             }
2615
2616           /* The rest of the line had better be empty.  */
2617           lr_ignore_rest (ldfile, 1);
2618
2619           /* We count here the number of allocated entries in the `translit'
2620              array.  */
2621           cnt = 0;
2622
2623           ldfile->translate_strings = 1;
2624           ldfile->return_widestr = 1;
2625
2626           /* We proceed until we see the `translit_end' token.  */
2627           while (now = lr_token (ldfile, charmap, repertoire),
2628                  now->tok != tok_translit_end && now->tok != tok_eof)
2629             {
2630               if (now->tok == tok_eol)
2631                 /* Ignore empty lines.  */
2632                 continue;
2633
2634               if (now->tok == tok_translit_end)
2635                 {
2636                   lr_ignore_rest (ldfile, 0);
2637                   break;
2638                 }
2639
2640               if (now->tok == tok_include)
2641                 {
2642                   /* We have to include locale.  */
2643                   const char *locale_name;
2644                   const char *repertoire_name;
2645
2646                   now = lr_token (ldfile, charmap, NULL);
2647                   /* This should be a string or an identifier.  In any
2648                      case something to name a locale.  */
2649                   if (now->tok != tok_string && now->tok != tok_ident)
2650                     {
2651                     translit_syntax:
2652                       lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2653                       lr_ignore_rest (ldfile, 0);
2654                       continue;
2655                     }
2656                   locale_name = now->val.str.startmb;
2657
2658                   /* Next should be a semicolon.  */
2659                   now = lr_token (ldfile, charmap, NULL);
2660                   if (now->tok != tok_semicolon)
2661                     goto translit_syntax;
2662
2663                   /* Now the repertoire name.  */
2664                   now = lr_token (ldfile, charmap, NULL);
2665                   if ((now->tok != tok_string && now->tok != tok_ident)
2666                       || now->val.str.startmb == NULL)
2667                     goto translit_syntax;
2668                   repertoire_name = now->val.str.startmb;
2669
2670                   /* We must not have more than one `include'.  */
2671                   if (ctype->translit_copy_locale != NULL)
2672                     {
2673                       lr_error (ldfile, _("\
2674 %s: only one `include' instruction allowed"), "LC_CTYPE");
2675                       lr_ignore_rest (ldfile, 0);
2676                       continue;
2677                     }
2678
2679                   ctype->translit_copy_locale = locale_name;
2680                   ctype->translit_copy_repertoire = repertoire_name;
2681
2682                   /* The rest of the line must be empty.  */
2683                   lr_ignore_rest (ldfile, 1);
2684
2685                   /* Make sure the locale is read.  */
2686                   add_to_readlist (LC_CTYPE, ctype->translit_copy_locale,
2687                                    repertoire_name, 1, NULL);
2688                   continue;
2689                 }
2690               else if (now->tok == tok_default_missing)
2691                 {
2692                   uint32_t *wstr;
2693
2694                   while (1)
2695                     {
2696                       /* We expect a single character or string as the
2697                          argument.  */
2698                       now = lr_token (ldfile, charmap, NULL);
2699                       wstr = read_widestring (ldfile, now, charmap,
2700                                               repertoire);
2701
2702                       if (wstr != NULL)
2703                         {
2704                           if (ctype->default_missing != NULL)
2705                             {
2706                               lr_error (ldfile, _("\
2707 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2708                               error_at_line (0, 0, ctype->default_missing_file,
2709                                              ctype->default_missing_lineno,
2710                                              _("\
2711 previous definition was here"));
2712                             }
2713                           else
2714                             {
2715                               ctype->default_missing = wstr;
2716                               ctype->default_missing_file = ldfile->fname;
2717                               ctype->default_missing_lineno = ldfile->lineno;
2718                             }
2719                           /* We can have more entries, ignore them.  */
2720                           lr_ignore_rest (ldfile, 0);
2721                           break;
2722                         }
2723                       else if (wstr == (uint32_t *) -1l)
2724                         /* This was a syntax error.  */
2725                         break;
2726
2727                       /* Maybe there is another replacement we can use.  */
2728                       now = lr_token (ldfile, charmap, NULL);
2729                       if (now->tok == tok_eol || now->tok == tok_eof)
2730                         {
2731                           /* Nothing found.  We tell the user.  */
2732                           lr_error (ldfile, _("\
2733 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2734                           break;
2735                         }
2736                       if (now->tok != tok_semicolon)
2737                         goto translit_syntax;
2738                     }
2739
2740                   continue;
2741                 }
2742               else if (now->tok == tok_translit_ignore)
2743                 {
2744                   read_translit_ignore_entry (ldfile, ctype, charmap,
2745                                               repertoire);
2746                   continue;
2747                 }
2748
2749               read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2750             }
2751           ldfile->return_widestr = 0;
2752           break;
2753
2754         case tok_ident:
2755           /* Ignore the rest of the line if we don't need the input of
2756              this line.  */
2757           if (ignore_content)
2758             {
2759               lr_ignore_rest (ldfile, 0);
2760               break;
2761             }
2762
2763           /* This could mean one of several things.  First test whether
2764              it's a character class name.  */
2765           for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2766             if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2767               break;
2768           if (cnt < ctype->nr_charclass)
2769             {
2770               class_bit = _ISwbit (cnt);
2771               class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2772               free (now->val.str.startmb);
2773               goto read_charclass;
2774             }
2775           for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2776             if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2777               break;
2778           if (cnt < ctype->map_collection_nr)
2779             {
2780               mapidx = cnt;
2781               free (now->val.str.startmb);
2782               goto read_mapping;
2783             }
2784 #ifdef PREDEFINED_CLASSES
2785           if (strcmp (now->val.str.startmb, "special1") == 0)
2786             {
2787               class_bit = _ISwspecial1;
2788               free (now->val.str.startmb);
2789               goto read_charclass;
2790             }
2791           if (strcmp (now->val.str.startmb, "special2") == 0)
2792             {
2793               class_bit = _ISwspecial2;
2794               free (now->val.str.startmb);
2795               goto read_charclass;
2796             }
2797           if (strcmp (now->val.str.startmb, "special3") == 0)
2798             {
2799               class_bit = _ISwspecial3;
2800               free (now->val.str.startmb);
2801               goto read_charclass;
2802             }
2803           if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2804             {
2805               mapidx = 2;
2806               goto read_mapping;
2807             }
2808 #endif
2809           break;
2810
2811         case tok_end:
2812           /* Next we assume `LC_CTYPE'.  */
2813           now = lr_token (ldfile, charmap, NULL);
2814           if (now->tok == tok_eof)
2815             break;
2816           if (now->tok == tok_eol)
2817             lr_error (ldfile, _("%s: incomplete `END' line"),
2818                       "LC_CTYPE");
2819           else if (now->tok != tok_lc_ctype)
2820             lr_error (ldfile, _("\
2821 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2822           lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2823           return;
2824
2825         default:
2826         err_label:
2827           if (now->tok != tok_eof)
2828             SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2829         }
2830
2831       /* Prepare for the next round.  */
2832       now = lr_token (ldfile, charmap, NULL);
2833       nowtok = now->tok;
2834     }
2835
2836   /* When we come here we reached the end of the file.  */
2837   lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2838 }
2839
2840
2841 static void
2842 set_class_defaults (struct locale_ctype_t *ctype, struct charmap_t *charmap,
2843                     struct repertoire_t *repertoire)
2844 {
2845   size_t cnt;
2846
2847   /* This function defines the default values for the classes and conversions
2848      according to POSIX.2 2.5.2.1.
2849      It may seem that the order of these if-blocks is arbitrary but it is NOT.
2850      Don't move them unless you know what you do!  */
2851
2852   void set_default (int bitpos, int from, int to)  /* Nested helper: add class bit BITPOS to every character FROM..TO (inclusive).  */
2853     {  /* Reads ctype and charmap from the enclosing set_class_defaults.  */
2854       char tmp[2];  /* One character plus NUL; looked up and printed in messages.  */
2855       int ch;
2856       int bit = _ISbit (bitpos);  /* ORed into ctype->class256_collection.  */
2857       int bitw = _ISwbit (bitpos);  /* ORed into ctype->class_collection.  */
2858       /* Define string; the loop rewrites only tmp[0], so the NUL stays in place.  */
2859       strcpy (tmp, "?");
2860
2861       for (ch = from; ch <= to; ++ch)
2862         {
2863           struct charseq *seq;
2864           tmp[0] = ch;
2865           /* Look up the one-character string first, then the `U%08X' spelling of CH.  */
2866           seq = charmap_find_value (charmap, tmp, 1);
2867           if (seq == NULL)
2868             {
2869               char buf[10];  /* 'U' + eight hex digits + NUL.  */
2870               sprintf (buf, "U%08X", ch);
2871               seq = charmap_find_value (charmap, buf, 9);
2872             }
2873           if (seq == NULL)  /* Neither lookup succeeded: report unless be_quiet.  */
2874             {
2875               if (!be_quiet)
2876                 error (0, 0, _("\
2877 %s: character `%s' not defined in charmap while needed as default value"),
2878                        "LC_CTYPE", tmp);
2879             }
2880           else if (seq->nbytes != 1)  /* Not a single byte: reported even when be_quiet is set.  */
2881             error (0, 0, _("\
2882 %s: character `%s' in charmap not representable with one byte"),
2883                    "LC_CTYPE", tmp);
2884           else
2885             ctype->class256_collection[seq->bytes[0]] |= bit;
2886
2887           /* No need to search here, the ASCII value is also the Unicode
2888              value.  This update is done even when the charmap lookup failed.  */
2889           ELEM (ctype, class_collection, , ch) |= bitw;
2890         }
2891     }
2892
2893   /* Set default values if keyword was not present.  */
2894   if ((ctype->class_done & BITw (tok_upper)) == 0)
2895     /* "If this keyword [upper] is not specified, the uppercase letters
2896         `A' through `Z', ..., shall automatically belong to this class,
2897         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
2898     set_default (BITPOS (tok_upper), 'A', 'Z');
2899
2900   if ((ctype->class_done & BITw (tok_lower)) == 0)
2901     /* "If this keyword [lower] is not specified, the lowercase letters
2902         `a' through `z', ..., shall automatically belong to this class,
2903         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
2904     set_default (BITPOS (tok_lower), 'a', 'z');
2905
2906   if ((ctype->class_done & BITw (tok_alpha)) == 0)
2907     {
2908       /* Table 2-6 in P1003.2 says that characters in class `upper' or
2909          class `lower' *must* be in class `alpha'.  */
2910       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
2911       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
2912
2913       for (cnt = 0; cnt < 256; ++cnt)
2914         if ((ctype->class256_collection[cnt] & mask) != 0)
2915           ctype->class256_collection[cnt] |= BIT (tok_alpha);
2916
2917       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2918         if ((ctype->class_collection[cnt] & maskw) != 0)
2919           ctype->class_collection[cnt] |= BITw (tok_alpha);
2920     }
2921
2922   if ((ctype->class_done & BITw (tok_digit)) == 0)
2923     /* "If this keyword [digit] is not specified, the digits `0' through
2924         `9', ..., shall automatically belong to this class, with
2925         implementation-defined character values."  [P1003.2, 2.5.2.1]  */
2926     set_default (BITPOS (tok_digit), '0', '9');
2927
2928   /* "Only characters specified for the `alpha' and `digit' keyword
2929      shall be specified.  Characters specified for the keyword `alpha'
2930      and `digit' are automatically included in this class [alnum]."  */
2931   {
2932     unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
2933     unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
2934
2935     for (cnt = 0; cnt < 256; ++cnt)
2936       if ((ctype->class256_collection[cnt] & mask) != 0)
2937         ctype->class256_collection[cnt] |= BIT (tok_alnum);
2938
2939     for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2940       if ((ctype->class_collection[cnt] & maskw) != 0)
2941         ctype->class_collection[cnt] |= BITw (tok_alnum);
2942   }
2943
2944   if ((ctype->class_done & BITw (tok_space)) == 0)
2945     /* "If this keyword [space] is not specified, the characters <space>,
2946         <form-feed>, <newline>, <carriage-return>, <tab>, and
2947         <vertical-tab>, ..., shall automatically belong to this class,
2948         with implementation-defined character values."  [P1003.2, 2.5.2.1]  */
2949     {
2950       struct charseq *seq;
2951
2952       seq = charmap_find_value (charmap, "space", 5);
2953       if (seq == NULL)
2954         seq = charmap_find_value (charmap, "SP", 2);
2955       if (seq == NULL)
2956         seq = charmap_find_value (charmap, "U00000020", 9);
2957       if (seq == NULL)
2958         {
2959           if (!be_quiet)
2960             error (0, 0, _("\
2961 %s: character `%s' not defined while needed as default value"),
2962                    "LC_CTYPE", "<space>");
2963         }
2964       else if (seq->nbytes != 1)
2965         error (0, 0, _("\
2966 %s: character `%s' in charmap not representable with one byte"),
2967                "LC_CTYPE", "<space>");
2968       else
2969         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2970
2971       /* No need to search.  */
2972       ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
2973
2974       seq = charmap_find_value (charmap, "form-feed", 9);
2975       if (seq == NULL)
2976         seq = charmap_find_value (charmap, "U0000000C", 9);
2977       if (seq == NULL)
2978         {
2979           if (!be_quiet)
2980             error (0, 0, _("\
2981 %s: character `%s' not defined while needed as default value"),
2982                    "LC_CTYPE", "<form-feed>");
2983         }
2984       else if (seq->nbytes != 1)
2985         error (0, 0, _("\
2986 %s: character `%s' in charmap not representable with one byte"),
2987                "LC_CTYPE", "<form-feed>");
2988       else
2989         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2990
2991       /* No need to search.  */
2992       ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
2993
2994
2995       seq = charmap_find_value (charmap, "newline", 7);
2996       if (seq == NULL)
2997         seq = charmap_find_value (charmap, "U0000000A", 9);
2998       if (seq == NULL)
2999         {
3000           if (!be_quiet)
3001             error (0, 0, _("\
3002 character `%s' not defined while needed as default value"),
3003                    "<newline>");
3004         }
3005       else if (seq->nbytes != 1)
3006         error (0, 0, _("\
3007 %s: character `%s' in charmap not representable with one byte"),
3008                "LC_CTYPE", "<newline>");
3009       else
3010         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3011
3012       /* No need to search.  */
3013       ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
3014
3015
3016       seq = charmap_find_value (charmap, "carriage-return", 15);
3017       if (seq == NULL)
3018         seq = charmap_find_value (charmap, "U0000000D", 9);
3019       if (seq == NULL)
3020         {
3021           if (!be_quiet)
3022             error (0, 0, _("\
3023 %s: character `%s' not defined while needed as default value"),
3024                    "LC_CTYPE", "<carriage-return>");
3025         }
3026       else if (seq->nbytes != 1)
3027         error (0, 0, _("\
3028 %s: character `%s' in charmap not representable with one byte"),
3029                "LC_CTYPE", "<carriage-return>");
3030       else
3031         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3032
3033       /* No need to search.  */
3034       ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
3035
3036
3037       seq = charmap_find_value (charmap, "tab", 3);
3038       if (seq == NULL)
3039         seq = charmap_find_value (charmap, "U00000009", 9);
3040       if (seq == NULL)
3041         {
3042           if (!be_quiet)
3043             error (0, 0, _("\
3044 %s: character `%s' not defined while needed as default value"),
3045                    "LC_CTYPE", "<tab>");
3046         }
3047       else if (seq->nbytes != 1)
3048         error (0, 0, _("\
3049 %s: character `%s' in charmap not representable with one byte"),
3050                "LC_CTYPE", "<tab>");
3051       else
3052         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3053
3054       /* No need to search.  */
3055       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
3056
3057
3058       seq = charmap_find_value (charmap, "vertical-tab", 12);
3059       if (seq == NULL)
3060         seq = charmap_find_value (charmap, "U0000000B", 9);
3061       if (seq == NULL)
3062         {
3063           if (!be_quiet)
3064             error (0, 0, _("\
3065 %s: character `%s' not defined while needed as default value"),
3066                    "LC_CTYPE", "<vertical-tab>");
3067         }
3068       else if (seq->nbytes != 1)
3069         error (0, 0, _("\
3070 %s: character `%s' in charmap not representable with one byte"),
3071                "LC_CTYPE", "<vertical-tab>");
3072       else
3073         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3074
3075       /* No need to search.  */
3076       ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
3077     }
3078
3079   if ((ctype->class_done & BITw (tok_xdigit)) == 0)
3080     /* "If this keyword is not specified, the digits `0' to `9', the
3081         uppercase letters `A' through `F', and the lowercase letters `a'
3082         through `f', ..., shall automatically belong to this class, with
3083         implementation defined character values."  [P1003.2, 2.5.2.1]  */
3084     {
3085       set_default (BITPOS (tok_xdigit), '0', '9');
3086       set_default (BITPOS (tok_xdigit), 'A', 'F');
3087       set_default (BITPOS (tok_xdigit), 'a', 'f');
3088     }
3089
3090   if ((ctype->class_done & BITw (tok_blank)) == 0)
3091     /* "If this keyword [blank] is unspecified, the characters <space> and
3092        <tab> shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3093    {
3094       struct charseq *seq;
3095
3096       seq = charmap_find_value (charmap, "space", 5);
3097       if (seq == NULL)
3098         seq = charmap_find_value (charmap, "SP", 2);
3099       if (seq == NULL)
3100         seq = charmap_find_value (charmap, "U00000020", 9);
3101       if (seq == NULL)
3102         {
3103           if (!be_quiet)
3104             error (0, 0, _("\
3105 %s: character `%s' not defined while needed as default value"),
3106                    "LC_CTYPE", "<space>");
3107         }
3108       else if (seq->nbytes != 1)
3109         error (0, 0, _("\
3110 %s: character `%s' in charmap not representable with one byte"),
3111                "LC_CTYPE", "<space>");
3112       else
3113         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3114
3115       /* No need to search.  */
3116       ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3117
3118
3119       seq = charmap_find_value (charmap, "tab", 3);
3120       if (seq == NULL)
3121         seq = charmap_find_value (charmap, "U00000009", 9);
3122       if (seq == NULL)
3123         {
3124           if (!be_quiet)
3125             error (0, 0, _("\
3126 %s: character `%s' not defined while needed as default value"),
3127                    "LC_CTYPE", "<tab>");
3128         }
3129       else if (seq->nbytes != 1)
3130         error (0, 0, _("\
3131 %s: character `%s' in charmap not representable with one byte"),
3132                "LC_CTYPE", "<tab>");
3133       else
3134         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3135
3136       /* No need to search.  */
3137       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3138     }
3139
3140   if ((ctype->class_done & BITw (tok_graph)) == 0)
3141     /* "If this keyword [graph] is not specified, characters specified for
3142         the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3143         shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3144     {
3145       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3146         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3147       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3148         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3149         BITw (tok_punct);
3150       size_t cnt;
3151
3152       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3153         if ((ctype->class_collection[cnt] & maskw) != 0)
3154           ctype->class_collection[cnt] |= BITw (tok_graph);
3155
3156       for (cnt = 0; cnt < 256; ++cnt)
3157         if ((ctype->class256_collection[cnt] & mask) != 0)
3158           ctype->class256_collection[cnt] |= BIT (tok_graph);
3159     }
3160
3161   if ((ctype->class_done & BITw (tok_print)) == 0)
3162     /* "If this keyword [print] is not provided, characters specified for
3163         the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3164         and the <space> character shall belong to this character class."
3165         [P1003.2, 2.5.2.1]  */
3166     {
3167       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3168         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3169       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3170         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3171         BITw (tok_punct);
3172       size_t cnt;
3173       struct charseq *seq;
3174
3175       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3176         if ((ctype->class_collection[cnt] & maskw) != 0)
3177           ctype->class_collection[cnt] |= BITw (tok_print);
3178
3179       for (cnt = 0; cnt < 256; ++cnt)
3180         if ((ctype->class256_collection[cnt] & mask) != 0)
3181           ctype->class256_collection[cnt] |= BIT (tok_print);
3182
3183
3184       seq = charmap_find_value (charmap, "space", 5);
3185       if (seq == NULL)
3186         seq = charmap_find_value (charmap, "SP", 2);
3187       if (seq == NULL)
3188         seq = charmap_find_value (charmap, "U00000020", 9);
3189       if (seq == NULL)
3190         {
3191           if (!be_quiet)
3192             error (0, 0, _("\
3193 %s: character `%s' not defined while needed as default value"),
3194                    "LC_CTYPE", "<space>");
3195         }
3196       else if (seq->nbytes != 1)
3197         error (0, 0, _("\
3198 %s: character `%s' in charmap not representable with one byte"),
3199                "LC_CTYPE", "<space>");
3200       else
3201         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3202
3203       /* No need to search.  */
3204       ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3205     }
3206
3207   if (ctype->tomap_done[0] == 0)
3208     /* "If this keyword [toupper] is not specified, the lowercase letters
3209         `a' through `z', and their corresponding uppercase letters `A' to
3210         `Z', ..., shall automatically be included, with implementation-
3211         defined character values."  [P1003.2, 2.5.2.1]  */
3212     {
3213       char tmp[4];
3214       int ch;
3215
3216       strcpy (tmp, "<?>");
3217
3218       for (ch = 'a'; ch <= 'z'; ++ch)
3219         {
3220           struct charseq *seq_from, *seq_to;
3221
3222           tmp[1] = (char) ch;
3223
3224           seq_from = charmap_find_value (charmap, &tmp[1], 1);
3225           if (seq_from == NULL)
3226             {
3227               char buf[10];
3228               sprintf (buf, "U%08X", ch);
3229               seq_from = charmap_find_value (charmap, buf, 9);
3230             }
3231           if (seq_from == NULL)
3232             {
3233               if (!be_quiet)
3234                 error (0, 0, _("\
3235 %s: character `%s' not defined while needed as default value"),
3236                        "LC_CTYPE", tmp);
3237             }
3238           else if (seq_from->nbytes != 1)
3239             {
3240               if (!be_quiet)
3241                 error (0, 0, _("\
3242 %s: character `%s' needed as default value not representable with one byte"),
3243                        "LC_CTYPE", tmp);
3244             }
3245           else
3246             {
3247               /* This conversion is implementation defined.  */
3248               tmp[1] = (char) (ch + ('A' - 'a'));
3249               seq_to = charmap_find_value (charmap, &tmp[1], 1);
3250               if (seq_to == NULL)
3251                 {
3252                   char buf[10];
3253                   sprintf (buf, "U%08X", ch + ('A' - 'a'));
3254                   seq_to = charmap_find_value (charmap, buf, 9);
3255                 }
3256               if (seq_to == NULL)
3257                 {
3258                   if (!be_quiet)
3259                     error (0, 0, _("\
3260 %s: character `%s' not defined while needed as default value"),
3261                            "LC_CTYPE", tmp);
3262                 }
3263               else if (seq_to->nbytes != 1)
3264                 {
3265                   if (!be_quiet)
3266                     error (0, 0, _("\
3267 %s: character `%s' needed as default value not representable with one byte"),
3268                            "LC_CTYPE", tmp);
3269                 }
3270               else
3271                 /* The index [0] is determined by the order of the
3272                    `ctype_map_newP' calls in `ctype_startup'.  */
3273                 ctype->map256_collection[0][seq_from->bytes[0]]
3274                   = seq_to->bytes[0];
3275             }
3276
3277           /* No need to search.  */
3278           ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3279         }
3280     }
3281
3282   if (ctype->tomap_done[1] == 0)
3283     /* "If this keyword [tolower] is not specified, the mapping shall be
3284        the reverse mapping of the one specified to `toupper'."  [P1003.2]  */
3285     {
3286       for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3287         if (ctype->map_collection[0][cnt] != 0)
3288           ELEM (ctype, map_collection, [1],
3289                 ctype->map_collection[0][cnt])
3290             = ctype->charnames[cnt];
3291
3292       for (cnt = 0; cnt < 256; ++cnt)
3293         if (ctype->map256_collection[0][cnt] != 0)
3294           ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3295     }
3296
3297   if (ctype->outdigits_act != 10)
3298     {
3299       if (ctype->outdigits_act != 0)
3300         error (0,0, _("%s: field `%s' does not contain exactly ten entries"),
3301                "LC_CTYPE", "outdigit");
3302
3303       for (cnt = ctype->outdigits_act; cnt < 10; ++cnt)
3304         {
3305           ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3306                                                          digits + cnt, 1);
3307
3308           if (ctype->mboutdigits[cnt] == NULL)
3309             ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3310                                                            longnames[cnt],
3311                                                            strlen (longnames[cnt]));
3312
3313           if (ctype->mboutdigits[cnt] == NULL)
3314             ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3315                                                            uninames[cnt], 9);
3316
3317           if (ctype->mboutdigits[cnt] == NULL)
3318             {
3319               /* Provide a replacement.  */
3320               error (0, 0, _("\
3321 no output digits defined and none of the standard names in the charmap"));
3322
3323               ctype->mboutdigits[cnt] = obstack_alloc (&charmap->mem_pool,
3324                                                        sizeof (struct charseq)
3325                                                        + 1);
3326
3327               /* This is better than nothing.  */
3328               ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3329               ctype->mboutdigits[cnt]->nbytes = 1;
3330             }
3331
3332           ctype->wcoutdigits[cnt] = L'0' + cnt;
3333         }
3334
3335       ctype->outdigits_act = 10;
3336     }
3337 }
3338
3339
3340 /* Construction of sparse 3-level tables.
3341    See wchar-lookup.h for their structure and the meaning of p and q.  */
3342
struct wctype_table
{
  /* Shift parameters, to be set by the user before initialization.
     A level-3 block holds 1 << p words of 32 bits each, a level-2
     block holds 1 << q entries.  */
  unsigned int p, q;

  /* Growable working arrays used while the table is being filled.
     For every level: the allocated capacity, the number of entries
     (level 1) or blocks (levels 2 and 3) in use, and the storage.  */
  size_t level1_alloc, level1_size;
  uint32_t *level1;
  size_t level2_alloc, level2_size;
  uint32_t *level2;
  size_t level3_alloc, level3_size;
  uint32_t *level3;

  /* Final compressed image produced by finalization: its size in
     bytes and the buffer itself.  */
  size_t result_size;
  char *result;
};
3362
3363 /* Initialize.  Assumes t->p and t->q have already been set.  */
3364 static inline void
3365 wctype_table_init (struct wctype_table *t)
3366 {
3367   t->level1_alloc = t->level1_size = 0;
3368   t->level2_alloc = t->level2_size = 0;
3369   t->level3_alloc = t->level3_size = 0;
3370 }
3371
3372 /* Retrieve an entry.  */
3373 static inline int
3374 wctype_table_get (struct wctype_table *t, uint32_t wc)
3375 {
3376   uint32_t index1 = wc >> (t->q + t->p + 5);
3377   if (index1 < t->level1_size)
3378     {
3379       uint32_t lookup1 = t->level1[index1];
3380       if (lookup1 != ~((uint32_t) 0))
3381         {
3382           uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3383                             + (lookup1 << t->q);
3384           uint32_t lookup2 = t->level2[index2];
3385           if (lookup2 != ~((uint32_t) 0))
3386             {
3387               uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3388                                 + (lookup2 << t->p);
3389               uint32_t lookup3 = t->level3[index3];
3390               uint32_t index4 = wc & 0x1f;
3391
3392               return (lookup3 >> index4) & 1;
3393             }
3394         }
3395     }
3396   return 0;
3397 }
3398
3399 /* Add one entry.  */
3400 static void
3401 wctype_table_add (struct wctype_table *t, uint32_t wc)
3402 {
3403   uint32_t index1 = wc >> (t->q + t->p + 5);
3404   uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3405   uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3406   uint32_t index4 = wc & 0x1f;
3407   size_t i, i1, i2;
3408
3409   if (index1 >= t->level1_size)
3410     {
3411       if (index1 >= t->level1_alloc)
3412         {
3413           size_t alloc = 2 * t->level1_alloc;
3414           if (alloc <= index1)
3415             alloc = index1 + 1;
3416           t->level1 = (t->level1_alloc > 0
3417                        ? (uint32_t *) xrealloc ((char *) t->level1,
3418                                                 alloc * sizeof (uint32_t))
3419                        : (uint32_t *) xmalloc (alloc * sizeof (uint32_t)));
3420           t->level1_alloc = alloc;
3421         }
3422       while (index1 >= t->level1_size)
3423         t->level1[t->level1_size++] = ~((uint32_t) 0);
3424     }
3425
3426   if (t->level1[index1] == ~((uint32_t) 0))
3427     {
3428       if (t->level2_size == t->level2_alloc)
3429         {
3430           size_t alloc = 2 * t->level2_alloc + 1;
3431           t->level2 = (t->level2_alloc > 0
3432                        ? (uint32_t *) xrealloc ((char *) t->level2,
3433                                                 (alloc << t->q) * sizeof (uint32_t))
3434                        : (uint32_t *) xmalloc ((alloc << t->q) * sizeof (uint32_t)));
3435           t->level2_alloc = alloc;
3436         }
3437       i1 = t->level2_size << t->q;
3438       i2 = (t->level2_size + 1) << t->q;
3439       for (i = i1; i < i2; i++)
3440         t->level2[i] = ~((uint32_t) 0);
3441       t->level1[index1] = t->level2_size++;
3442     }
3443
3444   index2 += t->level1[index1] << t->q;
3445
3446   if (t->level2[index2] == ~((uint32_t) 0))
3447     {
3448       if (t->level3_size == t->level3_alloc)
3449         {
3450           size_t alloc = 2 * t->level3_alloc + 1;
3451           t->level3 = (t->level3_alloc > 0
3452                        ? (uint32_t *) xrealloc ((char *) t->level3,
3453                                                 (alloc << t->p) * sizeof (uint32_t))
3454                        : (uint32_t *) xmalloc ((alloc << t->p) * sizeof (uint32_t)));
3455           t->level3_alloc = alloc;
3456         }
3457       i1 = t->level3_size << t->p;
3458       i2 = (t->level3_size + 1) << t->p;
3459       for (i = i1; i < i2; i++)
3460         t->level3[i] = 0;
3461       t->level2[index2] = t->level3_size++;
3462     }
3463
3464   index3 += t->level2[index2] << t->p;
3465
3466   t->level3[index3] |= (uint32_t)1 << index4;
3467 }
3468
3469 /* Finalize and shrink.  */
3470 static void
3471 wctype_table_finalize (struct wctype_table *t)
3472 {
3473   size_t i, j, k;
3474   uint32_t reorder3[t->level3_size];
3475   uint32_t reorder2[t->level2_size];
3476   uint32_t level1_offset, level2_offset, level3_offset;
3477
3478   /* Uniquify level3 blocks.  */
3479   k = 0;
3480   for (j = 0; j < t->level3_size; j++)
3481     {
3482       for (i = 0; i < k; i++)
3483         if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3484                     (1 << t->p) * sizeof (uint32_t)) == 0)
3485           break;
3486       /* Relocate block j to block i.  */
3487       reorder3[j] = i;
3488       if (i == k)
3489         {
3490           if (i != j)
3491             memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3492                     (1 << t->p) * sizeof (uint32_t));
3493           k++;
3494         }
3495     }
3496   t->level3_size = k;
3497
3498   for (i = 0; i < (t->level2_size << t->q); i++)
3499     if (t->level2[i] != ~((uint32_t) 0))
3500       t->level2[i] = reorder3[t->level2[i]];
3501
3502   /* Uniquify level2 blocks.  */
3503   k = 0;
3504   for (j = 0; j < t->level2_size; j++)
3505     {
3506       for (i = 0; i < k; i++)
3507         if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3508                     (1 << t->q) * sizeof (uint32_t)) == 0)
3509           break;
3510       /* Relocate block j to block i.  */
3511       reorder2[j] = i;
3512       if (i == k)
3513         {
3514           if (i != j)
3515             memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3516                     (1 << t->q) * sizeof (uint32_t));
3517           k++;
3518         }
3519     }
3520   t->level2_size = k;
3521
3522   for (i = 0; i < t->level1_size; i++)
3523     if (t->level1[i] != ~((uint32_t) 0))
3524       t->level1[i] = reorder2[t->level1[i]];
3525
3526   /* Create and fill the resulting compressed representation.  */
3527   t->result_size =
3528     5 * sizeof (uint32_t)
3529     + t->level1_size * sizeof (uint32_t)
3530     + (t->level2_size << t->q) * sizeof (uint32_t)
3531     + (t->level3_size << t->p) * sizeof (uint32_t);
3532   t->result = (char *) xmalloc (t->result_size);
3533
3534   level1_offset =
3535     5 * sizeof (uint32_t);
3536   level2_offset =
3537     5 * sizeof (uint32_t)
3538     + t->level1_size * sizeof (uint32_t);
3539   level3_offset =
3540     5 * sizeof (uint32_t)
3541     + t->level1_size * sizeof (uint32_t)
3542     + (t->level2_size << t->q) * sizeof (uint32_t);
3543
3544   ((uint32_t *) t->result)[0] = t->q + t->p + 5;
3545   ((uint32_t *) t->result)[1] = t->level1_size;
3546   ((uint32_t *) t->result)[2] = t->p + 5;
3547   ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3548   ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3549
3550   for (i = 0; i < t->level1_size; i++)
3551     ((uint32_t *) (t->result + level1_offset))[i] =
3552       (t->level1[i] == ~((uint32_t) 0)
3553        ? 0
3554        : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3555
3556   for (i = 0; i < (t->level2_size << t->q); i++)
3557     ((uint32_t *) (t->result + level2_offset))[i] =
3558       (t->level2[i] == ~((uint32_t) 0)
3559        ? 0
3560        : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3561
3562   for (i = 0; i < (t->level3_size << t->p); i++)
3563     ((uint32_t *) (t->result + level3_offset))[i] = t->level3[i];
3564
3565   if (t->level1_alloc > 0)
3566     free (t->level1);
3567   if (t->level2_alloc > 0)
3568     free (t->level2);
3569   if (t->level3_alloc > 0)
3570     free (t->level3);
3571 }
3572
3573 #define TABLE wcwidth_table
3574 #define ELEMENT uint8_t
3575 #define DEFAULT 0xff
3576 #include "3level.h"
3577
3578 #define TABLE wctrans_table
3579 #define ELEMENT int32_t
3580 #define DEFAULT 0
3581 #define wctrans_table_add wctrans_table_add_internal
3582 #include "3level.h"
3583 #undef wctrans_table_add
3584 /* The wctrans_table must actually store the difference between the
3585    desired result and the argument.  */
3586 static inline void
3587 wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
3588 {
3589   wctrans_table_add_internal (t, wc, mapped_wc - wc);
3590 }
3591
3592
3593 static void
3594 allocate_arrays (struct locale_ctype_t *ctype, struct charmap_t *charmap,
3595                  struct repertoire_t *repertoire)
3596 {
3597   size_t idx, nr;
3598   const void *key;
3599   size_t len;
3600   void *vdata;
3601   void *curs;
3602
3603   /* You wonder about this amount of memory?  This is only because some
3604      users do not manage to address the array with unsigned values or
3605      data types with range >= 256.  '\200' would result in the array
3606      index -128.  To help these poor people we duplicate the entries for
3607      128 up to 255 below the entry for \0.  */
3608   ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
3609   ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
3610   ctype->class_b = (uint32_t **)
3611     xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3612   ctype->class_3level = (struct iovec *)
3613     xmalloc (ctype->nr_charclass * sizeof (struct iovec));
3614
3615   /* This is the array accessed using the multibyte string elements.  */
3616   for (idx = 0; idx < 256; ++idx)
3617     ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3618
3619   /* Mirror first 127 entries.  We must take care that entry -1 is not
3620      mirrored because EOF == -1.  */
3621   for (idx = 0; idx < 127; ++idx)
3622     ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3623
3624   /* The 32 bit array contains all characters < 0x100.  */
3625   for (idx = 0; idx < ctype->class_collection_act; ++idx)
3626     if (ctype->charnames[idx] < 0x100)
3627       ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3628
3629   for (nr = 0; nr < ctype->nr_charclass; nr++)
3630     {
3631       ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3632
3633       for (idx = 0; idx < 256; ++idx)
3634         if (ctype->class256_collection[idx] & _ISbit (nr))
3635           ctype->class_b[nr][idx >> 5] |= (uint32_t)1 << (idx & 0x1f);
3636     }
3637
3638   for (nr = 0; nr < ctype->nr_charclass; nr++)
3639     {
3640       struct wctype_table t;
3641
3642       t.p = 4; /* or: 5 */
3643       t.q = 7; /* or: 6 */
3644       wctype_table_init (&t);
3645
3646       for (idx = 0; idx < ctype->class_collection_act; ++idx)
3647         if (ctype->class_collection[idx] & _ISwbit (nr))
3648           wctype_table_add (&t, ctype->charnames[idx]);
3649
3650       wctype_table_finalize (&t);
3651
3652       if (verbose)
3653         fprintf (stderr, _("%s: table for class \"%s\": %lu bytes\n"),
3654                  "LC_CTYPE", ctype->classnames[nr],
3655                  (unsigned long int) t.result_size);
3656
3657       ctype->class_3level[nr].iov_base = t.result;
3658       ctype->class_3level[nr].iov_len = t.result_size;
3659     }
3660
3661   /* Room for table of mappings.  */
3662   ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3663   ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3664                                           * sizeof (uint32_t *));
3665   ctype->map_3level = (struct iovec *)
3666     xmalloc (ctype->map_collection_nr * sizeof (struct iovec));
3667
3668   /* Fill in all mappings.  */
3669   for (idx = 0; idx < 2; ++idx)
3670     {
3671       unsigned int idx2;
3672
3673       /* Allocate table.  */
3674       ctype->map_b[idx] = (uint32_t *)
3675         xmalloc ((256 + 128) * sizeof (uint32_t));
3676
3677       /* Copy values from collection.  */
3678       for (idx2 = 0; idx2 < 256; ++idx2)
3679         ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3680
3681       /* Mirror first 127 entries.  We must take care not to map entry
3682          -1 because EOF == -1.  */
3683       for (idx2 = 0; idx2 < 127; ++idx2)
3684         ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
3685
3686       /* EOF must map to EOF.  */
3687       ctype->map_b[idx][127] = EOF;
3688     }
3689
3690   for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3691     {
3692       unsigned int idx2;
3693
3694       /* Allocate table.  */
3695       ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3696
3697       /* Copy values from collection.  Default is identity mapping.  */
3698       for (idx2 = 0; idx2 < 256; ++idx2)
3699         ctype->map32_b[idx][idx2] =
3700           (ctype->map_collection[idx][idx2] != 0
3701            ? ctype->map_collection[idx][idx2]
3702            : idx2);
3703     }
3704
3705   for (nr = 0; nr < ctype->map_collection_nr; nr++)
3706     {
3707       struct wctrans_table t;
3708
3709       t.p = 7;
3710       t.q = 9;
3711       wctrans_table_init (&t);
3712
3713       for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
3714         if (ctype->map_collection[nr][idx] != 0)
3715           wctrans_table_add (&t, ctype->charnames[idx],
3716                              ctype->map_collection[nr][idx]);
3717
3718       wctrans_table_finalize (&t);
3719
3720       if (verbose)
3721         fprintf (stderr, _("%s: table for map \"%s\": %lu bytes\n"),
3722                  "LC_CTYPE", ctype->mapnames[nr],
3723                  (unsigned long int) t.result_size);
3724
3725       ctype->map_3level[nr].iov_base = t.result;
3726       ctype->map_3level[nr].iov_len = t.result_size;
3727     }
3728
3729   /* Extra array for class and map names.  */
3730   ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3731                                                 * sizeof (uint32_t));
3732   ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3733                                               * sizeof (uint32_t));
3734
3735   ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3736   ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3737
3738   /* Array for width information.  Because the expected width are very
3739      small we use only one single byte.  This saves space.  */
3740   {
3741     struct wcwidth_table t;
3742
3743     t.p = 7;
3744     t.q = 9;
3745     wcwidth_table_init (&t);
3746
3747     /* First set all the characters of the character set to the default width.  */
3748     curs = NULL;
3749     while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
3750       {
3751         struct charseq *data = (struct charseq *) vdata;
3752
3753         if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3754           data->ucs4 = repertoire_find_value (ctype->repertoire,
3755                                               data->name, len);
3756
3757         if (data->ucs4 != ILLEGAL_CHAR_VALUE)
3758           wcwidth_table_add (&t, data->ucs4, charmap->width_default);
3759       }
3760
3761     /* Now add the explicitly specified widths.  */
3762     if (charmap->width_rules != NULL)
3763       {
3764         size_t cnt;
3765
3766         for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
3767           {
3768             unsigned char bytes[charmap->mb_cur_max];
3769             int nbytes = charmap->width_rules[cnt].from->nbytes;
3770
3771             /* We have the range of character for which the width is
3772                specified described using byte sequences of the multibyte
3773                charset.  We have to convert this to UCS4 now.  And we
3774                cannot simply convert the beginning and the end of the
3775                sequence, we have to iterate over the byte sequence and
3776                convert it for every single character.  */
3777             memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
3778
3779             while (nbytes < charmap->width_rules[cnt].to->nbytes
3780                    || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
3781                               nbytes) <= 0)
3782               {
3783                 /* Find the UCS value for `bytes'.  */
3784                 int inner;
3785                 uint32_t wch;
3786                 struct charseq *seq =
3787                   charmap_find_symbol (charmap, bytes, nbytes);
3788
3789                 if (seq == NULL)
3790                   wch = ILLEGAL_CHAR_VALUE;
3791                 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
3792                   wch = seq->ucs4;
3793                 else
3794                   wch = repertoire_find_value (ctype->repertoire, seq->name,
3795                                                strlen (seq->name));
3796
3797                 if (wch != ILLEGAL_CHAR_VALUE)
3798                   /* Store the value.  */
3799                   wcwidth_table_add (&t, wch, charmap->width_rules[cnt].width);
3800
3801                 /* "Increment" the bytes sequence.  */
3802                 inner = nbytes - 1;
3803                 while (inner >= 0 && bytes[inner] == 0xff)
3804                   --inner;
3805
3806                 if (inner < 0)
3807                   {
3808                     /* We have to extend the byte sequence.  */
3809                     if (nbytes >= charmap->width_rules[cnt].to->nbytes)
3810                       break;
3811
3812                     bytes[0] = 1;
3813                     memset (&bytes[1], 0, nbytes);
3814                     ++nbytes;
3815                   }
3816                 else
3817                   {
3818                     ++bytes[inner];
3819                     while (++inner < nbytes)
3820                       bytes[inner] = 0;
3821                   }
3822               }
3823           }
3824       }
3825
3826     wcwidth_table_finalize (&t);
3827
3828     if (verbose)
3829       fprintf (stderr, _("%s: table for width: %lu bytes\n"),
3830                "LC_CTYPE", (unsigned long int) t.result_size);
3831
3832     ctype->width.iov_base = t.result;
3833     ctype->width.iov_len = t.result_size;
3834   }
3835
3836   /* Set MB_CUR_MAX.  */
3837   ctype->mb_cur_max = charmap->mb_cur_max;
3838
3839   /* Now determine the table for the transliteration information.
3840
3841      XXX It is not yet clear to me whether it is worth implementing a
3842      complicated algorithm which uses a hash table to locate the entries.
3843      For now I'll use a simple array which can be searching using binary
3844      search.  */
3845   if (ctype->translit_copy_locale != NULL)
3846     {
3847       /* Fold in the transliteration information from the locale mentioned
3848          in the `include' statement.  */
3849       struct locale_ctype_t *here = ctype;
3850
3851       do
3852         {
3853           struct localedef_t *other = find_locale (LC_CTYPE,
3854                                                    here->translit_copy_locale,
3855                                                    repertoire->name, charmap);
3856
3857           if (other == NULL)
3858             {
3859               error (0, 0, _("\
3860 %s: transliteration data from locale `%s' not available"),
3861                      "LC_CTYPE", here->translit_copy_locale);
3862               break;
3863             }
3864
3865           here = other->categories[LC_CTYPE].ctype;
3866
3867           /* Enqueue the information if necessary.  */
3868           if (here->translit != NULL)
3869             {
3870               struct translit_t *endp = here->translit;
3871               while (endp->next != NULL)
3872                 endp = endp->next;
3873
3874               endp->next = ctype->translit;
3875               ctype->translit = here->translit;
3876             }
3877         }
3878       while (here->translit_copy_locale != NULL);
3879     }
3880
3881   if (ctype->translit != NULL)
3882     {
3883       /* First count how many entries we have.  This is the upper limit
3884          since some entries from the included files might be overwritten.  */
3885       size_t number = 0;
3886       size_t cnt;
3887       struct translit_t *runp = ctype->translit;
3888       struct translit_t **sorted;
3889       size_t from_len, to_len;
3890
3891       while (runp != NULL)
3892         {
3893           ++number;
3894           runp = runp->next;
3895         }
3896
3897       /* Next we allocate an array large enough and fill in the values.  */
3898       sorted = (struct translit_t **) alloca (number
3899                                               * sizeof (struct translit_t **));
3900       runp = ctype->translit;
3901       number = 0;
3902       do
3903         {
3904           /* Search for the place where to insert this string.
3905              XXX Better use a real sorting algorithm later.  */
3906           size_t idx = 0;
3907           int replace = 0;
3908
3909           while (idx < number)
3910             {
3911               int res = wcscmp ((const wchar_t *) sorted[idx]->from,
3912                                 (const wchar_t *) runp->from);
3913               if (res == 0)
3914                 {
3915                   replace = 1;
3916                   break;
3917                 }
3918               if (res > 0)
3919                 break;
3920               ++idx;
3921             }
3922
3923           if (replace)
3924             sorted[idx] = runp;
3925           else
3926             {
3927               memmove (&sorted[idx + 1], &sorted[idx],
3928                        (number - idx) * sizeof (struct translit_t *));
3929               sorted[idx] = runp;
3930               ++number;
3931             }
3932
3933           runp = runp->next;
3934         }
3935       while (runp != NULL);
3936
3937       /* The next step is putting all the possible transliteration
3938          strings in one memory block so that we can write it out.
3939          We need several different blocks:
3940          - index to the from-string array
3941          - from-string array
3942          - index to the to-string array
3943          - to-string array.
3944       */
3945       from_len = to_len = 0;
3946       for (cnt = 0; cnt < number; ++cnt)
3947         {
3948           struct translit_to_t *srunp;
3949           from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3950           srunp = sorted[cnt]->to;
3951           while (srunp != NULL)
3952             {
3953               to_len += wcslen ((const wchar_t *) srunp->str) + 1;
3954               srunp = srunp->next;
3955             }
3956           /* Plus one for the extra NUL character marking the end of
3957              the list for the current entry.  */
3958           ++to_len;
3959         }
3960
3961       /* We can allocate the arrays for the results.  */
3962       ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
3963       ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
3964       ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
3965       ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
3966
3967       from_len = 0;
3968       to_len = 0;
3969       for (cnt = 0; cnt < number; ++cnt)
3970         {
3971           size_t len;
3972           struct translit_to_t *srunp;
3973
3974           ctype->translit_from_idx[cnt] = from_len;
3975           ctype->translit_to_idx[cnt] = to_len;
3976
3977           len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3978           wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
3979                    (const wchar_t *) sorted[cnt]->from, len);
3980           from_len += len;
3981
3982           ctype->translit_to_idx[cnt] = to_len;
3983           srunp = sorted[cnt]->to;
3984           while (srunp != NULL)
3985             {
3986               len = wcslen ((const wchar_t *) srunp->str) + 1;
3987               wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
3988                        (const wchar_t *) srunp->str, len);
3989               to_len += len;
3990               srunp = srunp->next;
3991             }
3992           ctype->translit_to_tbl[to_len++] = L'\0';
3993         }
3994
3995       /* Store the information about the length.  */
3996       ctype->translit_idx_size = number;
3997       ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
3998       ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
3999     }
4000   else
4001     {
4002       /* Provide some dummy pointers since we have nothing to write out.  */
4003       static uint32_t no_str = { 0 };
4004
4005       ctype->translit_from_idx = &no_str;
4006       ctype->translit_from_tbl = &no_str;
4007       ctype->translit_to_tbl = &no_str;
4008       ctype->translit_idx_size = 0;
4009       ctype->translit_from_tbl_size = 0;
4010       ctype->translit_to_tbl_size = 0;
4011     }
4012 }