locale/programs/ld-collate.c

   1 /* Copyright (C) 1995-1999, 2000 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Library General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Library General Public License for more details.
  14
  15    You should have received a copy of the GNU Library General Public
  16    License along with the GNU C Library; see the file COPYING.LIB.  If not,
  17    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  18    Boston, MA 02111-1307, USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23
  24 #include <errno.h>
  25 #include <error.h>
  26 #include <stdlib.h>
  27 #include <wchar.h>
  28 #include <sys/param.h>
  29
  30 #include "charmap.h"
  31 #include "localeinfo.h"
  32 #include "linereader.h"
  33 #include "locfile.h"
  34 #include "localedef.h"
  35 #include "elem-hash.h"
  36
  37 /* Uncomment the following line in the production version.  */
  38 /* #define NDEBUG 1 */
  39 #include <assert.h>
  40
  41 #define obstack_chunk_alloc malloc
  42 #define obstack_chunk_free free
  43
  44 static inline void
  45 obstack_int32_grow (struct obstack *obstack, int32_t data)
  46 {
  47   if (sizeof (int32_t) == sizeof (int))
  48     obstack_int_grow (obstack, data);
  49   else
  50     obstack_grow (obstack, &data, sizeof (int32_t));
  51 }
  52
  53 static inline void
  54 obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
  55 {
  56   if (sizeof (int32_t) == sizeof (int))
  57     obstack_int_grow_fast (obstack, data);
  58   else
  59     obstack_grow (obstack, &data, sizeof (int32_t));
  60 }
  61
  62 /* Forward declaration.  */
  63 struct element_t;
  64
  65 /* Data type for list of strings.  */
  66 struct section_list
  67 {
  68   struct section_list *def_next;
  69   struct section_list *next;
  70   /* Name of the section.  */
  71   const char *name;
  72   /* First element of this section.  */
  73   struct element_t *first;
  74   /* Last element of this section.  */
  75   struct element_t *last;
  76   /* These are the rules for this section.  */
  77   enum coll_sort_rule *rules;
  78   /* Index of the rule set in the appropriate section of the output file.  */
  79   int ruleidx;
  80 };
  81
  82 struct element_t;
  83
  84 struct element_list_t
  85 {
  86   /* Number of elements.  */
  87   int cnt;
  88
  89   struct element_t **w;
  90 };
  91
  92 /* Data type for collating element.  */
  93 struct element_t
  94 {
  95   const char *name;
  96
  97   const char *mbs;
  98   size_t nmbs;
  99   const uint32_t *wcs;
 100   size_t nwcs;
 101   int *mborder;
 102   int wcorder;
 103
 104   /* The following is a bit mask which bits are set if this element is
 105      used in the appropriate level.  Interesting for the singlebyte
 106      weight computation.
 107
 108      XXX The type here restricts the number of levels to 32.  It could
 109      be changed if necessary but I doubt this is necessary.  */
 110   unsigned int used_in_level;
 111
 112   struct element_list_t *weights;
 113
 114   /* Nonzero if this is a real character definition.  */
 115   int is_character;
 116
 117   /* Order of the character in the sequence.  This information will
 118      be used in range expressions.  */
 119   int mbseqorder;
 120   int wcseqorder;
 121
 122   /* Where does the definition come from.  */
 123   const char *file;
 124   size_t line;
 125
 126   /* Which section does this belong to.  */
 127   struct section_list *section;
 128
 129   /* Predecessor and successor in the order list.  */
 130   struct element_t *last;
 131   struct element_t *next;
 132
 133   /* Next element in multibyte output list.  */
 134   struct element_t *mbnext;
 135   struct element_t *mblast;
 136
 137   /* Next element in wide character output list.  */
 138   struct element_t *wcnext;
 139   struct element_t *wclast;
 140 };
 141
/* Special element values.  These are small integer constants cast to
   element pointers; they serve as markers in weight lists (the weight
   reader stores ELEMENT_ELLIPSIS2 for levels given as an ellipsis) and
   must never be dereferenced.  */
#define ELEMENT_ELLIPSIS2       ((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3       ((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4       ((struct element_t *) 3)
 146
 147 /* Data type for collating symbol.  */
 148 struct symbol_t
 149 {
 150   const char *name;
 151
 152   /* Point to place in the order list.  */
 153   struct element_t *order;
 154
 155   /* Where does the definition come from.  */
 156   const char *file;
 157   size_t line;
 158 };
 159
/* "3level.h" is used as a template here: it is included once per
   table type, each time with TABLE, ELEMENT and DEFAULT defined
   beforehand.
   NOTE(review): the macros are redefined below without an #undef, so
   3level.h presumably undefines them itself -- confirm in that header.  */

/* Sparse table of struct element_t *.  Provides `struct wchead_table',
   the type of the `wcheads' member of struct locale_collate_t.
   NOTE(review): the meaning of ITERATE and NO_FINALIZE is defined by
   3level.h, which is not visible here.  */
#define TABLE wchead_table
#define ELEMENT struct element_t *
#define DEFAULT NULL
#define ITERATE
#define NO_FINALIZE
#include "3level.h"

/* Sparse table of int32_t with default value 0.  */
#define TABLE collidx_table
#define ELEMENT int32_t
#define DEFAULT 0
#include "3level.h"

/* Sparse table of uint32_t; the default value has all bits set.
   Provides `struct collseq_table', the type of the `wcseqorder' member
   of struct locale_collate_t.  */
#define TABLE collseq_table
#define ELEMENT uint32_t
#define DEFAULT ~((uint32_t) 0)
#include "3level.h"
 179
 180
 181 /* The real definition of the struct for the LC_COLLATE locale.  */
 182 struct locale_collate_t
 183 {
 184   int col_weight_max;
 185   int cur_weight_max;
 186
 187   /* List of known scripts.  */
 188   struct section_list *known_sections;
 189   /* List of used sections.  */
 190   struct section_list *sections;
 191   /* Current section using definition.  */
 192   struct section_list *current_section;
 193   /* There always can be an unnamed section.  */
 194   struct section_list unnamed_section;
 195   /* To make handling of errors easier we have another section.  */
 196   struct section_list error_section;
 197   /* Sometimes we are defining the values for collating symbols before
 198      the first actual section.  */
 199   struct section_list symbol_section;
 200
 201   /* Start of the order list.  */
 202   struct element_t *start;
 203
 204   /* The undefined element.  */
 205   struct element_t undefined;
 206
 207   /* This is the cursor for `reorder_after' insertions.  */
 208   struct element_t *cursor;
 209
 210   /* This value is used when handling ellipsis.  */
 211   struct element_t ellipsis_weight;
 212
 213   /* Known collating elements.  */
 214   hash_table elem_table;
 215
 216   /* Known collating symbols.  */
 217   hash_table sym_table;
 218
 219   /* Known collation sequences.  */
 220   hash_table seq_table;
 221
 222   struct obstack mempool;
 223
 224   /* The LC_COLLATE category is a bit special as it is sometimes possible
 225      that the definitions from more than one input file contains information.
 226      Therefore we keep all relevant input in a list.  */
 227   struct locale_collate_t *next;
 228
 229   /* Arrays with heads of the list for each of the leading bytes in
 230      the multibyte sequences.  */
 231   struct element_t *mbheads[256];
 232
 233   /* Arrays with heads of the list for each of the leading bytes in
 234      the multibyte sequences.  */
 235   struct wchead_table wcheads;
 236
 237   /* The arrays with the collation sequence order.  */
 238   unsigned char mbseqorder[256];
 239   struct collseq_table wcseqorder;
 240 };
 241
 242
/* Global state shared while reading all LC_COLLATE category
   descriptions in all files.

   NRULES is the number of sorting rules (weight levels).  It is zero
   until the first list of sorting directions has been read, which
   fixes the value; every later list must specify the same number.  */
static uint32_t nrules;
 246
 247
 248 /* We need UTF-8 encoding of numbers.  */
 249 static inline int
 250 utf8_encode (char *buf, int val)
 251 {
 252   int retval;
 253
 254   if (val < 0x80)
 255     {
 256       *buf++ = (char) val;
 257       retval = 1;
 258     }
 259   else
 260     {
 261       int step;
 262
 263       for (step = 2; step < 6; ++step)
 264         if ((val & (~(uint32_t)0 << (5 * step + 1))) == 0)
 265           break;
 266       retval = step;
 267
 268       *buf = (unsigned char) (~0xff >> step);
 269       --step;
 270       do
 271         {
 272           buf[step] = 0x80 | (val & 0x3f);
 273           val >>= 6;
 274         }
 275       while (--step > 0);
 276       *buf |= val;
 277     }
 278
 279   return retval;
 280 }
 281
 282
 283 static struct section_list *
 284 make_seclist_elem (struct locale_collate_t *collate, const char *string,
 285                    struct section_list *next)
 286 {
 287   struct section_list *newp;
 288
 289   newp = (struct section_list *) obstack_alloc (&collate->mempool,
 290                                                 sizeof (*newp));
 291   newp->next = next;
 292   newp->name = string;
 293   newp->first = NULL;
 294
 295   return newp;
 296 }
 297
 298
 299 static struct element_t *
 300 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
 301              const uint32_t *wcs, const char *name, size_t namelen,
 302              int is_character)
 303 {
 304   struct element_t *newp;
 305
 306   newp = (struct element_t *) obstack_alloc (&collate->mempool,
 307                                              sizeof (*newp));
 308   newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
 309                                                     name, namelen);
 310   if (mbs != NULL)
 311     {
 312       newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
 313       newp->nmbs = mbslen;
 314     }
 315   else
 316     {
 317       newp->mbs = NULL;
 318       newp->nmbs = 0;
 319     }
 320   if (wcs != NULL)
 321     {
 322       size_t nwcs = wcslen ((wchar_t *) wcs);
 323       uint32_t zero = 0;
 324       obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
 325       obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
 326       newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
 327       newp->nwcs = nwcs;
 328     }
 329   else
 330     {
 331       newp->wcs = NULL;
 332       newp->nwcs = 0;
 333     }
 334   newp->mborder = NULL;
 335   newp->wcorder = 0;
 336   newp->used_in_level = 0;
 337   newp->is_character = is_character;
 338
 339   /* Will be allocated later.  */
 340   newp->weights = NULL;
 341
 342   newp->file = NULL;
 343   newp->line = 0;
 344
 345   newp->section = collate->current_section;
 346
 347   newp->last = NULL;
 348   newp->next = NULL;
 349
 350   newp->mbnext = NULL;
 351   newp->mblast = NULL;
 352
 353   return newp;
 354 }
 355
 356
 357 static struct symbol_t *
 358 new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
 359 {
 360   struct symbol_t *newp;
 361
 362   newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
 363
 364   newp->name = obstack_copy0 (&collate->mempool, name, len);
 365   newp->order = NULL;
 366
 367   newp->file = NULL;
 368   newp->line = 0;
 369
 370   return newp;
 371 }
 372
 373
 374 /* Test whether this name is already defined somewhere.  */
 375 static int
 376 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
 377                  struct charmap_t *charmap, struct repertoire_t *repertoire,
 378                  const char *symbol, size_t symbol_len)
 379 {
 380   void *ignore = NULL;
 381
 382   if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
 383     {
 384       lr_error (ldfile, _("`%.*s' already defined in charmap"),
 385                 (int) symbol_len, symbol);
 386       return 1;
 387     }
 388
 389   if (repertoire != NULL
 390       && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
 391           == 0))
 392     {
 393       lr_error (ldfile, _("`%.*s' already defined in repertoire"),
 394                 (int) symbol_len, symbol);
 395       return 1;
 396     }
 397
 398   if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
 399     {
 400       lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
 401                 (int) symbol_len, symbol);
 402       return 1;
 403     }
 404
 405   if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
 406     {
 407       lr_error (ldfile, _("`%.*s' already defined as collating element"),
 408                 (int) symbol_len, symbol);
 409       return 1;
 410     }
 411
 412   return 0;
 413 }
 414
 415
 416 /* Read the direction specification.  */
 417 static void
 418 read_directions (struct linereader *ldfile, struct token *arg,
 419                  struct charmap_t *charmap, struct repertoire_t *repertoire,
 420                  struct locale_collate_t *collate)
 421 {
 422   int cnt = 0;
 423   int max = nrules ?: 10;
 424   enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
 425   int warned = 0;
 426
 427   while (1)
 428     {
 429       int valid = 0;
 430
 431       if (arg->tok == tok_forward)
 432         {
 433           if (rules[cnt] & sort_backward)
 434             {
 435               if (! warned)
 436                 {
 437                   lr_error (ldfile, _("\
 438 %s: `forward' and `backward' are mutually excluding each other"),
 439                             "LC_COLLATE");
 440                   warned = 1;
 441                 }
 442             }
 443           else if (rules[cnt] & sort_forward)
 444             {
 445               if (! warned)
 446                 {
 447                   lr_error (ldfile, _("\
 448 %s: `%s' mentioned more than once in definition of weight %d"),
 449                             "LC_COLLATE", "forward", cnt + 1);
 450                 }
 451             }
 452           else
 453             rules[cnt] |= sort_forward;
 454
 455           valid = 1;
 456         }
 457       else if (arg->tok == tok_backward)
 458         {
 459           if (rules[cnt] & sort_forward)
 460             {
 461               if (! warned)
 462                 {
 463                   lr_error (ldfile, _("\
 464 %s: `forward' and `backward' are mutually excluding each other"),
 465                             "LC_COLLATE");
 466                   warned = 1;
 467                 }
 468             }
 469           else if (rules[cnt] & sort_backward)
 470             {
 471               if (! warned)
 472                 {
 473                   lr_error (ldfile, _("\
 474 %s: `%s' mentioned more than once in definition of weight %d"),
 475                             "LC_COLLATE", "backward", cnt + 1);
 476                 }
 477             }
 478           else
 479             rules[cnt] |= sort_backward;
 480
 481           valid = 1;
 482         }
 483       else if (arg->tok == tok_position)
 484         {
 485           if (rules[cnt] & sort_position)
 486             {
 487               if (! warned)
 488                 {
 489                   lr_error (ldfile, _("\
 490 %s: `%s' mentioned more than once in definition of weight %d"),
 491                             "LC_COLLATE", "position", cnt + 1);
 492                 }
 493             }
 494           else
 495             rules[cnt] |= sort_position;
 496
 497           valid = 1;
 498         }
 499
 500       if (valid)
 501         arg = lr_token (ldfile, charmap, repertoire);
 502
 503       if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
 504           || arg->tok == tok_semicolon)
 505         {
 506           if (! valid && ! warned)
 507             {
 508               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 509               warned = 1;
 510             }
 511
 512           /* See whether we have to increment the counter.  */
 513           if (arg->tok != tok_comma && rules[cnt] != 0)
 514             {
 515               /* Add the default `forward' if we have seen only `position'.  */
 516               if (rules[cnt] == sort_position)
 517                 rules[cnt] = sort_position | sort_forward;
 518
 519               ++cnt;
 520             }
 521
 522           if (arg->tok == tok_eof || arg->tok == tok_eol)
 523             /* End of line or file, so we exit the loop.  */
 524             break;
 525
 526           if (nrules == 0)
 527             {
 528               /* See whether we have enough room in the array.  */
 529               if (cnt == max)
 530                 {
 531                   max += 10;
 532                   rules = (enum coll_sort_rule *) xrealloc (rules,
 533                                                             max
 534                                                             * sizeof (*rules));
 535                   memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
 536                 }
 537             }
 538           else
 539             {
 540               if (cnt == nrules)
 541                 {
 542                   /* There must not be any more rule.  */
 543                   if (! warned)
 544                     {
 545                       lr_error (ldfile, _("\
 546 %s: too many rules; first entry only had %d"),
 547                                 "LC_COLLATE", nrules);
 548                       warned = 1;
 549                     }
 550
 551                   lr_ignore_rest (ldfile, 0);
 552                   break;
 553                 }
 554             }
 555         }
 556       else
 557         {
 558           if (! warned)
 559             {
 560               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 561               warned = 1;
 562             }
 563         }
 564
 565       arg = lr_token (ldfile, charmap, repertoire);
 566     }
 567
 568   if (nrules == 0)
 569     {
 570       /* Now we know how many rules we have.  */
 571       nrules = cnt;
 572       rules = (enum coll_sort_rule *) xrealloc (rules,
 573                                                 nrules * sizeof (*rules));
 574     }
 575   else
 576     {
 577       if (cnt < nrules)
 578         {
 579           /* Not enough rules in this specification.  */
 580           if (! warned)
 581             lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
 582
 583           do
 584             rules[cnt] = sort_forward;
 585           while (++cnt < nrules);
 586         }
 587     }
 588
 589   collate->current_section->rules = rules;
 590 }
 591
 592
 593 static struct element_t *
 594 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
 595               const char *str, size_t len)
 596 {
 597   struct element_t *result = NULL;
 598
 599   /* Search for the entries among the collation sequences already define.  */
 600   if (find_entry (&collate->seq_table, str, len, (void **) &result) != 0)
 601     {
 602       /* Nope, not define yet.  So we see whether it is a
 603          collation symbol.  */
 604       void *ptr;
 605
 606       if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
 607         {
 608           /* It's a collation symbol.  */
 609           struct symbol_t *sym = (struct symbol_t *) ptr;
 610           result = sym->order;
 611
 612           if (result == NULL)
 613             result = sym->order = new_element (collate, NULL, 0, NULL,
 614                                                NULL, 0, 0);
 615         }
 616       else if (find_entry (&collate->elem_table, str, len,
 617                            (void **) &result) != 0)
 618         {
 619           /* It's also no collation element.  So it is a character
 620              element defined later.  */
 621           result = new_element (collate, NULL, 0, NULL, str, len, 1);
 622           if (result != NULL)
 623             /* Insert it into the sequence table.  */
 624             insert_entry (&collate->seq_table, str, len, result);
 625         }
 626     }
 627
 628   return result;
 629 }
 630
 631
 632 static void
 633 unlink_element (struct locale_collate_t *collate)
 634 {
 635   if (collate->cursor == collate->start)
 636     {
 637       assert (collate->cursor->next == NULL);
 638       assert (collate->cursor->last == NULL);
 639       collate->cursor = NULL;
 640     }
 641   else
 642     {
 643       if (collate->cursor->next != NULL)
 644         collate->cursor->next->last = collate->cursor->last;
 645       if (collate->cursor->last != NULL)
 646         collate->cursor->last->next = collate->cursor->next;
 647       collate->cursor = collate->cursor->last;
 648     }
 649 }
 650
 651
 652 static void
 653 insert_weights (struct linereader *ldfile, struct element_t *elem,
 654                 struct charmap_t *charmap, struct repertoire_t *repertoire,
 655                 struct locale_collate_t *collate, enum token_t ellipsis)
 656 {
 657   int weight_cnt;
 658   struct token *arg;
 659
 660   /* Initialize all the fields.  */
 661   elem->file = ldfile->fname;
 662   elem->line = ldfile->lineno;
 663   elem->last = collate->cursor;
 664   elem->next = collate->cursor ? collate->cursor->next : NULL;
 665   if (collate->cursor != NULL && collate->cursor->next != NULL)
 666     collate->cursor->next->last = elem;
 667   elem->section = collate->current_section;
 668   if (collate->cursor != NULL)
 669     collate->cursor->next = elem;
 670   if (collate->start == NULL)
 671     {
 672       assert (collate->cursor == NULL);
 673       collate->start = elem;
 674     }
 675   elem->weights = (struct element_list_t *)
 676     obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
 677   memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
 678
 679   if (collate->current_section->first == NULL)
 680     collate->current_section->first = elem;
 681   if (collate->current_section->last == collate->cursor)
 682     collate->current_section->last = elem;
 683
 684   collate->cursor = elem;
 685
 686   weight_cnt = 0;
 687
 688   arg = lr_token (ldfile, charmap, repertoire);
 689   do
 690     {
 691       if (arg->tok == tok_eof || arg->tok == tok_eol)
 692         break;
 693
 694       if (arg->tok == tok_ignore)
 695         {
 696           /* The weight for this level has to be ignored.  We use the
 697              null pointer to indicate this.  */
 698           elem->weights[weight_cnt].w = (struct element_t **)
 699             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 700           elem->weights[weight_cnt].w[0] = NULL;
 701           elem->weights[weight_cnt].cnt = 1;
 702         }
 703       else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
 704         {
 705           char ucs4str[10];
 706           struct element_t *val;
 707           char *symstr;
 708           size_t symlen;
 709
 710           if (arg->tok == tok_bsymbol)
 711             {
 712               symstr = arg->val.str.startmb;
 713               symlen = arg->val.str.lenmb;
 714             }
 715           else
 716             {
 717               snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
 718               symstr = ucs4str;
 719               symlen = 9;
 720             }
 721
 722           val = find_element (ldfile, collate, symstr, symlen);
 723           if (val == NULL)
 724             break;
 725
 726           elem->weights[weight_cnt].w = (struct element_t **)
 727             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 728           elem->weights[weight_cnt].w[0] = val;
 729           elem->weights[weight_cnt].cnt = 1;
 730         }
 731       else if (arg->tok == tok_string)
 732         {
 733           /* Split the string up in the individual characters and put
 734              the element definitions in the list.  */
 735           const char *cp = arg->val.str.startmb;
 736           int cnt = 0;
 737           struct element_t *charelem;
 738           struct element_t **weights = NULL;
 739           int max = 0;
 740
 741           if (*cp == '\0')
 742             {
 743               lr_error (ldfile, _("%s: empty weight string not allowed"),
 744                         "LC_COLLATE");
 745               lr_ignore_rest (ldfile, 0);
 746               break;
 747             }
 748
 749           do
 750             {
 751               if (*cp == '<')
 752                 {
 753                   /* Ahh, it's a bsymbol or an UCS4 value.  If it's
 754                      the latter we have to unify the name.  */
 755                   const char *startp = ++cp;
 756                   size_t len;
 757
 758                   while (*cp != '>')
 759                     {
 760                       if (*cp == ldfile->escape_char)
 761                         ++cp;
 762                       if (*cp == '\0')
 763                         /* It's a syntax error.  */
 764                         goto syntax;
 765
 766                       ++cp;
 767                     }
 768
 769                   if (cp - startp == 5 && startp[0] == 'U'
 770                       && isxdigit (startp[1]) && isxdigit (startp[2])
 771                       && isxdigit (startp[3]) && isxdigit (startp[4]))
 772                     {
 773                       unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
 774                       char *newstr;
 775
 776                       newstr = (char *) xmalloc (10);
 777                       snprintf (newstr, 10, "U%08X", ucs4);
 778                       startp = newstr;
 779
 780                       len = 9;
 781                     }
 782                   else
 783                     len = cp - startp;
 784
 785                   charelem = find_element (ldfile, collate, startp, len);
 786                   ++cp;
 787                 }
 788               else
 789                 {
 790                   /* People really shouldn't use characters directly in
 791                      the string.  Especially since it's not really clear
 792                      what this means.  We interpret all characters in the
 793                      string as if that would be bsymbols.  Otherwise we
 794                      would have to match back to bsymbols somehow and this
 795                      is normally not what people normally expect.  */
 796                   charelem = find_element (ldfile, collate, cp++, 1);
 797                 }
 798
 799               if (charelem == NULL)
 800                 {
 801                   /* We ignore the rest of the line.  */
 802                   lr_ignore_rest (ldfile, 0);
 803                   break;
 804                 }
 805
 806               /* Add the pointer.  */
 807               if (cnt >= max)
 808                 {
 809                   struct element_t **newp;
 810                   max += 10;
 811                   newp = (struct element_t **)
 812                     alloca (max * sizeof (struct element_t *));
 813                   memcpy (newp, weights, cnt * sizeof (struct element_t *));
 814                   weights = newp;
 815                 }
 816               weights[cnt++] = charelem;
 817             }
 818           while (*cp != '\0');
 819
 820           /* Now store the information.  */
 821           elem->weights[weight_cnt].w = (struct element_t **)
 822             obstack_alloc (&collate->mempool,
 823                            cnt * sizeof (struct element_t *));
 824           memcpy (elem->weights[weight_cnt].w, weights,
 825                   cnt * sizeof (struct element_t *));
 826           elem->weights[weight_cnt].cnt = cnt;
 827
 828           /* We don't need the string anymore.  */
 829           free (arg->val.str.startmb);
 830         }
 831       else if (ellipsis != tok_none
 832                && (arg->tok == tok_ellipsis2
 833                    || arg->tok == tok_ellipsis3
 834                    || arg->tok == tok_ellipsis4))
 835         {
 836           /* It must be the same ellipsis as used in the initial column.  */
 837           if (arg->tok != ellipsis)
 838             lr_error (ldfile, _("\
 839 %s: weights must use the same ellipsis symbol as the name"),
 840                       "LC_COLLATE");
 841
 842           /* The weight for this level has to be ignored.  We use the
 843              null pointer to indicate this.  */
 844           elem->weights[weight_cnt].w = (struct element_t **)
 845             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 846           elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 847           elem->weights[weight_cnt].cnt = 1;
 848         }
 849       else
 850         {
 851         syntax:
 852           /* It's a syntax error.  */
 853           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 854           lr_ignore_rest (ldfile, 0);
 855           break;
 856         }
 857
 858       arg = lr_token (ldfile, charmap, repertoire);
 859       /* This better should be the end of the line or a semicolon.  */
 860       if (arg->tok == tok_semicolon)
 861         /* OK, ignore this and read the next token.  */
 862         arg = lr_token (ldfile, charmap, repertoire);
 863       else if (arg->tok != tok_eof && arg->tok != tok_eol)
 864         {
 865           /* It's a syntax error.  */
 866           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 867           lr_ignore_rest (ldfile, 0);
 868           break;
 869         }
 870     }
 871   while (++weight_cnt < nrules);
 872
 873   if (weight_cnt < nrules)
 874     {
 875       /* This means the rest of the line uses the current element as
 876          the weight.  */
 877       do
 878         {
 879           elem->weights[weight_cnt].w = (struct element_t **)
 880             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 881           if (ellipsis == tok_none)
 882             elem->weights[weight_cnt].w[0] = elem;
 883           else
 884             elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 885           elem->weights[weight_cnt].cnt = 1;
 886         }
 887       while (++weight_cnt < nrules);
 888     }
 889   else
 890     {
 891       if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
 892         {
 893           /* Too many rule values.  */
 894           lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
 895           lr_ignore_rest (ldfile, 0);
 896         }
 897       else
 898         lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
 899     }
 900 }
 901
 902
 903 static int
 904 insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
 905               struct charmap_t *charmap, struct repertoire_t *repertoire,
 906               struct locale_collate_t *collate)
 907 {
 908   /* First find out what kind of symbol this is.  */
 909   struct charseq *seq;
 910   uint32_t wc;
 911   struct element_t *elem = NULL;
 912
 913   /* Try to find the character in the charmap.  */
 914   seq = charmap_find_value (charmap, symstr, symlen);
 915
 916   /* Determine the wide character.  */
 917   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
 918     {
 919       wc = repertoire_find_value (repertoire, symstr, symlen);
 920       if (seq != NULL)
 921         seq->ucs4 = wc;
 922     }
 923   else
 924     wc = seq->ucs4;
 925
 926   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
 927     {
 928       /* It's no character, so look through the collation elements and
 929          symbol list.  */
 930       void *result;
 931
 932       if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
 933         {
 934           /* It's a collation symbol.  */
 935           struct symbol_t *sym = (struct symbol_t *) result;
 936           elem = sym->order;
 937
 938           if (elem == NULL)
 939             elem = sym->order = new_element (collate, NULL, 0, NULL,
 940                                              sym->name, strlen (sym->name), 0);
 941         }
 942       else if (find_entry (&collate->elem_table, symstr, symlen,
 943                            (void **) &elem) != 0)
 944         {
 945           /* It's also no collation element.  Therefore ignore it.  */
 946           lr_ignore_rest (ldfile, 0);
 947           return 1;
 948         }
 949     }
 950   else
 951     {
 952       /* Otherwise the symbols stands for a character.  */
 953       if (find_entry (&collate->seq_table, symstr, symlen,
 954                       (void **) &elem) != 0)
 955         {
 956           uint32_t wcs[2] = { wc, 0 };
 957
 958           /* We have to allocate an entry.  */
 959           elem = new_element (collate, seq != NULL ? seq->bytes : NULL,
 960                               seq != NULL ? seq->nbytes : 0,
 961                               wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
 962                               symstr, symlen, 1);
 963
 964           /* And add it to the table.  */
 965           if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
 966             /* This cannot happen.  */
 967             assert (! "Internal error");
 968         }
 969       else
 970         {
 971           /* Maybe the character was used before the definition.  In this case
 972              we have to insert the byte sequences now.  */
 973           if (elem->mbs == NULL && seq != NULL)
 974             {
 975               elem->mbs = obstack_copy0 (&collate->mempool,
 976                                          seq->bytes, seq->nbytes);
 977               elem->nmbs = seq->nbytes;
 978             }
 979
 980           if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
 981             {
 982               uint32_t wcs[2] = { wc, 0 };
 983
 984               elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
 985               elem->nwcs = 1;
 986             }
 987         }
 988     }
 989
 990   /* Test whether this element is not already in the list.  */
 991   if (elem->next != NULL || (collate->cursor != NULL
 992                              && elem->next == collate->cursor))
 993     {
 994       lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
 995                 (int) symlen, symstr, elem->file, elem->line);
 996       lr_ignore_rest (ldfile, 0);
 997       return 1;
 998     }
 999
1000   insert_weights (ldfile, elem, charmap, repertoire, collate, tok_none);
1001
1002   return 0;
1003 }
1004
1005
1006 static void
1007 handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1008                  enum token_t ellipsis, struct charmap_t *charmap,
1009                  struct repertoire_t *repertoire,
1010                  struct locale_collate_t *collate)
1011 {
1012   struct element_t *startp;
1013   struct element_t *endp;
1014
1015   /* Unlink the entry added for the ellipsis.  */
1016   unlink_element (collate);
1017   startp = collate->cursor;
1018
1019   /* Process and add the end-entry.  */
1020   if (symstr != NULL
1021       && insert_value (ldfile, symstr, symlen, charmap, repertoire, collate))
1022     /* Something went wrong with inserting the to-value.  This means
1023        we cannot process the ellipsis.  */
1024     return;
1025
1026   /* Reset the cursor.  */
1027   collate->cursor = startp;
1028
1029   /* Now we have to handle many different situations:
1030      - we have to distinguish between the three different ellipsis forms
1031      - the is the ellipsis at the beginning, in the middle, or at the end.
1032   */
1033   endp = collate->cursor->next;
1034   assert (symstr == NULL || endp != NULL);
1035
1036   /* XXX The following is probably very wrong since also collating symbols
1037      can appear in ranges.  But do we want/can refine the test for that?  */
1038 #if 0
1039   /* Both, the start and the end symbol, must stand for characters.  */
1040   if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1041       || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1042     {
1043       lr_error (ldfile, _("\
1044 %s: the start and the end symbol of a range must stand for characters"),
1045                 "LC_COLLATE");
1046       return;
1047     }
1048 #endif
1049
1050   if (ellipsis == tok_ellipsis3)
1051     {
1052       /* One requirement we make here: the length of the byte
1053          sequences for the first and end character must be the same.
1054          This is mainly to prevent unwanted effects and this is often
1055          not what is wanted.  */
1056       size_t len = (startp->mbs != NULL ? startp->nmbs
1057                     : (endp->mbs != NULL ? endp->nmbs : 0));
1058       char mbcnt[len + 1];
1059       char mbend[len + 1];
1060
1061       /* Well, this should be caught somewhere else already.  Just to
1062          make sure.  */
1063       assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1064       assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1065
1066       if (startp != NULL && endp != NULL
1067           && startp->mbs != NULL && endp->mbs != NULL
1068           && startp->nmbs != endp->nmbs)
1069         {
1070           lr_error (ldfile, _("\
1071 %s: byte sequences of first and last character must have the same length"),
1072                     "LC_COLLATE");
1073           return;
1074         }
1075
1076       /* Determine whether we have to generate multibyte sequences.  */
1077       if ((startp == NULL || startp->mbs != NULL)
1078           && (endp == NULL || endp->mbs != NULL))
1079         {
1080           int cnt;
1081           int ret;
1082
1083           /* Prepare the beginning byte sequence.  This is either from the
1084              beginning byte sequence or it is all nulls if it was an
1085              initial ellipsis.  */
1086           if (startp == NULL || startp->mbs == NULL)
1087             memset (mbcnt, '\0', len);
1088           else
1089             {
1090               memcpy (mbcnt, startp->mbs, len);
1091
1092               /* And increment it so that the value is the first one we will
1093                  try to insert.  */
1094               for (cnt = len - 1; cnt >= 0; --cnt)
1095                 if (++mbcnt[cnt] != '\0')
1096                   break;
1097             }
1098           mbcnt[len] = '\0';
1099
1100           /* And the end sequence.  */
1101           if (endp == NULL || endp->mbs == NULL)
1102             memset (mbend, '\0', len);
1103           else
1104             memcpy (mbend, endp->mbs, len);
1105           mbend[len] = '\0';
1106
1107           /* Test whether we have a correct range.  */
1108           ret = memcmp (mbcnt, mbend, len);
1109           if (ret >= 0)
1110             {
1111               if (ret > 0)
1112                 lr_error (ldfile, _("%s: byte sequence of first character of \
1113 sequence is not lower than that of the last character"), "LC_COLLATE");
1114               return;
1115             }
1116
1117           /* Generate the byte sequences data.  */
1118           while (1)
1119             {
1120               struct charseq *seq;
1121
1122               /* Quite a bit of work ahead.  We have to find the character
1123                  definition for the byte sequence and then determine the
1124                  wide character belonging to it.  */
1125               seq = charmap_find_symbol (charmap, mbcnt, len);
1126               if (seq != NULL)
1127                 {
1128                   struct element_t *elem;
1129                   size_t namelen;
1130
1131                   /* I don't this this can ever happen.  */
1132                   assert (seq->name != NULL);
1133                   namelen = strlen (seq->name);
1134
1135                   if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1136                     seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1137                                                        namelen);
1138
1139                   /* Now we are ready to insert the new value in the
1140                      sequence.  Find out whether the element is
1141                      already known.  */
1142                   if (find_entry (&collate->seq_table, seq->name, namelen,
1143                                   (void **) &elem) != 0)
1144                     {
1145                       uint32_t wcs[2] = { seq->ucs4, 0 };
1146
1147                       /* We have to allocate an entry.  */
1148                       elem = new_element (collate, mbcnt, len,
1149                                           seq->ucs4 == ILLEGAL_CHAR_VALUE
1150                                           ? NULL : wcs, seq->name,
1151                                           namelen, 1);
1152
1153                       /* And add it to the table.  */
1154                       if (insert_entry (&collate->seq_table, seq->name,
1155                                         namelen, elem) != 0)
1156                         /* This cannot happen.  */
1157                         assert (! "Internal error");
1158                     }
1159
1160                   /* Test whether this element is not already in the list.  */
1161                   if (elem->next != NULL || (collate->cursor != NULL
1162                                              && elem->next == collate->cursor))
1163                     {
1164                       lr_error (ldfile, _("\
1165 order for `%.*s' already defined at %s:%Zu"),
1166                                 (int) namelen, seq->name,
1167                                 elem->file, elem->line);
1168                       goto increment;
1169                     }
1170
1171                   /* Enqueue the new element.  */
1172                   elem->last = collate->cursor;
1173                   if (collate->cursor == NULL)
1174                     elem->next = NULL;
1175                   else
1176                     {
1177                       elem->next = collate->cursor->next;
1178                       elem->last->next = elem;
1179                       if (elem->next != NULL)
1180                         elem->next->last = elem;
1181                     }
1182                   if (collate->start == NULL)
1183                     {
1184                       assert (collate->cursor == NULL);
1185                       collate->start = elem;
1186                     }
1187                   collate->cursor = elem;
1188
1189                  /* Add the weight value.  We take them from the
1190                     `ellipsis_weights' member of `collate'.  */
1191                   elem->weights = (struct element_list_t *)
1192                     obstack_alloc (&collate->mempool,
1193                                    nrules * sizeof (struct element_list_t));
1194                   for (cnt = 0; cnt < nrules; ++cnt)
1195                     if (collate->ellipsis_weight.weights[cnt].cnt == 1
1196                         && (collate->ellipsis_weight.weights[cnt].w[0]
1197                             == ELEMENT_ELLIPSIS2))
1198                       {
1199                         elem->weights[cnt].w = (struct element_t **)
1200                           obstack_alloc (&collate->mempool,
1201                                          sizeof (struct element_t *));
1202                         elem->weights[cnt].w[0] = elem;
1203                         elem->weights[cnt].cnt = 1;
1204                       }
1205                     else
1206                       {
1207                         /* Simply use the weight from `ellipsis_weight'.  */
1208                         elem->weights[cnt].w =
1209                           collate->ellipsis_weight.weights[cnt].w;
1210                         elem->weights[cnt].cnt =
1211                           collate->ellipsis_weight.weights[cnt].cnt;
1212                       }
1213                 }
1214
1215               /* Increment for the next round.  */
1216             increment:
1217               for (cnt = len - 1; cnt >= 0; --cnt)
1218                 if (++mbcnt[cnt] != '\0')
1219                   break;
1220
1221               /* Find out whether this was all.  */
1222               if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1223                 /* Yep, that's all.  */
1224                 break;
1225             }
1226         }
1227     }
1228   else
1229     {
1230       /* For symbolic range we naturally must have a beginning and an
1231          end specified by the user.  */
1232       if (startp == NULL)
1233         lr_error (ldfile, _("\
1234 %s: symbolic range ellipsis must not directly follow `order_start'"),
1235                   "LC_COLLATE");
1236       else if (endp == NULL)
1237         lr_error (ldfile, _("\
1238 %s: symbolic range ellipsis must not be direct followed by `order_end'"),
1239                   "LC_COLLATE");
1240       else
1241         {
1242           /* Determine the range.  To do so we have to determine the
1243              common prefix of the both names and then the numeric
1244              values of both ends.  */
1245           size_t lenfrom = strlen (startp->name);
1246           size_t lento = strlen (endp->name);
1247           char buf[lento + 1];
1248           int preflen = 0;
1249           long int from;
1250           long int to;
1251           char *cp;
1252           int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1253
1254           if (lenfrom != lento)
1255             {
1256             invalid_range:
1257               lr_error (ldfile, _("\
1258 `%s' and `%.*s' are no valid names for symbolic range"),
1259                         startp->name, (int) lento, endp->name);
1260               return;
1261             }
1262
1263           while (startp->name[preflen] == endp->name[preflen])
1264             if (startp->name[preflen] == '\0')
1265               /* Nothing to be done.  The start and end point are identical
1266                  and while inserting the end point we have already given
1267                  the user an error message.  */
1268               return;
1269             else
1270               ++preflen;
1271
1272           errno = 0;
1273           from = strtol (startp->name + preflen, &cp, base);
1274           if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1275             goto invalid_range;
1276
1277           errno = 0;
1278           to = strtol (endp->name + preflen, &cp, base);
1279           if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1280             goto invalid_range;
1281
1282           /* Copy the prefix.  */
1283           memcpy (buf, startp->name, preflen);
1284
1285           /* Loop over all values.  */
1286           for (++from; from < to; ++from)
1287             {
1288               struct element_t *elem = NULL;
1289               struct charseq *seq;
1290               uint32_t wc;
1291               int cnt;
1292
1293               /* Generate the the name.  */
1294               sprintf (buf + preflen, base == 10 ? "%d" : "%x", from);
1295
1296               /* Look whether this name is already defined.  */
1297               if (find_entry (&collate->seq_table, buf, symlen,
1298                               (void **) &elem) == 0)
1299                 {
1300                   if (elem->next != NULL || (collate->cursor != NULL
1301                                              && elem->next == collate->cursor))
1302                     {
1303                       lr_error (ldfile, _("\
1304 %s: order for `%.*s' already defined at %s:%Zu"),
1305                                 "LC_COLLATE", (int) lenfrom, buf,
1306                                 elem->file, elem->line);
1307                       continue;
1308                     }
1309
1310                   if (elem->name == NULL)
1311                     {
1312                       lr_error (ldfile, _("%s: `%s' must be a charater"),
1313                                 "LC_COLLATE", buf);
1314                       continue;
1315                     }
1316                 }
1317
1318               if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1319                 {
1320                   /* Search for a character of this name.  */
1321                   seq = charmap_find_value (charmap, buf, lenfrom);
1322                   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1323                     {
1324                       wc = repertoire_find_value (repertoire, buf, lenfrom);
1325
1326                       if (seq != NULL)
1327                         seq->ucs4 = wc;
1328                     }
1329                   else
1330                     wc = seq->ucs4;
1331
1332                   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1333                     /* We don't know anything about a character with this
1334                        name.  XXX Should we warn?  */
1335                     continue;
1336
1337                   if (elem == NULL)
1338                     {
1339                       uint32_t wcs[2] = { wc, 0 };
1340
1341                       /* We have to allocate an entry.  */
1342                       elem = new_element (collate,
1343                                           seq != NULL ? seq->bytes : NULL,
1344                                           seq != NULL ? seq->nbytes : 0,
1345                                           wc == ILLEGAL_CHAR_VALUE
1346                                           ? NULL : wcs, buf, lenfrom, 1);
1347                     }
1348                   else
1349                     {
1350                       /* Update the element.  */
1351                       if (seq != NULL)
1352                         {
1353                           elem->mbs = obstack_copy0 (&collate->mempool,
1354                                                      seq->bytes, seq->nbytes);
1355                           elem->nmbs = seq->nbytes;
1356                         }
1357
1358                       if (wc != ILLEGAL_CHAR_VALUE)
1359                         {
1360                           uint32_t zero = 0;
1361
1362                           obstack_grow (&collate->mempool,
1363                                         &wc, sizeof (uint32_t));
1364                           obstack_grow (&collate->mempool,
1365                                         &zero, sizeof (uint32_t));
1366                           elem->wcs = obstack_finish (&collate->mempool);
1367                           elem->nwcs = 1;
1368                         }
1369                     }
1370
1371                   elem->file = ldfile->fname;
1372                   elem->line = ldfile->lineno;
1373                   elem->section = collate->current_section;
1374                 }
1375
1376               /* Enqueue the new element.  */
1377               elem->last = collate->cursor;
1378               elem->next = collate->cursor->next;
1379               elem->last->next = elem;
1380               if (elem->next != NULL)
1381                 elem->next->last = elem;
1382               collate->cursor = elem;
1383
1384               /* Now add the weights.  They come from the `ellipsis_weights'
1385                  member of `collate'.  */
1386               elem->weights = (struct element_list_t *)
1387                 obstack_alloc (&collate->mempool,
1388                                nrules * sizeof (struct element_list_t));
1389               for (cnt = 0; cnt < nrules; ++cnt)
1390                 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1391                     && (collate->ellipsis_weight.weights[cnt].w[0]
1392                         == ELEMENT_ELLIPSIS2))
1393                   {
1394                     elem->weights[cnt].w = (struct element_t **)
1395                       obstack_alloc (&collate->mempool,
1396                                      sizeof (struct element_t *));
1397                     elem->weights[cnt].w[0] = elem;
1398                     elem->weights[cnt].cnt = 1;
1399                   }
1400                 else
1401                   {
1402                     /* Simly use the weight from `ellipsis_weight'.  */
1403                     elem->weights[cnt].w =
1404                       collate->ellipsis_weight.weights[cnt].w;
1405                     elem->weights[cnt].cnt =
1406                       collate->ellipsis_weight.weights[cnt].cnt;
1407                   }
1408             }
1409         }
1410     }
1411 }
1412
1413
1414 static void
1415 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1416                  struct localedef_t *copy_locale, int ignore_content)
1417 {
1418   if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1419     {
1420       struct locale_collate_t *collate;
1421
1422       if (copy_locale == NULL)
1423         {
1424           collate = locale->categories[LC_COLLATE].collate =
1425             (struct locale_collate_t *)
1426             xcalloc (1, sizeof (struct locale_collate_t));
1427
1428           /* Init the various data structures.  */
1429           init_hash (&collate->elem_table, 100);
1430           init_hash (&collate->sym_table, 100);
1431           init_hash (&collate->seq_table, 500);
1432           obstack_init (&collate->mempool);
1433
1434           collate->col_weight_max = -1;
1435         }
1436       else
1437         collate = locale->categories[LC_COLLATE].collate =
1438           copy_locale->categories[LC_COLLATE].collate;
1439     }
1440
1441   ldfile->translate_strings = 0;
1442   ldfile->return_widestr = 0;
1443 }
1444
1445
1446 void
1447 collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
1448 {
1449   /* Now is the time when we can assign the individual collation
1450      values for all the symbols.  We have possibly different values
1451      for the wide- and the multibyte-character symbols.  This is done
1452      since it might make a difference in the encoding if there is in
1453      some cases no multibyte-character but there are wide-characters.
1454      (The other way around it is not important since theencoded
1455      collation value in the wide-character case is 32 bits wide and
1456      therefore requires no encoding).
1457
1458      The lowest collation value assigned is 2.  Zero is reserved for
1459      the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1460      functions and 1 is used to separate the individual passes for the
1461      different rules.
1462
1463      We also have to construct is list with all the bytes/words which
1464      can come first in a sequence, followed by all the elements which
1465      also start with this byte/word.  The order is reverse which has
1466      among others the important effect that longer strings are located
1467      first in the list.  This is required for the output data since
1468      the algorithm used in `strcoll' etc depends on this.
1469
1470      The multibyte case is easy.  We simply sort into an array with
1471      256 elements.  */
1472   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1473   int mbact[nrules];
1474   int wcact;
1475   int mbseqact;
1476   int wcseqact;
1477   struct element_t *runp;
1478   int i;
1479   int need_undefined = 0;
1480   struct section_list *sect;
1481   int ruleidx;
1482   int nr_wide_elems = 0;
1483
1484   if (collate == NULL)
1485     {
1486       /* No data, no check.  */
1487       if (! be_quiet)
1488         error (0, 0, _("No definition for %s category found"), "LC_COLLATE");
1489       return;
1490     }
1491
1492   /* If this assertion is hit change the type in `element_t'.  */
1493   assert (nrules <= sizeof (runp->used_in_level) * 8);
1494
1495   /* Make sure that the `position' rule is used either in all sections
1496      or in none.  */
1497   for (i = 0; i < nrules; ++i)
1498     for (sect = collate->sections; sect != NULL; sect = sect->next)
1499       if (sect->rules != NULL
1500           && ((sect->rules[i] & sort_position)
1501               != (collate->sections->rules[i] & sort_position)))
1502         {
1503           error (0, 0, _("\
1504 %s: `position' must be used for a specific level in all sections or none"),
1505                  "LC_COLLATE");
1506           break;
1507         }
1508
1509   /* Find out which elements are used at which level.  At the same
1510      time we find out whether we have any undefined symbols.  */
1511   runp = collate->start;
1512   while (runp != NULL)
1513     {
1514       if (runp->mbs != NULL)
1515         {
1516           for (i = 0; i < nrules; ++i)
1517             {
1518               int j;
1519
1520               for (j = 0; j < runp->weights[i].cnt; ++j)
1521                 /* A NULL pointer as the weight means IGNORE.  */
1522                 if (runp->weights[i].w[j] != NULL)
1523                   {
1524                     if (runp->weights[i].w[j]->weights == NULL)
1525                       {
1526                         error_at_line (0, 0, runp->file, runp->line,
1527                                        _("symbol `%s' not defined"),
1528                                        runp->weights[i].w[j]->name);
1529
1530                         need_undefined = 1;
1531                         runp->weights[i].w[j] = &collate->undefined;
1532                       }
1533                     else
1534                       /* Set the bit for the level.  */
1535                       runp->weights[i].w[j]->used_in_level |= 1 << i;
1536                   }
1537             }
1538         }
1539
1540       /* Up to the next entry.  */
1541       runp = runp->next;
1542     }
1543
1544   /* Walk through the list of defined sequences and assign weights.  Also
1545      create the data structure which will allow generating the single byte
1546      character based tables.
1547
1548      Since at each time only the weights for each of the rules are
1549      only compared to other weights for this rule it is possible to
1550      assign more compact weight values than simply counting all
1551      weights in sequence.  We can assign weights from 3, one for each
1552      rule individually and only for those elements, which are actually
1553      used for this rule.
1554
1555      Why is this important?  It is not for the wide char table.  But
1556      it is for the singlebyte output since here larger numbers have to
1557      be encoded to make it possible to emit the value as a byte
1558      string.  */
1559   for (i = 0; i < nrules; ++i)
1560     mbact[i] = 2;
1561   wcact = 2;
1562   mbseqact = 0;
1563   wcseqact = 0;
1564   runp = collate->start;
1565   while (runp != NULL)
1566     {
1567       /* Determine the order.  */
1568       if (runp->used_in_level != 0)
1569         {
1570           runp->mborder = (int *) obstack_alloc (&collate->mempool,
1571                                                  nrules * sizeof (int));
1572
1573           for (i = 0; i < nrules; ++i)
1574             if ((runp->used_in_level & (1 << i)) != 0)
1575               runp->mborder[i] = mbact[i]++;
1576             else
1577               runp->mborder[i] = 0;
1578         }
1579
1580       if (runp->mbs != NULL)
1581         {
1582           struct element_t **eptr;
1583           struct element_t *lastp = NULL;
1584
1585           /* Find the point where to insert in the list.  */
1586           eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1587           while (*eptr != NULL)
1588             {
1589               if ((*eptr)->nmbs < runp->nmbs)
1590                 break;
1591
1592               if ((*eptr)->nmbs == runp->nmbs)
1593                 {
1594                   int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1595
1596                   if (c == 0)
1597                     {
1598                       /* This should not happen.  It means that we have
1599                          to symbols with the same byte sequence.  It is
1600                          of course an error.  */
1601                       error_at_line (0, 0, (*eptr)->file, (*eptr)->line,
1602                                      _("symbol `%s' has the same encoding as"),
1603                                      (*eptr)->name);
1604                       error_at_line (0, 0, runp->file, runp->line,
1605                                      _("symbol `%s'"), runp->name);
1606                       goto dont_insert;
1607                     }
1608                   else if (c < 0)
1609                     /* Insert it here.  */
1610                     break;
1611                 }
1612
1613               /* To the next entry.  */
1614               lastp = *eptr;
1615               eptr = &(*eptr)->mbnext;
1616             }
1617
1618           /* Set the pointers.  */
1619           runp->mbnext = *eptr;
1620           runp->mblast = lastp;
1621           if (*eptr != NULL)
1622             (*eptr)->mblast = runp;
1623           *eptr = runp;
1624         dont_insert:
1625         }
1626
1627       if (runp->used_in_level)
1628         {
1629           runp->wcorder = wcact++;
1630
1631           /* We take the opportunity to count the elements which have
1632              wide characters.  */
1633           ++nr_wide_elems;
1634         }
1635
1636       if (runp->is_character)
1637         {
1638           if (runp->nmbs == 1)
1639             collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
1640
1641           runp->wcseqorder = wcseqact++;
1642         }
1643
1644       /* Up to the next entry.  */
1645       runp = runp->next;
1646     }
1647
1648   /* Find out whether any of the `mbheads' entries is unset.  In this
1649      case we use the UNDEFINED entry.  */
1650   for (i = 1; i < 256; ++i)
1651     if (collate->mbheads[i] == NULL)
1652       {
1653         need_undefined = 1;
1654         collate->mbheads[i] = &collate->undefined;
1655       }
1656
1657   /* Now to the wide character case.  */
1658   collate->wcheads.p = 6;
1659   collate->wcheads.q = 10;
1660   wchead_table_init (&collate->wcheads);
1661
1662   collate->wcseqorder.p = 6;
1663   collate->wcseqorder.q = 10;
1664   collseq_table_init (&collate->wcseqorder);
1665
1666   /* Start adding.  */
1667   runp = collate->start;
1668   while (runp != NULL)
1669     {
1670       if (runp->wcs != NULL)
1671         {
1672           struct element_t *e;
1673           struct element_t **eptr;
1674           struct element_t *lastp;
1675
1676           /* Insert the collation sequence value.  */
1677           collseq_table_add (&collate->wcseqorder, runp->wcs[0],
1678                              runp->wcseqorder);
1679
1680           /* Find the point where to insert in the list.  */
1681           e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
1682           eptr = &e;
1683           lastp = NULL;
1684           while (*eptr != NULL)
1685             {
1686               if ((*eptr)->nwcs < runp->nwcs)
1687                 break;
1688
1689               if ((*eptr)->nwcs == runp->nwcs)
1690                 {
1691                   int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
1692                                    (wchar_t *) runp->wcs, runp->nwcs);
1693
1694                   if (c == 0)
1695                     {
1696                       /* This should not happen.  It means that we have
1697                          two symbols with the same byte sequence.  It is
1698                          of course an error.  */
1699                       error_at_line (0, 0, (*eptr)->file, (*eptr)->line,
1700                                      _("symbol `%s' has the same encoding as"),
1701                                      (*eptr)->name);
1702                       error_at_line (0, 0, runp->file, runp->line,
1703                                      _("symbol `%s'"), runp->name);
1704                       goto dont_insertwc;
1705                     }
1706                   else if (c < 0)
1707                     /* Insert it here.  */
1708                     break;
1709                 }
1710
1711               /* To the next entry.  */
1712               lastp = *eptr;
1713               eptr = &(*eptr)->wcnext;
1714             }
1715
1716           /* Set the pointers.  */
1717           runp->wcnext = *eptr;
1718           runp->wclast = lastp;
1719           if (*eptr != NULL)
1720             (*eptr)->wclast = runp;
1721           *eptr = runp;
1722           if (eptr == &e)
1723             wchead_table_add (&collate->wcheads, runp->wcs[0], e);
1724         dont_insertwc:
1725         }
1726
1727       /* Up to the next entry.  */
1728       runp = runp->next;
1729     }
1730
1731   collseq_table_finalize (&collate->wcseqorder);
1732
1733   /* Now determine whether the UNDEFINED entry is needed and if yes,
1734      whether it was defined.  */
1735   collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1736   if (collate->undefined.file == NULL)
1737     {
1738       if (need_undefined)
1739         {
1740           /* This seems not to be enforced by recent standards.  Don't
1741              emit an error, simply append UNDEFINED at the end.  */
1742           if (0)
1743             error (0, 0, _("no definition of `UNDEFINED'"));
1744
1745           /* Add UNDEFINED at the end.  */
1746           collate->undefined.mborder =
1747             (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1748
1749           for (i = 0; i < nrules; ++i)
1750             collate->undefined.mborder[i] = mbact[i]++;
1751         }
1752
1753       /* In any case we will need the definition for the wide character
1754          case.  But we will not complain that it is missing since the
1755          specification strangely enough does not seem to account for
1756          this.  */
1757       collate->undefined.wcorder = wcact++;
1758     }
1759
1760   /* Finally, try to unify the rules for the sections.  Whenever the rules
1761      for a section are the same as those for another section give the
1762      ruleset the same index.  Since there are never many sections we can
1763      use an O(n^2) algorithm here.  */
1764   sect = collate->sections;
1765   while (sect != NULL && sect->rules == NULL)
1766     sect = sect->next;
1767   assert (sect != NULL);
1768   ruleidx = 0;
1769   do
1770     {
1771       struct section_list *osect = collate->sections;
1772
1773       while (osect != sect)
1774         if (osect->rules != NULL
1775             && memcmp (osect->rules, sect->rules, nrules) == 0)
1776           break;
1777         else
1778           osect = osect->next;
1779
1780       if (osect == sect)
1781         sect->ruleidx = ruleidx++;
1782       else
1783         sect->ruleidx = osect->ruleidx;
1784
1785       /* Next section.  */
1786       do
1787         sect = sect->next;
1788       while (sect != NULL && sect->rules == NULL);
1789     }
1790   while (sect != NULL);
1791   /* We are currently not prepared for more than 256 rulesets.  But this
1792      should never really be a problem.  */
1793   assert (ruleidx <= 256);
1794 }
1795
1796
/* Append the multibyte weight records of ELEM to the object currently
   growing in POOL.

   One record is appended for each of the `nrules' levels: a single
   length byte followed by that many bytes, which are the concatenated
   `utf8_encode' results for the `mborder' value of every weight of that
   level that is not an IGNORE (NULL) entry.

   Returns the size the growing object had before anything was appended,
   i.e. the byte offset of the first record, with the low seven bits of
   ELEM's section `ruleidx' stored in bits 24..30.  When ELEM is
   `&COLLATE->undefined' nothing is appended and zero is returned.  */
1797 static int32_t
1798 output_weight (struct obstack *pool, struct locale_collate_t *collate,
1799                struct element_t *elem)
1800 {
1801   size_t cnt;
1802   int32_t retval;
1803
1804   /* Optimize the use of UNDEFINED.  */
1805   if (elem == &collate->undefined)
1806     /* The weights are already inserted.  */
1807     return 0;
1808
1809   /* This byte can start exactly one collation element and this is
1810      a single byte.  We can directly give the index to the weights.  */
1811   retval = obstack_object_size (pool);
1812
1813   /* Construct the weight.  */
1814   for (cnt = 0; cnt < nrules; ++cnt)
1815     {
           /* Scratch space for the encoded weights of this level.  Sized on
              the assumption that `utf8_encode' never produces more than
              seven bytes per value -- TODO confirm against its definition.  */
1816       char buf[elem->weights[cnt].cnt * 7];
1817       int len = 0;
1818       int i;
1819
1820       for (i = 0; i < elem->weights[cnt].cnt; ++i)
1821         /* Encode the weight value.  We do nothing for IGNORE entries.  */
1822         if (elem->weights[cnt].w[i] != NULL)
1823           len += utf8_encode (&buf[len],
1824                               elem->weights[cnt].w[i]->mborder[cnt]);
1825
1826       /* And add the buffer content.  The length goes first and occupies
              exactly one byte.  */
1827       obstack_1grow (pool, len);
1828       obstack_grow (pool, buf, len);
1829     }
1830
       /* Only seven bits of the rule set index are kept, placed above the
          24-bit offset.  */
1831   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1832 }
1833
1834
/* Append the wide character weight records of ELEM to the object
   currently growing in POOL.

   One record is appended for each of the `nrules' levels: a 32-bit
   count followed by that many 32-bit values, which are the `wcorder'
   values of every weight of that level that is not an IGNORE (NULL)
   entry.

   Returns the index, counted in 32-bit words, at which the first record
   starts in the growing object, with the low seven bits of ELEM's
   section `ruleidx' stored in bits 24..30.  When ELEM is
   `&COLLATE->undefined' nothing is appended and zero is returned.  */
1835 static int32_t
1836 output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1837                  struct element_t *elem)
1838 {
1839   size_t cnt;
1840   int32_t retval;
1841
1842   /* Optimize the use of UNDEFINED.  */
1843   if (elem == &collate->undefined)
1844     /* The weights are already inserted.  */
1845     return 0;
1846
1847   /* This byte can start exactly one collation element and this is
1848      a single byte.  We can directly give the index to the weights.  */
1849   retval = obstack_object_size (pool) / sizeof (int32_t);
1850
1851   /* Construct the weight.  */
1852   for (cnt = 0; cnt < nrules; ++cnt)
1853     {
           /* Scratch space with one slot per weight of this level; J counts
              the slots actually filled because IGNORE entries are skipped.  */
1854       int32_t buf[elem->weights[cnt].cnt];
1855       int i;
1856       int32_t j;
1857
1858       for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1859         if (elem->weights[cnt].w[i] != NULL)
1860           buf[j++] = elem->weights[cnt].w[i]->wcorder;
1861
1862       /* And add the buffer content.  The number of values goes first.  */
1863       obstack_int32_grow (pool, j);
1864
1865       obstack_grow (pool, buf, j * sizeof (int32_t));
1866     }
1867
       /* Only seven bits of the rule set index are kept, placed above the
          24-bit word index.  */
1868   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1869 }
1870
1871
/* Write the LC_COLLATE category of LOCALE below OUTPUT_PATH.

   The output is assembled as an iovec array and handed to
   `write_locale_data': a `struct locale_file' header, an index array
   with one running byte offset per item, and then one data item for
   every `_NL_COLLATE_*' element in the order checked by the asserts
   below (rule count, rule sets, the multibyte table with its weight,
   extra and indirect pools, three empty gap items, the wide character
   table with its three pools, the collating symbol hash table with its
   data, and finally the two collation sequence tables).

   If LOCALE carries no collation data only the rule count is written;
   all remaining items are empty except for a zero symbol hash size.

   CHARMAP is not referenced by this function.  */
1872 void
1873 collate_output (struct localedef_t *locale, struct charmap_t *charmap,
1874                 const char *output_path)
1875 {
1876   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1877   const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
1878   struct iovec iov[2 + nelems];
1879   struct locale_file data;
1880   uint32_t idx[nelems];
1881   size_t cnt;
1882   size_t ch;
1883   int32_t tablemb[256];
1884   struct obstack weightpool;
1885   struct obstack extrapool;
1886   struct obstack indirectpool;
1887   struct section_list *sect;
1888   struct collidx_table tablewc;
1889   uint32_t elem_size;
1890   uint32_t *elem_table;
1891   int i;
1892   struct element_t *runp;
1893
1894   data.magic = LIMAGIC (LC_COLLATE);
1895   data.n = nelems;
1896   iov[0].iov_base = (void *) &data;
1897   iov[0].iov_len = sizeof (data);
1898
1899   iov[1].iov_base = (void *) idx;
1900   iov[1].iov_len = sizeof (idx);
1901
       /* The first item starts right behind the header and the index array;
          every later index entry is the previous one plus the item length.  */
1902   idx[0] = iov[0].iov_len + iov[1].iov_len;
1903   cnt = 0;
1904
1905   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NRULES));
1906   iov[2 + cnt].iov_base = &nrules;
1907   iov[2 + cnt].iov_len = sizeof (uint32_t);
1908   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1909   ++cnt;
1910
1911   /* If we have no LC_COLLATE data emit only the number of rules as zero.  */
1912   if (collate == NULL)
1913     {
1914       int32_t dummy = 0;
1915
1916       while (cnt < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
1917         {
1918           /* The words have to be handled specially.  */
1919           if (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
1920             {
1921               iov[2 + cnt].iov_base = &dummy;
1922               iov[2 + cnt].iov_len = sizeof (int32_t);
1923             }
1924           else
1925             {
1926               iov[2 + cnt].iov_base = NULL;
1927               iov[2 + cnt].iov_len = 0;
1928             }
1929
               /* The index array has no slot following the last item.  */
1930           if (cnt + 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
1931             idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1932           ++cnt;
1933         }
1934
1935       assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
1936
1937       write_locale_data (output_path, "LC_COLLATE", 2 + cnt, iov);
1938
1939       return;
1940     }
1941
1942   obstack_init (&weightpool);
1943   obstack_init (&extrapool);
1944   obstack_init (&indirectpool);
1945
1946   /* Since we are using the sign of an integer to mark indirection the
1947      offsets in the arrays we are indirectly referring to must not be
1948      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
1949   obstack_int32_grow (&extrapool, 0);
1950   obstack_int32_grow (&indirectpool, 0);
1951
1952   /* Prepare the ruleset table.  */
1953   for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
1954     if (sect->rules != NULL && sect->ruleidx == i)
1955       {
1956         int j;
1957
1958         obstack_make_room (&weightpool, nrules);
1959
1960         for (j = 0; j < nrules; ++j)
1961           obstack_1grow_fast (&weightpool, sect->rules[j]);
1962         ++i;
1963       }
1964   /* And align the output.  */
1965   i = (nrules * i) % __alignof__ (int32_t);
1966   if (i > 0)
1967     do
1968       obstack_1grow (&weightpool, '\0');
1969     while (++i < __alignof__ (int32_t));
1970
1971   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_RULESETS));
1972   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
1973   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
1974   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1975   ++cnt;
1976
1977   /* Generate the 8-bit table.  Walk through the lists of sequences
1978      starting with the same byte and add them one after the other to
1979      the table.  In case we have more than one sequence starting with
1980      the same byte we have to use extra indirection.
1981
1982      First add a record for the NUL byte.  This entry will never be used
1983      so it does not matter.  */
1984   tablemb[0] = 0;
1985
1986   /* Now insert the `UNDEFINED' value if it is used.  Since this value
1987      will probably be used more than once it is good to store the
1988      weights only once.  */
1989   if (collate->undefined.used_in_level != 0)
1990     output_weight (&weightpool, collate, &collate->undefined);
1991
1992   for (ch = 1; ch < 256; ++ch)
1993     if (collate->mbheads[ch]->mbnext == NULL
1994         && collate->mbheads[ch]->nmbs <= 1)
1995       {
1996         tablemb[ch] = output_weight (&weightpool, collate,
1997                                      collate->mbheads[ch]);
1998       }
1999     else
2000       {
2001         /* The entries in the list are sorted by length and then
2002            alphabetically.  This is the order in which we will add the
2003            elements to the collation table.  This allows simply walking
2004            the table in sequence and stopping at the first matching
2005            entry.  Since the longer sequences are coming first in the
2006            list they have the possibility to match first, just as it
2007            has to be.  In the worst case we are walking to the end of
2008            the list where we put, if no singlebyte sequence is defined
2009            in the locale definition, the weights for UNDEFINED.
2010
2011            To reduce the length of the search list we compress them a bit.
2012            This happens by collecting sequences of consecutive byte
2013            sequences in one entry (having a begin and end byte sequence)
2014            and add only one index into the weight table.  We can find the
2015            consecutive entries since they are also consecutive in the list.  */
2016         struct element_t *runp = collate->mbheads[ch];
2017         struct element_t *lastp;
2018
2019         assert ((obstack_object_size (&extrapool)
2020                  & (__alignof__ (int32_t) - 1)) == 0);
2021
             /* A negative table entry marks an offset into the extra pool.  */
2022         tablemb[ch] = -obstack_object_size (&extrapool);
2023
2024         do
2025           {
2026             /* Store the current index in the weight table.  We know that
2027                the current position in the `extrapool' is aligned on a
2028                32-bit address.  */
2029             int32_t weightidx;
2030             int added;
2031
2032             /* Find out whether this is a single entry or we have more than
2033                one consecutive entry.  */
2034             if (runp->mbnext != NULL
2035                 && runp->nmbs == runp->mbnext->nmbs
2036                 && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
2037                 && (runp->mbs[runp->nmbs - 1]
2038                     == runp->mbnext->mbs[runp->nmbs - 1] + 1))
2039               {
2040                 int i;
2041                 struct element_t *series_startp = runp;
2042                 struct element_t *curp;
2043
2044                 /* Compute how much space we will need.  */
2045                 added = ((sizeof (int32_t) + 1 + 2 * (runp->nmbs - 1)
2046                           + __alignof__ (int32_t) - 1)
2047                          & ~(__alignof__ (int32_t) - 1));
2048                 assert ((obstack_object_size (&extrapool)
2049                          & (__alignof__ (int32_t) - 1)) == 0);
2050                 obstack_make_room (&extrapool, added);
2051
2052                 /* More than one consecutive entry.  We mark this by having
2053                    a negative index into the indirect table.  */
2054                 obstack_int32_grow_fast (&extrapool,
2055                                          -(obstack_object_size (&indirectpool)
2056                                            / sizeof (int32_t)));
2057
2058                 /* Now search first the end of the series.  */
2059                 do
2060                   runp = runp->mbnext;
2061                 while (runp->mbnext != NULL
2062                        && runp->nmbs == runp->mbnext->nmbs
2063                        && memcmp (runp->mbs, runp->mbnext->mbs,
2064                                   runp->nmbs - 1) == 0
2065                        && (runp->mbs[runp->nmbs - 1]
2066                            == runp->mbnext->mbs[runp->nmbs - 1] + 1));
2067
2068                 /* Now walk backward from here to the beginning.  */
2069                 curp = runp;
2070
2071                 assert (runp->nmbs <= 256);
2072                 obstack_1grow_fast (&extrapool, curp->nmbs - 1);
2073                 for (i = 1; i < curp->nmbs; ++i)
2074                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2075
2076                 /* Now find the end of the consecutive sequence and
2077                    add all the indices in the indirect pool.  */
2078                 do
2079                   {
2080                     weightidx = output_weight (&weightpool, collate, curp);
2081                     obstack_int32_grow (&indirectpool, weightidx);
2082
2083                     curp = curp->mblast;
2084                   }
2085                 while (curp != series_startp);
2086
2087                 /* Add the final weight.  */
2088                 weightidx = output_weight (&weightpool, collate, curp);
2089                 obstack_int32_grow (&indirectpool, weightidx);
2090
2091                 /* And add the end byte sequence.  Without length this
2092                    time.  */
2093                 for (i = 1; i < curp->nmbs; ++i)
2094                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2095               }
2096             else
2097               {
2098                 /* A single entry.  Simply add the index and the length and
2099                    string (except for the first character which is already
2100                    tested for).  */
2101                 int i;
2102
2103                 /* Output the weight info.  */
2104                 weightidx = output_weight (&weightpool, collate, runp);
2105
2106                 added = ((sizeof (int32_t) + 1 + runp->nmbs - 1
2107                           + __alignof__ (int32_t) - 1)
2108                          & ~(__alignof__ (int32_t) - 1));
2109                 assert ((obstack_object_size (&extrapool)
2110                          & (__alignof__ (int32_t) - 1)) == 0);
2111                 obstack_make_room (&extrapool, added);
2112
2113                 obstack_int32_grow_fast (&extrapool, weightidx);
2114                 assert (runp->nmbs <= 256);
2115                 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
2116
2117                 for (i = 1; i < runp->nmbs; ++i)
2118                   obstack_1grow_fast (&extrapool, runp->mbs[i]);
2119               }
2120
2121             /* Add alignment bytes if necessary.  */
2122             while ((obstack_object_size (&extrapool)
2123                     & (__alignof__ (int32_t) - 1)) != 0)
2124               obstack_1grow_fast (&extrapool, '\0');
2125
2126             /* Next entry.  */
2127             lastp = runp;
2128             runp = runp->mbnext;
2129           }
2130         while (runp != NULL);
2131
2132         assert ((obstack_object_size (&extrapool)
2133                  & (__alignof__ (int32_t) - 1)) == 0);
2134
2135         /* If the final entry in the list is not a single character we
2136            add an UNDEFINED entry here.  */
2137         if (lastp->nmbs != 1)
2138           {
2139             int added = ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t) - 1)
2140                          & ~(__alignof__ (int32_t) - 1));
2141             obstack_make_room (&extrapool, added);
2142
2143             obstack_int32_grow_fast (&extrapool, 0);
2144             /* XXX What rule? We just pick the first.  */
2145             obstack_1grow_fast (&extrapool, 0);
2146             /* Length is zero.  */
2147             obstack_1grow_fast (&extrapool, 0);
2148
2149             /* Add alignment bytes if necessary.  */
2150             while ((obstack_object_size (&extrapool)
2151                     & (__alignof__ (int32_t) - 1)) != 0)
2152               obstack_1grow_fast (&extrapool, '\0');
2153           }
2154       }
2155
2156   /* Add padding to the tables if necessary.  */
2157   while ((obstack_object_size (&weightpool) & (__alignof__ (int32_t) - 1))
2158          != 0)
2159     obstack_1grow (&weightpool, 0);
2160
2161   /* Now add the four tables.  */
2162   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB));
2163   iov[2 + cnt].iov_base = tablemb;
2164   iov[2 + cnt].iov_len = sizeof (tablemb);
2165   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2166   assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2167   ++cnt;
2168
2169   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB));
2170   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2171   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2172   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2173   ++cnt;
2174
2175   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB));
2176   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2177   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2178   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2179   ++cnt;
2180
2181   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB));
2182   iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2183   iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2184   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2185   assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2186   ++cnt;
2187
2188
2189   /* Now the same for the wide character table.  We need to store some
2190      more information here.  */
2191   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP1));
2192   iov[2 + cnt].iov_base = NULL;
2193   iov[2 + cnt].iov_len = 0;
2194   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2195   assert (idx[cnt] % 4 == 0);
2196   ++cnt;
2197
2198   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP2));
2199   iov[2 + cnt].iov_base = NULL;
2200   iov[2 + cnt].iov_len = 0;
2201   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2202   assert (idx[cnt] % 4 == 0);
2203   ++cnt;
2204
2205   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP3));
2206   iov[2 + cnt].iov_base = NULL;
2207   iov[2 + cnt].iov_len = 0;
2208   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2209   assert (idx[cnt] % 4 == 0);
2210   ++cnt;
2211
2212   /* Since we are using the sign of an integer to mark indirection the
2213      offsets in the arrays we are indirectly referring to must not be
2214      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
2215   obstack_int32_grow (&extrapool, 0);
2216   obstack_int32_grow (&indirectpool, 0);
2217
2218   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2219      will probably be used more than once it is good to store the
2220      weights only once.  */
2221   if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
2222     abort ();
2223
2224   /* Generate the table.  Walk through the lists of sequences starting
2225      with the same wide character and add them one after the other to
2226      the table.  In case we have more than one sequence starting with
2227      the same byte we have to use extra indirection.  */
2228   {
         /* Nested callback for `wchead_table_iterate': record in `tablewc'
            the entry for the wide character CH whose list of sequences
            starts at RUNP.  */
2229     void add_to_tablewc (uint32_t ch, struct element_t *runp)
2230       {
2231         if (runp->wcnext == NULL && runp->nwcs == 1)
2232           {
2233             int32_t weigthidx = output_weightwc (&weightpool, collate, runp);
2234             collidx_table_add (&tablewc, ch, weigthidx);
2235           }
2236         else
2237           {
2238             /* As for the singlebyte table, we recognize sequences and
2239                compress them.  */
2240             struct element_t *lastp;
2241
2242             collidx_table_add (&tablewc, ch,
2243                                -(obstack_object_size (&extrapool) / sizeof (uint32_t)));
2244
2245             do
2246               {
2247                 /* Store the current index in the weight table.  We know that
2248                    the current position in the `extrapool' is aligned on a
2249                    32-bit address.  */
2250                 int32_t weightidx;
2251                 int added;
2252
2253                 /* Find out whether this is a single entry or we have more than
2254                    one consecutive entry.  */
2255                 if (runp->wcnext != NULL
2256                     && runp->nwcs == runp->wcnext->nwcs
2257                     && wmemcmp ((wchar_t *) runp->wcs,
2258                                 (wchar_t *)runp->wcnext->wcs,
2259                                 runp->nwcs - 1) == 0
2260                     && (runp->wcs[runp->nwcs - 1]
2261                         == runp->wcnext->wcs[runp->nwcs - 1] + 1))
2262                   {
2263                     int i;
2264                     struct element_t *series_startp = runp;
2265                     struct element_t *curp;
2266
2267                     /* Now add first the initial byte sequence.  */
2268                     added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
2269                     if (sizeof (int32_t) == sizeof (int))
2270                       obstack_make_room (&extrapool, added);
2271
2272                     /* More than one consecutive entry.  We mark this by having
2273                        a negative index into the indirect table.  */
2274                     obstack_int32_grow_fast (&extrapool,
2275                                              -(obstack_object_size (&indirectpool)
2276                                                / sizeof (int32_t)));
2277                     obstack_int32_grow_fast (&extrapool, runp->nwcs - 1);
2278
2279                     do
2280                       runp = runp->wcnext;
2281                     while (runp->wcnext != NULL
2282                            && runp->nwcs == runp->wcnext->nwcs
2283                            && wmemcmp ((wchar_t *) runp->wcs,
2284                                        (wchar_t *)runp->wcnext->wcs,
2285                                        runp->nwcs - 1) == 0
2286                            && (runp->wcs[runp->nwcs - 1]
2287                                == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2288
2289                     /* Now walk backward from here to the beginning.  */
2290                     curp = runp;
2291
2292                     for (i = 1; i < runp->nwcs; ++i)
2293                       obstack_int32_grow_fast (&extrapool, curp->wcs[i]);
2294
2295                     /* Now find the end of the consecutive sequence and
2296                        add all the indices in the indirect pool.  */
2297                     do
2298                       {
2299                         weightidx = output_weightwc (&weightpool, collate,
2300                                                      curp);
2301                         obstack_int32_grow (&indirectpool, weightidx);
2302
2303                         curp = curp->wclast;
2304                       }
2305                     while (curp != series_startp);
2306
2307                     /* Add the final weight.  */
2308                     weightidx = output_weightwc (&weightpool, collate, curp);
2309                     obstack_int32_grow (&indirectpool, weightidx);
2310
2311                     /* And add the end byte sequence.  Without length this
2312                        time.  */
2313                     for (i = 1; i < curp->nwcs; ++i)
2314                       obstack_int32_grow (&extrapool, curp->wcs[i]);
2315                   }
2316                 else
2317                   {
2318                     /* A single entry.  Simply add the index and the length and
2319                        string (except for the first character which is already
2320                        tested for).  */
2321                     int i;
2322
2323                     /* Output the weight info.  */
2324                     weightidx = output_weightwc (&weightpool, collate, runp);
2325
2326                     added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2327                     if (sizeof (int) == sizeof (int32_t))
2328                       obstack_make_room (&extrapool, added);
2329
2330                     obstack_int32_grow_fast (&extrapool, weightidx);
2331                     obstack_int32_grow_fast (&extrapool, runp->nwcs - 1);
2332                     for (i = 1; i < runp->nwcs; ++i)
2333                       obstack_int32_grow_fast (&extrapool, runp->wcs[i]);
2334                   }
2335
2336                 /* Next entry.  */
2337                 lastp = runp;
2338                 runp = runp->wcnext;
2339               }
2340             while (runp != NULL);
2341           }
2342       }
2343
2344     tablewc.p = 6;
2345     tablewc.q = 10;
2346     collidx_table_init (&tablewc);
2347
2348     wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2349
2350     collidx_table_finalize (&tablewc);
2351   }
2352
2353   /* Now add the four tables.  */
2354   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEWC));
2355   iov[2 + cnt].iov_base = tablewc.result;
2356   iov[2 + cnt].iov_len = tablewc.result_size;
2357   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2358   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2359   assert (idx[cnt] % 4 == 0);
2360   ++cnt;
2361
2362   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTWC));
2363   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2364   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2365   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2366   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2367   assert (idx[cnt] % 4 == 0);
2368   ++cnt;
2369
2370   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAWC));
2371   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2372   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2373   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2374   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2375   assert (idx[cnt] % 4 == 0);
2376   ++cnt;
2377
2378   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTWC));
2379   iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2380   iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2381   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2382   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2383   assert (idx[cnt] % 4 == 0);
2384   ++cnt;
2385
2386
2387   /* Finally write the table with collation element names out.  It is
2388      a hash table with a simple function which gets the name of the
2389      character as the input.  One character might have many names.  The
2390      value associated with the name is an index into the weight table
2391      where we are then interested in the first-level weight value.
2392
2393      To determine how large the table should be we are counting the
2394      elements have to put in.  Since we are using internal chaining
2395      using a secondary hash function we have to make the table a bit
2396      larger to avoid extremely long search times.  We can achieve
2397      good results with a 40% larger table than there are entries.  */
2398   elem_size = 0;
2399   runp = collate->start;
2400   while (runp != NULL)
2401     {
2402       if (runp->mbs != NULL && runp->weights != NULL)
2403         /* Yep, the element really counts.  */
2404         ++elem_size;
2405
2406       runp = runp->next;
2407     }
2408   /* Add 40% and find the next prime number.  The result is deliberately
          not capped: with fewer slots than entries the probing below could
          never find a free slot.  */
2409   elem_size = next_prime (elem_size * 1.4);
2410
2411   /* Allocate the table.  Each entry consists of two words: the hash
2412      value and an index in a secondary table which provides the index
2413      into the weight table and the string itself (so that a match can
2414      be determined).  */
2415   elem_table = (uint32_t *) obstack_alloc (&extrapool,
2416                                            elem_size * 2 * sizeof (uint32_t));
2417   memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
2418
2419   /* Now add the elements.  */
2420   runp = collate->start;
2421   while (runp != NULL)
2422     {
2423       if (runp->mbs != NULL && runp->weights != NULL)
2424         {
2425           /* Compute the hash value of the name.  */
2426           uint32_t namelen = strlen (runp->name);
2427           uint32_t hash = elem_hash (runp->name, namelen);
2428           size_t slot = hash % elem_size;
2429
2430           if (elem_table[slot * 2] != 0)
2431             {
2432               /* The spot is already taken.  Try iterating using the value
2433                  from the secondary hashing function.  A step of zero would
                      probe the same slot forever and a table of two slots would
                      divide by zero, so fall back to a step of one in those
                      cases.  NOTE(review): the code looking symbols up at
                      run time must walk the same probe sequence -- confirm.  */
2434               size_t iter = (elem_size > 2 && hash % (elem_size - 2) != 0
2435                              ? hash % (elem_size - 2) : 1);
2436               do
2437                 {
2438                   slot += iter;
2439                   if (slot >= elem_size)
2440                     slot -= elem_size;
2441                 }
2442               while (elem_table[slot * 2] != 0);
2443             }
2444           /* This is the spot where we will insert the value.  This must
                  happen for every element, not only after a collision,
                  otherwise the initially empty table would never receive
                  an entry.  NOTE(review): a zero word marks a free slot, so
                  a name hashing to zero would leave its slot looking unused.  */
2445           elem_table[slot * 2] = hash;
2446           elem_table[slot * 2 + 1] = obstack_object_size (&extrapool);
2447
2448           /* The string itself including length.  */
2449           obstack_1grow (&extrapool, namelen);
2450           obstack_grow (&extrapool, runp->name, namelen);
2451
2452           /* And the multibyte representation.  */
2453           obstack_1grow (&extrapool, runp->nmbs);
2454           obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2455
2456           /* And align again to 32 bits.  */
2457           if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
2458             obstack_grow (&extrapool, "\0\0",
2459                           (sizeof (int32_t)
2460                            - ((1 + namelen + 1 + runp->nmbs)
2461                               % sizeof (int32_t))));
2462
2463           /* Now some 32-bit values: multibyte collation sequence,
2464              wide char string (including length), and wide char
2465              collation sequence.  */
2466           obstack_int32_grow (&extrapool, runp->mbseqorder);
2467
2468           obstack_int32_grow (&extrapool, runp->nwcs);
2469           obstack_grow (&extrapool, runp->wcs,
2470                         runp->nwcs * sizeof (uint32_t));
2471
2472           obstack_int32_grow (&extrapool, runp->wcseqorder);
2473
2474         }
2475
2476       runp = runp->next;
2477     }
2478
2479   /* Prepare to write out this data.  */
2480   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB));
2481   iov[2 + cnt].iov_base = &elem_size;
2482   iov[2 + cnt].iov_len = sizeof (int32_t);
2483   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2484   assert (idx[cnt] % 4 == 0);
2485   ++cnt;
2486
2487   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_TABLEMB));
2488   iov[2 + cnt].iov_base = elem_table;
2489   iov[2 + cnt].iov_len = elem_size * 2 * sizeof (int32_t);
2490   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2491   assert (idx[cnt] % 4 == 0);
2492   ++cnt;
2493
2494   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB));
2495   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2496   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2497   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2498   ++cnt;
2499
2500   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB));
2501   iov[2 + cnt].iov_base = collate->mbseqorder;
2502   iov[2 + cnt].iov_len = 256;
2503   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2504   ++cnt;
2505
       /* This is the last item, so there is no further index slot to fill.  */
2506   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC));
2507   iov[2 + cnt].iov_base = collate->wcseqorder.result;
2508   iov[2 + cnt].iov_len = collate->wcseqorder.result_size;
2509   assert (idx[cnt] % 4 == 0);
2510   ++cnt;
2511
2512   assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
2513
2514   write_locale_data (output_path, "LC_COLLATE", 2 + cnt, iov);
2515
2516   obstack_free (&weightpool, NULL);
2517   obstack_free (&extrapool, NULL);
2518   obstack_free (&indirectpool, NULL);
2519 }
2520
2521
2522 void
2523 collate_read (struct linereader *ldfile, struct localedef_t *result,
2524               struct charmap_t *charmap, const char *repertoire_name,
2525               int ignore_content)
2526 {
2527   struct repertoire_t *repertoire = NULL;
2528   struct locale_collate_t *collate;
2529   struct token *now;
2530   struct token *arg = NULL;
2531   enum token_t nowtok;
2532   int state = 0;
2533   enum token_t was_ellipsis = tok_none;
2534   struct localedef_t *copy_locale = NULL;
2535
2536   /* Get the repertoire we have to use.  */
2537   if (repertoire_name != NULL)
2538     repertoire = repertoire_read (repertoire_name);
2539
2540   /* The rest of the line containing `LC_COLLATE' must be free.  */
2541   lr_ignore_rest (ldfile, 1);
2542
2543   do
2544     {
2545       now = lr_token (ldfile, charmap, NULL);
2546       nowtok = now->tok;
2547     }
2548   while (nowtok == tok_eol);
2549
2550   if (nowtok == tok_copy)
2551     {
2552       state = 2;
2553       now = lr_token (ldfile, charmap, NULL);
2554       if (now->tok != tok_string)
2555         {
2556           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2557
2558         skip_category:
2559           do
2560             now = lr_token (ldfile, charmap, NULL);
2561           while (now->tok != tok_eof && now->tok != tok_end);
2562
2563           if (now->tok != tok_eof
2564               || (now = lr_token (ldfile, charmap, NULL), now->tok == tok_eof))
2565             lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2566           else if (now->tok != tok_lc_collate)
2567             {
2568               lr_error (ldfile, _("\
2569 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2570               lr_ignore_rest (ldfile, 0);
2571             }
2572           else
2573             lr_ignore_rest (ldfile, 1);
2574
2575           return;
2576         }
2577
2578       if (! ignore_content)
2579         {
2580           /* Get the locale definition.  */
2581           copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2582                                      repertoire_name, charmap, NULL);
2583           if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2584             {
2585               /* Not yet loaded.  So do it now.  */
2586               if (locfile_read (copy_locale, charmap) != 0)
2587                 goto skip_category;
2588             }
2589         }
2590
2591       lr_ignore_rest (ldfile, 1);
2592
2593       now = lr_token (ldfile, charmap, NULL);
2594       nowtok = now->tok;
2595     }
2596
2597   /* Prepare the data structures.  */
2598   collate_startup (ldfile, result, copy_locale, ignore_content);
2599   collate = result->categories[LC_COLLATE].collate;
2600
2601   while (1)
2602     {
2603       char ucs4buf[10];
2604       char *symstr;
2605       size_t symlen;
2606
2607       /* Of course we don't proceed beyond the end of file.  */
2608       if (nowtok == tok_eof)
2609         break;
2610
2611       /* Ignore empty lines.  */
2612       if (nowtok == tok_eol)
2613         {
2614           now = lr_token (ldfile, charmap, NULL);
2615           nowtok = now->tok;
2616           continue;
2617         }
2618
2619       switch (nowtok)
2620         {
2621         case tok_copy:
2622           /* Allow copying other locales.  */
2623           now = lr_token (ldfile, charmap, NULL);
2624           if (now->tok != tok_string)
2625             goto err_label;
2626
2627           if (! ignore_content)
2628             load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2629                          charmap, result);
2630
2631           lr_ignore_rest (ldfile, 1);
2632           break;
2633
2634         case tok_coll_weight_max:
2635           /* Ignore the rest of the line if we don't need the input of
2636              this line.  */
2637           if (ignore_content)
2638             {
2639               lr_ignore_rest (ldfile, 0);
2640               break;
2641             }
2642
2643           if (state != 0)
2644             goto err_label;
2645
2646           arg = lr_token (ldfile, charmap, NULL);
2647           if (arg->tok != tok_number)
2648             goto err_label;
2649           if (collate->col_weight_max != -1)
2650             lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2651                       "LC_COLLATE", "col_weight_max");
2652           else
2653             collate->col_weight_max = arg->val.num;
2654           lr_ignore_rest (ldfile, 1);
2655           break;
2656
2657         case tok_section_symbol:
2658           /* Ignore the rest of the line if we don't need the input of
2659              this line.  */
2660           if (ignore_content)
2661             {
2662               lr_ignore_rest (ldfile, 0);
2663               break;
2664             }
2665
2666           if (state != 0)
2667             goto err_label;
2668
2669           arg = lr_token (ldfile, charmap, repertoire);
2670           if (arg->tok != tok_bsymbol)
2671             goto err_label;
2672           else if (!ignore_content)
2673             {
2674               /* Check whether this section is already known.  */
2675               struct section_list *known = collate->sections;
2676               while (known != NULL)
2677                 {
2678                   if (strcmp (known->name, arg->val.str.startmb) == 0)
2679                     break;
2680                   known = known->next;
2681                 }
2682
2683               if (known != NULL)
2684                 {
2685                   lr_error (ldfile,
2686                             _("%s: duplicate declaration of section `%s'"),
2687                             "LC_COLLATE", arg->val.str.startmb);
2688                   free (arg->val.str.startmb);
2689                 }
2690               else
2691                 collate->sections = make_seclist_elem (collate,
2692                                                        arg->val.str.startmb,
2693                                                        collate->sections);
2694
2695               lr_ignore_rest (ldfile, known == NULL);
2696             }
2697           else
2698             {
2699               free (arg->val.str.startmb);
2700               lr_ignore_rest (ldfile, 0);
2701             }
2702           break;
2703
2704         case tok_collating_element:
2705           /* Ignore the rest of the line if we don't need the input of
2706              this line.  */
2707           if (ignore_content)
2708             {
2709               lr_ignore_rest (ldfile, 0);
2710               break;
2711             }
2712
2713           if (state != 0)
2714             goto err_label;
2715
2716           arg = lr_token (ldfile, charmap, repertoire);
2717           if (arg->tok != tok_bsymbol)
2718             goto err_label;
2719           else
2720             {
2721               const char *symbol = arg->val.str.startmb;
2722               size_t symbol_len = arg->val.str.lenmb;
2723
2724               /* Next the `from' keyword.  */
2725               arg = lr_token (ldfile, charmap, repertoire);
2726               if (arg->tok != tok_from)
2727                 {
2728                   free ((char *) symbol);
2729                   goto err_label;
2730                 }
2731
2732               ldfile->return_widestr = 1;
2733               ldfile->translate_strings = 1;
2734
2735               /* Finally the string with the replacement.  */
2736               arg = lr_token (ldfile, charmap, repertoire);
2737
2738               ldfile->return_widestr = 0;
2739               ldfile->translate_strings = 0;
2740
2741               if (arg->tok != tok_string)
2742                 goto err_label;
2743
2744               if (!ignore_content && symbol != NULL)
2745                 {
2746                   /* The name is already defined.  */
2747                   if (check_duplicate (ldfile, collate, charmap,
2748                                        repertoire, symbol, symbol_len))
2749                     goto col_elem_free;
2750
2751                   if (arg->val.str.startmb != NULL)
2752                     insert_entry (&collate->elem_table, symbol, symbol_len,
2753                                   new_element (collate,
2754                                                arg->val.str.startmb,
2755                                                arg->val.str.lenmb - 1,
2756                                                arg->val.str.startwc,
2757                                                symbol, symbol_len, 0));
2758                 }
2759               else
2760                 {
2761                 col_elem_free:
2762                   if (symbol != NULL)
2763                     free ((char *) symbol);
2764                   if (arg->val.str.startmb != NULL)
2765                     free (arg->val.str.startmb);
2766                   if (arg->val.str.startwc != NULL)
2767                     free (arg->val.str.startwc);
2768                 }
2769               lr_ignore_rest (ldfile, 1);
2770             }
2771           break;
2772
2773         case tok_collating_symbol:
2774           /* Ignore the rest of the line if we don't need the input of
2775              this line.  */
2776           if (ignore_content)
2777             {
2778               lr_ignore_rest (ldfile, 0);
2779               break;
2780             }
2781
2782           if (state != 0 && state != 2)
2783             goto err_label;
2784
2785           arg = lr_token (ldfile, charmap, repertoire);
2786           if (arg->tok != tok_bsymbol)
2787             goto err_label;
2788           else
2789             {
2790               char *symbol = arg->val.str.startmb;
2791               size_t symbol_len = arg->val.str.lenmb;
2792               char *endsymbol = NULL;
2793               size_t endsymbol_len = 0;
2794               enum token_t ellipsis = tok_none;
2795
2796               arg = lr_token (ldfile, charmap, repertoire);
2797               if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
2798                 {
2799                   ellipsis = arg->tok;
2800
2801                   arg = lr_token (ldfile, charmap, repertoire);
2802                   if (arg->tok != tok_bsymbol)
2803                     {
2804                       free (symbol);
2805                       goto err_label;
2806                     }
2807
2808                   endsymbol = arg->val.str.startmb;
2809                   endsymbol_len = arg->val.str.lenmb;
2810
2811                   lr_ignore_rest (ldfile, 1);
2812                 }
2813               else if (arg->tok != tok_eol)
2814                 {
2815                   free (symbol);
2816                   goto err_label;
2817                 }
2818
2819               if (!ignore_content)
2820                 {
2821                   if (symbol == NULL
2822                       || (ellipsis != tok_none && endsymbol == NULL))
2823                     {
2824                       lr_error (ldfile, _("\
2825 %s: unknown character in collating symbol name"),
2826                                 "LC_COLLATE");
2827                       goto col_sym_free;
2828                     }
2829                   else if (ellipsis == tok_none)
2830                     {
2831                       /* The name is already defined.  */
2832                       if (check_duplicate (ldfile, collate, charmap,
2833                                            repertoire, symbol, symbol_len))
2834                         goto col_sym_free;
2835
2836                       insert_entry (&collate->sym_table, symbol, symbol_len,
2837                                     new_symbol (collate, symbol, symbol_len));
2838                     }
2839                   else if (symbol_len != endsymbol_len)
2840                     {
2841                     col_sym_inv_range:
2842                       lr_error (ldfile,
2843                                 _("invalid names for character range"));
2844                       goto col_sym_free;
2845                     }
2846                   else
2847                     {
2848                       /* Oh my, we have to handle an ellipsis.  First, as
2849                          usual, determine the common prefix and then
2850                          convert the rest into a range.  */
2851                       size_t prefixlen;
2852                       unsigned long int from;
2853                       unsigned long int to;
2854                       char *endp;
2855
2856                       for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
2857                         if (symbol[prefixlen] != endsymbol[prefixlen])
2858                           break;
2859
2860                       /* Convert the rest into numbers.  */
2861                       symbol[symbol_len] = '\0';
2862                       from = strtoul (&symbol[prefixlen], &endp,
2863                                       ellipsis == tok_ellipsis2 ? 16 : 10);
2864                       if (*endp != '\0')
2865                         goto col_sym_inv_range;
2866
2867                       endsymbol[symbol_len] = '\0';
2868                       to = strtoul (&endsymbol[prefixlen], &endp,
2869                                     ellipsis == tok_ellipsis2 ? 16 : 10);
2870                       if (*endp != '\0')
2871                         goto col_sym_inv_range;
2872
2873                       if (from > to)
2874                         goto col_sym_inv_range;
2875
2876                       /* Now loop over all entries.  */
2877                       while (from <= to)
2878                         {
2879                           char *symbuf;
2880
2881                           symbuf = (char *) obstack_alloc (&collate->mempool,
2882                                                            symbol_len + 1);
2883
2884                           /* Create the name.  */
2885                           sprintf (symbuf,
2886                                    ellipsis == tok_ellipsis2
2887                                    ? "%.*s%.*lX" : "%.*s%.*lX",
2888                                    (int) prefixlen, symbol,
2889                                    (int) (symbol_len - prefixlen), from);
2890
2891                           /* The name is already defined.  */
2892                           if (check_duplicate (ldfile, collate, charmap,
2893                                                repertoire, symbuf, symbol_len))
2894                             goto col_sym_free;
2895
2896                           insert_entry (&collate->sym_table, symbuf,
2897                                         symbol_len,
2898                                         new_symbol (collate, symbuf,
2899                                                     symbol_len));
2900
2901                           /* Increment the counter.  */
2902                           ++from;
2903                         }
2904
2905                       goto col_sym_free;
2906                     }
2907                 }
2908               else
2909                 {
2910                 col_sym_free:
2911                   if (symbol != NULL)
2912                     free (symbol);
2913                   if (endsymbol != NULL)
2914                     free (endsymbol);
2915                 }
2916             }
2917           break;
2918
2919         case tok_symbol_equivalence:
2920           /* Ignore the rest of the line if we don't need the input of
2921              this line.  */
2922           if (ignore_content)
2923             {
2924               lr_ignore_rest (ldfile, 0);
2925               break;
2926             }
2927
2928           if (state != 0)
2929             goto err_label;
2930
2931           arg = lr_token (ldfile, charmap, repertoire);
2932           if (arg->tok != tok_bsymbol)
2933             goto err_label;
2934           else
2935             {
2936               const char *newname = arg->val.str.startmb;
2937               size_t newname_len = arg->val.str.lenmb;
2938               const char *symname;
2939               size_t symname_len;
2940               struct symbol_t *symval;
2941
2942               arg = lr_token (ldfile, charmap, repertoire);
2943               if (arg->tok != tok_bsymbol)
2944                 {
2945                   if (newname != NULL)
2946                     free ((char *) newname);
2947                   goto err_label;
2948                 }
2949
2950               symname = arg->val.str.startmb;
2951               symname_len = arg->val.str.lenmb;
2952
2953               if (newname == NULL)
2954                 {
2955                   lr_error (ldfile, _("\
2956 %s: unknown character in equivalent definition name"),
2957                             "LC_COLLATE");
2958
2959                 sym_equiv_free:
2960                   if (newname != NULL)
2961                     free ((char *) newname);
2962                   if (symname != NULL)
2963                     free ((char *) symname);
2964                   break;
2965                 }
2966               if (symname == NULL)
2967                 {
2968                   lr_error (ldfile, _("\
2969 %s: unknown character in equivalent definition value"),
2970                             "LC_COLLATE");
2971                   goto sym_equiv_free;
2972                 }
2973
2974               /* See whether the symbol name is already defined.  */
2975               if (find_entry (&collate->sym_table, symname, symname_len,
2976                               (void **) &symval) != 0)
2977                 {
2978                   lr_error (ldfile, _("\
2979 %s: unknown symbol `%s' in equivalent definition"),
2980                             "LC_COLLATE", symname);
2981                   goto col_sym_free;
2982                 }
2983
2984               if (insert_entry (&collate->sym_table,
2985                                 newname, newname_len, symval) < 0)
2986                 {
2987                   lr_error (ldfile, _("\
2988 error while adding equivalent collating symbol"));
2989                   goto sym_equiv_free;
2990                 }
2991
2992               free ((char *) symname);
2993             }
2994           lr_ignore_rest (ldfile, 1);
2995           break;
2996
2997         case tok_script:
2998           /* We get told about the scripts we know.  */
2999           arg = lr_token (ldfile, charmap, repertoire);
3000           if (arg->tok != tok_bsymbol)
3001             goto err_label;
3002           else
3003             {
3004               struct section_list *runp = collate->known_sections;
3005               char *name;
3006
3007               while (runp != NULL)
3008                 if (strncmp (runp->name, arg->val.str.startmb,
3009                              arg->val.str.lenmb) == 0
3010                     && runp->name[arg->val.str.lenmb] == '\0')
3011                   break;
3012                 else
3013                   runp = runp->def_next;
3014
3015               if (runp != NULL)
3016                 {
3017                   lr_error (ldfile, _("duplicate definition of script `%s'"),
3018                             runp->name);
3019                   lr_ignore_rest (ldfile, 0);
3020                   break;
3021                 }
3022
3023               runp = (struct section_list *) xcalloc (1, sizeof (*runp));
3024               name = strncpy (xmalloc (arg->val.str.lenmb + 1),
3025                               arg->val.str.startmb, arg->val.str.lenmb);
3026               name[arg->val.str.lenmb] = '\0';
3027               runp->name = name;
3028
3029               runp->def_next = collate->known_sections;
3030               collate->known_sections = runp;
3031             }
3032           lr_ignore_rest (ldfile, 1);
3033           break;
3034
3035         case tok_order_start:
3036           /* Ignore the rest of the line if we don't need the input of
3037              this line.  */
3038           if (ignore_content)
3039             {
3040               lr_ignore_rest (ldfile, 0);
3041               break;
3042             }
3043
3044           if (state != 0 && state != 1)
3045             goto err_label;
3046           state = 1;
3047
3048           /* The 14652 draft does not specify whether all `order_start' lines
3049              must contain the same number of sort-rules, but 14651 does.  So
3050              we require this here as well.  */
3051           arg = lr_token (ldfile, charmap, repertoire);
3052           if (arg->tok == tok_bsymbol)
3053             {
3054               /* This had better be a section name.  */
3055               struct section_list *sp = collate->known_sections;
3056               while (sp != NULL
3057                      && (sp->name == NULL
3058                          || strncmp (sp->name, arg->val.str.startmb,
3059                                      arg->val.str.lenmb) != 0
3060                          || sp->name[arg->val.str.lenmb] != '\0'))
3061                 sp = sp->def_next;
3062
3063               if (sp == NULL)
3064                 {
3065                   lr_error (ldfile, _("\
3066 %s: unknown section name `%s'"),
3067                             "LC_COLLATE", arg->val.str.startmb);
3068                   /* We use the error section.  */
3069                   collate->current_section = &collate->error_section;
3070
3071                   if (collate->error_section.first == NULL)
3072                     {
3073                       if (collate->sections == NULL)
3074                         collate->sections = &collate->error_section;
3075                       else
3076                         {
3077                           sp = collate->sections;
3078                           while (sp->next != NULL)
3079                             sp = sp->next;
3080
3081                           collate->error_section.next = NULL;
3082                           sp->next = &collate->error_section;
3083                         }
3084                     }
3085                 }
3086               else
3087                 {
3088                   /* One should not be allowed to open the same
3089                      section twice.  */
3090                   if (sp->first != NULL)
3091                     lr_error (ldfile, _("\
3092 %s: multiple order definitions for section `%s'"),
3093                               "LC_COLLATE", sp->name);
3094                   else
3095                     {
3096                       if (collate->current_section == NULL)
3097                         collate->current_section = sp;
3098                       else
3099                         {
3100                           sp->next = collate->current_section->next;
3101                           collate->current_section->next = sp;
3102                         }
3103                     }
3104
3105                   /* Next should come the end of the line or a semicolon.  */
3106                   arg = lr_token (ldfile, charmap, repertoire);
3107                   if (arg->tok == tok_eol)
3108                     {
3109                       uint32_t cnt;
3110
3111                       /* This means we have exactly one rule: `forward'.  */
3112                       if (nrules > 1)
3113                         lr_error (ldfile, _("\
3114 %s: invalid number of sorting rules"),
3115                                   "LC_COLLATE");
3116                       else
3117                         nrules = 1;
3118                       sp->rules = obstack_alloc (&collate->mempool,
3119                                                  (sizeof (enum coll_sort_rule)
3120                                                   * nrules));
3121                       for (cnt = 0; cnt < nrules; ++cnt)
3122                         sp->rules[cnt] = sort_forward;
3123
3124                       /* Next line.  */
3125                       break;
3126                     }
3127
3128                   /* Get the next token.  */
3129                   arg = lr_token (ldfile, charmap, repertoire);
3130                 }
3131             }
3132           else
3133             {
3134               /* There is no section symbol.  Therefore we use the unnamed
3135                  section.  */
3136               collate->current_section = &collate->unnamed_section;
3137
3138               if (collate->unnamed_section.first != NULL)
3139                 lr_error (ldfile, _("\
3140 %s: multiple order definitions for unnamed section"),
3141                           "LC_COLLATE");
3142               else
3143                 {
3144                   collate->unnamed_section.next = collate->sections;
3145                   collate->sections = &collate->unnamed_section;
3146                 }
3147             }
3148
3149           /* Now read the direction names.  */
3150           read_directions (ldfile, arg, charmap, repertoire, collate);
3151
3152           /* From now on we need the strings untranslated.  */
3153           ldfile->translate_strings = 0;
3154           break;
3155
3156         case tok_order_end:
3157           /* Ignore the rest of the line if we don't need the input of
3158              this line.  */
3159           if (ignore_content)
3160             {
3161               lr_ignore_rest (ldfile, 0);
3162               break;
3163             }
3164
3165           if (state != 1)
3166             goto err_label;
3167
3168           /* Handle ellipsis at end of list.  */
3169           if (was_ellipsis != tok_none)
3170             {
3171               handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3172                                repertoire, collate);
3173               was_ellipsis = tok_none;
3174             }
3175
3176           state = 2;
3177           lr_ignore_rest (ldfile, 1);
3178           break;
3179
3180         case tok_reorder_after:
3181           /* Ignore the rest of the line if we don't need the input of
3182              this line.  */
3183           if (ignore_content)
3184             {
3185               lr_ignore_rest (ldfile, 0);
3186               break;
3187             }
3188
3189           if (state == 1)
3190             {
3191               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3192                         "LC_COLLATE");
3193               state = 2;
3194
3195               /* Handle ellipsis at end of list.  */
3196               if (was_ellipsis != tok_none)
3197                 {
3198                   handle_ellipsis (ldfile, arg->val.str.startmb,
3199                                    arg->val.str.lenmb, was_ellipsis, charmap,
3200                                    repertoire, collate);
3201                   was_ellipsis = tok_none;
3202                 }
3203             }
3204           else if (state != 2 && state != 3)
3205             goto err_label;
3206           state = 3;
3207
3208           arg = lr_token (ldfile, charmap, repertoire);
3209           if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3210             {
3211               /* Find this symbol in the sequence table.  */
3212               char ucsbuf[10];
3213               char *startmb;
3214               size_t lenmb;
3215               struct element_t *insp;
3216               int no_error = 1;
3217
3218               if (arg->tok == tok_bsymbol)
3219                 {
3220                   startmb = arg->val.str.startmb;
3221                   lenmb = arg->val.str.lenmb;
3222                 }
3223               else
3224                 {
3225                   sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3226                   startmb = ucsbuf;
3227                   lenmb = 9;
3228                 }
3229
3230               if (find_entry (&collate->seq_table, startmb, lenmb,
3231                               (void **) &insp) == 0)
3232                 /* Yes, the symbol exists.  Simply point the cursor
3233                    to it.  */
3234                   collate->cursor = insp;
3235               else
3236                 {
3237                   struct symbol_t *symbp;
3238
3239                   if (find_entry (&collate->sym_table, startmb, lenmb,
3240                                   (void **) &symbp) == 0)
3241                     {
3242                       if (symbp->order->last != NULL
3243                           || symbp->order->next != NULL)
3244                         collate->cursor = symbp->order;
3245                       else
3246                         {
3247                           /* This is a collating symbol but its position
3248                              is not yet defined.  */
3249                           lr_error (ldfile, _("\
3250 %s: order for collating symbol %.*s not yet defined"),
3251                                     "LC_COLLATE", (int) lenmb, startmb);
3252                           collate->cursor = NULL;
3253                           no_error = 0;
3254                         }
3255                     }
3256                   else if (find_entry (&collate->elem_table, startmb, lenmb,
3257                                        (void **) &insp) == 0)
3258                     {
3259                       if (insp->last != NULL || insp->next != NULL)
3260                         collate->cursor = insp;
3261                       else
3262                         {
3263                           /* This is a collating element but its position
3264                              is not yet defined.  */
3265                           lr_error (ldfile, _("\
3266 %s: order for collating element %.*s not yet defined"),
3267                                     "LC_COLLATE", (int) lenmb, startmb);
3268                           collate->cursor = NULL;
3269                           no_error = 0;
3270                         }
3271                     }
3272                   else
3273                     {
3274                       /* This is bad.  The symbol after which we have to
3275                          insert does not exist.  */
3276                       lr_error (ldfile, _("\
3277 %s: cannot reorder after %.*s: symbol not known"),
3278                                 "LC_COLLATE", (int) lenmb, startmb);
3279                       collate->cursor = NULL;
3280                       no_error = 0;
3281                     }
3282                 }
3283
3284               lr_ignore_rest (ldfile, no_error);
3285             }
3286           else
3287             /* This must not happen.  */
3288             goto err_label;
3289           break;
3290
3291         case tok_reorder_end:
3292           /* Ignore the rest of the line if we don't need the input of
3293              this line.  */
3294           if (ignore_content)
3295             break;
3296
3297           if (state != 3)
3298             goto err_label;
3299           state = 4;
3300           lr_ignore_rest (ldfile, 1);
3301           break;
3302
3303         case tok_reorder_sections_after:
3304           /* Ignore the rest of the line if we don't need the input of
3305              this line.  */
3306           if (ignore_content)
3307             {
3308               lr_ignore_rest (ldfile, 0);
3309               break;
3310             }
3311
3312           if (state == 1)
3313             {
3314               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3315                         "LC_COLLATE");
3316               state = 2;
3317
3318               /* Handle ellipsis at end of list.  */
3319               if (was_ellipsis != tok_none)
3320                 {
3321                   handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3322                                    repertoire, collate);
3323                   was_ellipsis = tok_none;
3324                 }
3325             }
3326           else if (state == 3)
3327             {
3328               error (0, 0, _("%s: missing `reorder-end' keyword"),
3329                      "LC_COLLATE");
3330               state = 4;
3331             }
3332           else if (state != 2 && state != 4)
3333             goto err_label;
3334           state = 5;
3335
3336           /* Get the name of the sections we are adding after.  */
3337           arg = lr_token (ldfile, charmap, repertoire);
3338           if (arg->tok == tok_bsymbol)
3339             {
3340               /* Now find a section with this name.  */
3341               struct section_list *runp = collate->sections;
3342
3343               while (runp != NULL)
3344                 {
3345                   if (runp->name != NULL
3346                       && strlen (runp->name) == arg->val.str.lenmb
3347                       && memcmp (runp->name, arg->val.str.startmb,
3348                                  arg->val.str.lenmb) == 0)
3349                     break;
3350
3351                   runp = runp->next;
3352                 }
3353
3354               if (runp != NULL)
3355                 collate->current_section = runp;
3356               else
3357                 {
3358                   /* This is bad.  The section after which we have to
3359                      reorder does not exist.  Therefore we cannot
3360                      process the whole rest of this reorder
3361                      specification.  */
3362                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3363                             "LC_COLLATE", (int) arg->val.str.lenmb,
3364                             arg->val.str.startmb);
3365
3366                   do
3367                     {
3368                       lr_ignore_rest (ldfile, 0);
3369
3370                       now = lr_token (ldfile, charmap, NULL);
3371                     }
3372                   while (now->tok == tok_reorder_sections_after
3373                          || now->tok == tok_reorder_sections_end
3374                          || now->tok == tok_end);
3375
3376                   /* Process the token we just saw.  */
3377                   nowtok = now->tok;
3378                   continue;
3379                 }
3380             }
3381           else
3382             /* This must not happen.  */
3383             goto err_label;
3384           break;
3385
3386         case tok_reorder_sections_end:
3387           /* Ignore the rest of the line if we don't need the input of
3388              this line.  */
3389           if (ignore_content)
3390             break;
3391
3392           if (state != 5)
3393             goto err_label;
3394           state = 6;
3395           lr_ignore_rest (ldfile, 1);
3396           break;
3397
3398         case tok_bsymbol:
3399         case tok_ucs4:
3400           /* Ignore the rest of the line if we don't need the input of
3401              this line.  */
3402           if (ignore_content)
3403             {
3404               lr_ignore_rest (ldfile, 0);
3405               break;
3406             }
3407
3408           if (state != 0 && state != 1 && state != 3 && state != 5)
3409             goto err_label;
3410
3411           if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3412             goto err_label;
3413
3414           if (nowtok == tok_ucs4)
3415             {
3416               snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3417               symstr = ucs4buf;
3418               symlen = 9;
3419             }
3420           else
3421             {
3422               symstr = arg->val.str.startmb;
3423               symlen = arg->val.str.lenmb;
3424             }
3425
3426           if (state == 0)
3427             {
3428               /* We are outside an `order_start' region.  This means
3429                  we must only accept definitions of values for
3430                  collation symbols since these are purely abstract
3431                  values and don't need directions associated.  */
3432               struct element_t *seqp;
3433
3434               if (find_entry (&collate->seq_table, symstr, symlen,
3435                               (void **) &seqp) == 0)
3436                 {
3437                   /* It's already defined.  First check whether this
3438                      is really a collating symbol.  */
3439                   if (seqp->is_character)
3440                     goto err_label;
3441
3442                   goto move_entry;
3443                 }
3444               else
3445                 {
3446                   void *result;
3447
3448                   if (find_entry (&collate->sym_table, symstr, symlen,
3449                                   &result) != 0)
3450                     /* No collating symbol, it's an error.  */
3451                     goto err_label;
3452
3453                   /* Maybe this is the first time we define a symbol
3454                      value and it is before the first actual section.  */
3455                   if (collate->sections == NULL)
3456                     collate->sections = collate->current_section =
3457                       &collate->symbol_section;
3458                 }
3459
3460               if (was_ellipsis != tok_none)
3461                 {
3462
3463                   handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3464                                    charmap, repertoire, collate);
3465
3466                   /* Remember that we processed the ellipsis.  */
3467                   was_ellipsis = tok_none;
3468
3469                   /* And don't add the value a second time.  */
3470                   break;
3471                 }
3472             }
3473           else if (state == 3)
3474             {
3475               /* It is possible that we already have this collation sequence.
3476                  In this case we move the entry.  */
3477               struct element_t *seqp;
3478               void *sym;
3479
3480               /* If the symbol after which we have to insert was not found
3481                  ignore all entries.  */
3482               if (collate->cursor == NULL)
3483                 {
3484                   lr_ignore_rest (ldfile, 0);
3485                   break;
3486                 }
3487
3488               if (find_entry (&collate->seq_table, symstr, symlen,
3489                               (void **) &seqp) == 0)
3490                 goto move_entry;
3491
3492               if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
3493                   && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3494                 goto move_entry;
3495
3496               if (find_entry (&collate->elem_table, symstr, symlen,
3497                               (void **) &seqp) == 0)
3498                 {
3499                 move_entry:
3500                   /* Remove the entry from the old position.  */
3501                   if (seqp->last == NULL)
3502                     collate->start = seqp->next;
3503                   else
3504                     seqp->last->next = seqp->next;
3505                   if (seqp->next != NULL)
3506                     seqp->next->last = seqp->last;
3507
3508                   /* We also have to check whether this entry is the
3509                      first or last of a section.  */
3510                   if (seqp->section->first == seqp)
3511                     {
3512                       if (seqp->section->first == seqp->section->last)
3513                         /* This section has no content anymore.  */
3514                         seqp->section->first = seqp->section->last = NULL;
3515                       else
3516                         seqp->section->first = seqp->next;
3517                     }
3518                   else if (seqp->section->last == seqp)
3519                     seqp->section->last = seqp->last;
3520
3521                   /* Now insert it in the new place.  */
3522                   insert_weights (ldfile, seqp, charmap, repertoire, collate,
3523                                   tok_none);
3524                   break;
3525                 }
3526
3527               /* Otherwise we just add a new entry.  */
3528             }
3529           else if (state == 5)
3530             {
3531               /* We are reordering sections.  Find the named section.  */
3532               struct section_list *runp = collate->sections;
3533               struct section_list *prevp = NULL;
3534
3535               while (runp != NULL)
3536                 {
3537                   if (runp->name != NULL
3538                       && strlen (runp->name) == symlen
3539                       && memcmp (runp->name, symstr, symlen) == 0)
3540                     break;
3541
3542                   prevp = runp;
3543                   runp = runp->next;
3544                 }
3545
3546               if (runp == NULL)
3547                 {
3548                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3549                             "LC_COLLATE", (int) symlen, symstr);
3550                   lr_ignore_rest (ldfile, 0);
3551                 }
3552               else
3553                 {
3554                   if (runp != collate->current_section)
3555                     {
3556                       /* Remove the named section from the old place and
3557                          insert it in the new one.  */
3558                       prevp->next = runp->next;
3559
3560                       runp->next = collate->current_section->next;
3561                       collate->current_section->next = runp;
3562                       collate->current_section = runp;
3563                     }
3564
3565                   /* Process the rest of the line which might change
3566                      the collation rules.  */
3567                   arg = lr_token (ldfile, charmap, repertoire);
3568                   if (arg->tok != tok_eof && arg->tok != tok_eol)
3569                     read_directions (ldfile, arg, charmap, repertoire,
3570                                      collate);
3571                 }
3572               break;
3573             }
3574           else if (was_ellipsis != tok_none)
3575             {
3576               /* Using the information in the `ellipsis_weight'
3577                  element and this and the last value we have to handle
3578                  the ellipsis now.  */
3579               assert (state == 1);
3580
3581               handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3582                                repertoire, collate);
3583
3584               /* Remember that we processed the ellipsis.  */
3585               was_ellipsis = tok_none;
3586
3587               /* And don't add the value a second time.  */
3588               break;
3589             }
3590
3591           /* Now insert in the new place.  */
3592           insert_value (ldfile, symstr, symlen, charmap, repertoire, collate);
3593           break;
3594
3595         case tok_undefined:
3596           /* Ignore the rest of the line if we don't need the input of
3597              this line.  */
3598           if (ignore_content)
3599             {
3600               lr_ignore_rest (ldfile, 0);
3601               break;
3602             }
3603
3604           if (state != 1)
3605             goto err_label;
3606
3607           if (was_ellipsis != tok_none)
3608             {
3609               lr_error (ldfile,
3610                         _("%s: cannot have `%s' as end of ellipsis range"),
3611                         "LC_COLLATE", "UNDEFINED");
3612
3613               unlink_element (collate);
3614               was_ellipsis = tok_none;
3615             }
3616
3617           /* See whether UNDEFINED already appeared somewhere.  */
3618           if (collate->undefined.next != NULL
3619               || (collate->cursor != NULL
3620                   && collate->undefined.next == collate->cursor))
3621             {
3622               lr_error (ldfile,
3623                         _("%s: order for `%.*s' already defined at %s:%Zu"),
3624                         "LC_COLLATE", 9, "UNDEFINED",
3625                         collate->undefined.file,
3626                         collate->undefined.line);
3627               lr_ignore_rest (ldfile, 0);
3628             }
3629           else
3630             /* Parse the weights.  */
3631              insert_weights (ldfile, &collate->undefined, charmap,
3632                              repertoire, collate, tok_none);
3633           break;
3634
3635         case tok_ellipsis2:
3636         case tok_ellipsis3:
3637         case tok_ellipsis4:
3638           /* This is the symbolic (decimal or hexadecimal) or absolute
3639              ellipsis.  */
3640           if (was_ellipsis != tok_none)
3641             goto err_label;
3642
3643           if (state != 0 && state != 1 && state != 3)
3644             goto err_label;
3645
3646           was_ellipsis = nowtok;
3647
3648           insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3649                           repertoire, collate, nowtok);
3650           break;
3651
3652         case tok_end:
3653           /* Next we assume `LC_COLLATE'.  */
3654           if (!ignore_content)
3655             {
3656               if (state == 0)
3657                 /* We must either see a copy statement or have
3658                    ordering values.  */
3659                 lr_error (ldfile,
3660                           _("%s: empty category description not allowed"),
3661                           "LC_COLLATE");
3662               else if (state == 1)
3663                 {
3664                   lr_error (ldfile, _("%s: missing `order_end' keyword"),
3665                             "LC_COLLATE");
3666
3667                   /* Handle ellipsis at end of list.  */
3668                   if (was_ellipsis != tok_none)
3669                     {
3670                       handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3671                                        repertoire, collate);
3672                       was_ellipsis = tok_none;
3673                     }
3674                 }
3675               else if (state == 3)
3676                 error (0, 0, _("%s: missing `reorder-end' keyword"),
3677                        "LC_COLLATE");
3678               else if (state == 5)
3679                 error (0, 0, _("%s: missing `reorder-sections-end' keyword"),
3680                        "LC_COLLATE");
3681             }
3682           arg = lr_token (ldfile, charmap, NULL);
3683           if (arg->tok == tok_eof)
3684             break;
3685           if (arg->tok == tok_eol)
3686             lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3687           else if (arg->tok != tok_lc_collate)
3688             lr_error (ldfile, _("\
3689 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3690           lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3691           return;
3692
3693         default:
3694         err_label:
3695           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3696         }
3697
3698       /* Prepare for the next round.  */
3699       now = lr_token (ldfile, charmap, NULL);
3700       nowtok = now->tok;
3701     }
3702
3703   /* When we come here we reached the end of the file.  */
3704   lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
3705 }