]> git.ipfire.org Git - thirdparty/glibc.git/blob - locale/programs/ld-collate.c
Update.
[thirdparty/glibc.git] / locale / programs / ld-collate.c
1 /* Copyright (C) 1995-1999, 2000 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #include <errno.h>
25 #include <error.h>
26 #include <stdlib.h>
27 #include <wchar.h>
28 #include <sys/param.h>
29
30 #include "charmap.h"
31 #include "localeinfo.h"
32 #include "linereader.h"
33 #include "locfile.h"
34 #include "localedef.h"
35 #include "elem-hash.h"
36
37 /* Uncomment the following line in the production version. */
38 /* #define NDEBUG 1 */
39 #include <assert.h>
40
41 #define obstack_chunk_alloc malloc
42 #define obstack_chunk_free free
43
/* Append the 32-bit value DATA to the object currently growing in
   OBSTACK.  */
static inline void
obstack_int32_grow (struct obstack *obstack, int32_t data)
{
  if (sizeof (int32_t) != sizeof (int))
    {
      /* `int' has a different width than int32_t, so the int-specific
         helper cannot be used; append the raw bytes instead.  */
      obstack_grow (obstack, &data, sizeof (int32_t));
      return;
    }

  obstack_int_grow (obstack, data);
}
52
/* Like obstack_int32_grow, but uses obstack_int_grow_fast when `int' is
   exactly as wide as int32_t.  Otherwise the value is appended with the
   regular obstack_grow.  */
static inline void
obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
{
  if (sizeof (int32_t) != sizeof (int))
    {
      /* No matching fast primitive for this width; append the raw
         bytes.  */
      obstack_grow (obstack, &data, sizeof (int32_t));
      return;
    }

  obstack_int_grow_fast (obstack, data);
}
61
62 /* Forward declaration. */
63 struct element_t;
64
/* One section of the collation definition.  Sections are chained
   through two independent links, `def_next' and `next'.  */
struct section_list
{
  /* Links to further sections.  */
  struct section_list *def_next;
  struct section_list *next;

  /* The section's name.  */
  const char *name;

  /* Head of the elements belonging to this section.  */
  struct element_t *first;
  /* Tail of the elements belonging to this section.  */
  struct element_t *last;

  /* Sorting rules in effect for this section.  */
  enum coll_sort_rule *rules;

  /* Position of the rule set in the corresponding part of the output
     file.  */
  int ruleidx;
};
81
82 struct element_t;
83
/* A counted array of element pointers; used for the weights of one
   level.  */
struct element_list_t
{
  /* How many entries `w' holds.  */
  int cnt;

  /* The entries themselves.  */
  struct element_t **w;
};
91
/* Description of a single collating element.  */
struct element_t
{
  const char *name;

  /* Multibyte and wide character representation together with their
     lengths.  */
  const char *mbs;
  size_t nmbs;
  const uint32_t *wcs;
  size_t nwcs;
  int *mborder;
  int wcorder;

  /* Bit mask with one bit per level; a bit is set when the element is
     used in that level.  Needed for the singlebyte weight computation.

     XXX Using `unsigned int' limits the number of levels to 32.  The
     type could be widened, but this is unlikely to ever be needed.  */
  unsigned int used_in_level;

  /* One weight list per level.  */
  struct element_list_t *weights;

  /* Nonzero for a real character definition.  */
  int is_character;

  /* Position of the character in the sequence; consulted when range
     expressions are handled.  */
  int mbseqorder;
  int wcseqorder;

  /* Location of the definition.  */
  const char *file;
  size_t line;

  /* The section this element is part of.  */
  struct section_list *section;

  /* Neighbours in the order list: predecessor, then successor.  */
  struct element_t *last;
  struct element_t *next;

  /* Links for the multibyte output list.  */
  struct element_t *mbnext;
  struct element_t *mblast;

  /* Links for the wide character output list.  */
  struct element_t *wcnext;
  struct element_t *wclast;
};
141
142 /* Special element value. */
143 #define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
144 #define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
145 #define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
146
/* Description of a collating symbol.  */
struct symbol_t
{
  const char *name;

  /* The element which marks this symbol's place in the order list.  */
  struct element_t *order;

  /* Location of the definition.  */
  const char *file;
  size_t line;
};
159
160 /* Sparse table of struct element_t *. */
161 #define TABLE wchead_table
162 #define ELEMENT struct element_t *
163 #define DEFAULT NULL
164 #define ITERATE
165 #define NO_FINALIZE
166 #include "3level.h"
167
168 /* Sparse table of int32_t. */
169 #define TABLE collidx_table
170 #define ELEMENT int32_t
171 #define DEFAULT 0
172 #include "3level.h"
173
174 /* Sparse table of uint32_t. */
175 #define TABLE collseq_table
176 #define ELEMENT uint32_t
177 #define DEFAULT ~((uint32_t) 0)
178 #include "3level.h"
179
180
181 /* The real definition of the struct for the LC_COLLATE locale. */
182 struct locale_collate_t
183 {
184 int col_weight_max;
185 int cur_weight_max;
186
187 /* List of known scripts. */
188 struct section_list *known_sections;
189 /* List of used sections. */
190 struct section_list *sections;
191 /* Current section using definition. */
192 struct section_list *current_section;
193 /* There always can be an unnamed section. */
194 struct section_list unnamed_section;
195 /* To make handling of errors easier we have another section. */
196 struct section_list error_section;
197 /* Sometimes we are defining the values for collating symbols before
198 the first actual section. */
199 struct section_list symbol_section;
200
201 /* Start of the order list. */
202 struct element_t *start;
203
204 /* The undefined element. */
205 struct element_t undefined;
206
207 /* This is the cursor for `reorder_after' insertions. */
208 struct element_t *cursor;
209
210 /* This value is used when handling ellipsis. */
211 struct element_t ellipsis_weight;
212
213 /* Known collating elements. */
214 hash_table elem_table;
215
216 /* Known collating symbols. */
217 hash_table sym_table;
218
219 /* Known collation sequences. */
220 hash_table seq_table;
221
222 struct obstack mempool;
223
224 /* The LC_COLLATE category is a bit special as it is sometimes possible
225 that the definitions from more than one input file contains information.
226 Therefore we keep all relevant input in a list. */
227 struct locale_collate_t *next;
228
229 /* Arrays with heads of the list for each of the leading bytes in
230 the multibyte sequences. */
231 struct element_t *mbheads[256];
232
233 /* Arrays with heads of the list for each of the leading bytes in
234 the multibyte sequences. */
235 struct wchead_table wcheads;
236
237 /* The arrays with the collation sequence order. */
238 unsigned char mbseqorder[256];
239 struct collseq_table wcseqorder;
240 };
241
242
243 /* We have a few global variables which are used for reading all
244 LC_COLLATE category descriptions in all files. */
245 static uint32_t nrules;
246
247
/* We need UTF-8 encoding of numbers.

   Store the UTF-8 encoding of VAL in BUF and return the number of
   bytes written (between 1 and 6).  No terminating NUL byte is
   appended, so BUF must provide room for up to six bytes.  Every
   value below 0x80 is stored as a single byte.  */
static inline int
utf8_encode (char *buf, int val)
{
  int retval;

  if (val < 0x80)
    {
      *buf = (char) val;
      retval = 1;
    }
  else
    {
      int step;

      /* A sequence of STEP bytes carries 5 * STEP + 1 payload bits.
         Pick the shortest sequence VAL fits in; six bytes is the
         maximum.  */
      for (step = 2; step < 6; ++step)
        if ((val & (~(uint32_t) 0 << (5 * step + 1))) == 0)
          break;
      retval = step;

      /* The lead byte starts with STEP one bits followed by a zero bit.
         Compute it from an unsigned constant: right-shifting the
         negative value `~0xff' is implementation-defined.  */
      *buf = (char) ((0xff00u >> step) & 0xffu);

      /* Fill in the continuation bytes from the end, six bits each.  */
      --step;
      do
        {
          buf[step] = 0x80 | (val & 0x3f);
          val >>= 6;
        }
      while (--step > 0);

      /* Whatever remains of VAL belongs into the lead byte.  */
      *buf |= val;
    }

  return retval;
}
281
282
283 static struct section_list *
284 make_seclist_elem (struct locale_collate_t *collate, const char *string,
285 struct section_list *next)
286 {
287 struct section_list *newp;
288
289 newp = (struct section_list *) obstack_alloc (&collate->mempool,
290 sizeof (*newp));
291 newp->next = next;
292 newp->name = string;
293 newp->first = NULL;
294
295 return newp;
296 }
297
298
299 static struct element_t *
300 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
301 const uint32_t *wcs, const char *name, size_t namelen,
302 int is_character)
303 {
304 struct element_t *newp;
305
306 newp = (struct element_t *) obstack_alloc (&collate->mempool,
307 sizeof (*newp));
308 newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
309 name, namelen);
310 if (mbs != NULL)
311 {
312 newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
313 newp->nmbs = mbslen;
314 }
315 else
316 {
317 newp->mbs = NULL;
318 newp->nmbs = 0;
319 }
320 if (wcs != NULL)
321 {
322 size_t nwcs = wcslen ((wchar_t *) wcs);
323 uint32_t zero = 0;
324 obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
325 obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
326 newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
327 newp->nwcs = nwcs;
328 }
329 else
330 {
331 newp->wcs = NULL;
332 newp->nwcs = 0;
333 }
334 newp->mborder = NULL;
335 newp->wcorder = 0;
336 newp->used_in_level = 0;
337 newp->is_character = is_character;
338
339 /* Will be allocated later. */
340 newp->weights = NULL;
341
342 newp->file = NULL;
343 newp->line = 0;
344
345 newp->section = collate->current_section;
346
347 newp->last = NULL;
348 newp->next = NULL;
349
350 newp->mbnext = NULL;
351 newp->mblast = NULL;
352
353 return newp;
354 }
355
356
357 static struct symbol_t *
358 new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
359 {
360 struct symbol_t *newp;
361
362 newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
363
364 newp->name = obstack_copy0 (&collate->mempool, name, len);
365 newp->order = NULL;
366
367 newp->file = NULL;
368 newp->line = 0;
369
370 return newp;
371 }
372
373
374 /* Test whether this name is already defined somewhere. */
375 static int
376 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
377 struct charmap_t *charmap, struct repertoire_t *repertoire,
378 const char *symbol, size_t symbol_len)
379 {
380 void *ignore = NULL;
381
382 if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
383 {
384 lr_error (ldfile, _("`%.*s' already defined in charmap"),
385 (int) symbol_len, symbol);
386 return 1;
387 }
388
389 if (repertoire != NULL
390 && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
391 == 0))
392 {
393 lr_error (ldfile, _("`%.*s' already defined in repertoire"),
394 (int) symbol_len, symbol);
395 return 1;
396 }
397
398 if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
399 {
400 lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
401 (int) symbol_len, symbol);
402 return 1;
403 }
404
405 if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
406 {
407 lr_error (ldfile, _("`%.*s' already defined as collating element"),
408 (int) symbol_len, symbol);
409 return 1;
410 }
411
412 return 0;
413 }
414
415
416 /* Read the direction specification. */
417 static void
418 read_directions (struct linereader *ldfile, struct token *arg,
419 struct charmap_t *charmap, struct repertoire_t *repertoire,
420 struct locale_collate_t *collate)
421 {
422 int cnt = 0;
423 int max = nrules ?: 10;
424 enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
425 int warned = 0;
426
427 while (1)
428 {
429 int valid = 0;
430
431 if (arg->tok == tok_forward)
432 {
433 if (rules[cnt] & sort_backward)
434 {
435 if (! warned)
436 {
437 lr_error (ldfile, _("\
438 %s: `forward' and `backward' are mutually excluding each other"),
439 "LC_COLLATE");
440 warned = 1;
441 }
442 }
443 else if (rules[cnt] & sort_forward)
444 {
445 if (! warned)
446 {
447 lr_error (ldfile, _("\
448 %s: `%s' mentioned more than once in definition of weight %d"),
449 "LC_COLLATE", "forward", cnt + 1);
450 }
451 }
452 else
453 rules[cnt] |= sort_forward;
454
455 valid = 1;
456 }
457 else if (arg->tok == tok_backward)
458 {
459 if (rules[cnt] & sort_forward)
460 {
461 if (! warned)
462 {
463 lr_error (ldfile, _("\
464 %s: `forward' and `backward' are mutually excluding each other"),
465 "LC_COLLATE");
466 warned = 1;
467 }
468 }
469 else if (rules[cnt] & sort_backward)
470 {
471 if (! warned)
472 {
473 lr_error (ldfile, _("\
474 %s: `%s' mentioned more than once in definition of weight %d"),
475 "LC_COLLATE", "backward", cnt + 1);
476 }
477 }
478 else
479 rules[cnt] |= sort_backward;
480
481 valid = 1;
482 }
483 else if (arg->tok == tok_position)
484 {
485 if (rules[cnt] & sort_position)
486 {
487 if (! warned)
488 {
489 lr_error (ldfile, _("\
490 %s: `%s' mentioned more than once in definition of weight %d"),
491 "LC_COLLATE", "position", cnt + 1);
492 }
493 }
494 else
495 rules[cnt] |= sort_position;
496
497 valid = 1;
498 }
499
500 if (valid)
501 arg = lr_token (ldfile, charmap, repertoire);
502
503 if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
504 || arg->tok == tok_semicolon)
505 {
506 if (! valid && ! warned)
507 {
508 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
509 warned = 1;
510 }
511
512 /* See whether we have to increment the counter. */
513 if (arg->tok != tok_comma && rules[cnt] != 0)
514 {
515 /* Add the default `forward' if we have seen only `position'. */
516 if (rules[cnt] == sort_position)
517 rules[cnt] = sort_position | sort_forward;
518
519 ++cnt;
520 }
521
522 if (arg->tok == tok_eof || arg->tok == tok_eol)
523 /* End of line or file, so we exit the loop. */
524 break;
525
526 if (nrules == 0)
527 {
528 /* See whether we have enough room in the array. */
529 if (cnt == max)
530 {
531 max += 10;
532 rules = (enum coll_sort_rule *) xrealloc (rules,
533 max
534 * sizeof (*rules));
535 memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
536 }
537 }
538 else
539 {
540 if (cnt == nrules)
541 {
542 /* There must not be any more rule. */
543 if (! warned)
544 {
545 lr_error (ldfile, _("\
546 %s: too many rules; first entry only had %d"),
547 "LC_COLLATE", nrules);
548 warned = 1;
549 }
550
551 lr_ignore_rest (ldfile, 0);
552 break;
553 }
554 }
555 }
556 else
557 {
558 if (! warned)
559 {
560 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
561 warned = 1;
562 }
563 }
564
565 arg = lr_token (ldfile, charmap, repertoire);
566 }
567
568 if (nrules == 0)
569 {
570 /* Now we know how many rules we have. */
571 nrules = cnt;
572 rules = (enum coll_sort_rule *) xrealloc (rules,
573 nrules * sizeof (*rules));
574 }
575 else
576 {
577 if (cnt < nrules)
578 {
579 /* Not enough rules in this specification. */
580 if (! warned)
581 lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
582
583 do
584 rules[cnt] = sort_forward;
585 while (++cnt < nrules);
586 }
587 }
588
589 collate->current_section->rules = rules;
590 }
591
592
593 static struct element_t *
594 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
595 const char *str, size_t len)
596 {
597 struct element_t *result = NULL;
598
599 /* Search for the entries among the collation sequences already define. */
600 if (find_entry (&collate->seq_table, str, len, (void **) &result) != 0)
601 {
602 /* Nope, not define yet. So we see whether it is a
603 collation symbol. */
604 void *ptr;
605
606 if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
607 {
608 /* It's a collation symbol. */
609 struct symbol_t *sym = (struct symbol_t *) ptr;
610 result = sym->order;
611
612 if (result == NULL)
613 result = sym->order = new_element (collate, NULL, 0, NULL,
614 NULL, 0, 0);
615 }
616 else if (find_entry (&collate->elem_table, str, len,
617 (void **) &result) != 0)
618 {
619 /* It's also no collation element. So it is a character
620 element defined later. */
621 result = new_element (collate, NULL, 0, NULL, str, len, 1);
622 if (result != NULL)
623 /* Insert it into the sequence table. */
624 insert_entry (&collate->seq_table, str, len, result);
625 }
626 }
627
628 return result;
629 }
630
631
632 static void
633 unlink_element (struct locale_collate_t *collate)
634 {
635 if (collate->cursor == collate->start)
636 {
637 assert (collate->cursor->next == NULL);
638 assert (collate->cursor->last == NULL);
639 collate->cursor = NULL;
640 }
641 else
642 {
643 if (collate->cursor->next != NULL)
644 collate->cursor->next->last = collate->cursor->last;
645 if (collate->cursor->last != NULL)
646 collate->cursor->last->next = collate->cursor->next;
647 collate->cursor = collate->cursor->last;
648 }
649 }
650
651
652 static void
653 insert_weights (struct linereader *ldfile, struct element_t *elem,
654 struct charmap_t *charmap, struct repertoire_t *repertoire,
655 struct locale_collate_t *collate, enum token_t ellipsis)
656 {
657 int weight_cnt;
658 struct token *arg;
659
660 /* Initialize all the fields. */
661 elem->file = ldfile->fname;
662 elem->line = ldfile->lineno;
663 elem->last = collate->cursor;
664 elem->next = collate->cursor ? collate->cursor->next : NULL;
665 if (collate->cursor != NULL && collate->cursor->next != NULL)
666 collate->cursor->next->last = elem;
667 elem->section = collate->current_section;
668 if (collate->cursor != NULL)
669 collate->cursor->next = elem;
670 if (collate->start == NULL)
671 {
672 assert (collate->cursor == NULL);
673 collate->start = elem;
674 }
675 elem->weights = (struct element_list_t *)
676 obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
677 memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
678
679 if (collate->current_section->first == NULL)
680 collate->current_section->first = elem;
681 if (collate->current_section->last == collate->cursor)
682 collate->current_section->last = elem;
683
684 collate->cursor = elem;
685
686 weight_cnt = 0;
687
688 arg = lr_token (ldfile, charmap, repertoire);
689 do
690 {
691 if (arg->tok == tok_eof || arg->tok == tok_eol)
692 break;
693
694 if (arg->tok == tok_ignore)
695 {
696 /* The weight for this level has to be ignored. We use the
697 null pointer to indicate this. */
698 elem->weights[weight_cnt].w = (struct element_t **)
699 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
700 elem->weights[weight_cnt].w[0] = NULL;
701 elem->weights[weight_cnt].cnt = 1;
702 }
703 else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
704 {
705 char ucs4str[10];
706 struct element_t *val;
707 char *symstr;
708 size_t symlen;
709
710 if (arg->tok == tok_bsymbol)
711 {
712 symstr = arg->val.str.startmb;
713 symlen = arg->val.str.lenmb;
714 }
715 else
716 {
717 snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
718 symstr = ucs4str;
719 symlen = 9;
720 }
721
722 val = find_element (ldfile, collate, symstr, symlen);
723 if (val == NULL)
724 break;
725
726 elem->weights[weight_cnt].w = (struct element_t **)
727 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
728 elem->weights[weight_cnt].w[0] = val;
729 elem->weights[weight_cnt].cnt = 1;
730 }
731 else if (arg->tok == tok_string)
732 {
733 /* Split the string up in the individual characters and put
734 the element definitions in the list. */
735 const char *cp = arg->val.str.startmb;
736 int cnt = 0;
737 struct element_t *charelem;
738 struct element_t **weights = NULL;
739 int max = 0;
740
741 if (*cp == '\0')
742 {
743 lr_error (ldfile, _("%s: empty weight string not allowed"),
744 "LC_COLLATE");
745 lr_ignore_rest (ldfile, 0);
746 break;
747 }
748
749 do
750 {
751 if (*cp == '<')
752 {
753 /* Ahh, it's a bsymbol or an UCS4 value. If it's
754 the latter we have to unify the name. */
755 const char *startp = ++cp;
756 size_t len;
757
758 while (*cp != '>')
759 {
760 if (*cp == ldfile->escape_char)
761 ++cp;
762 if (*cp == '\0')
763 /* It's a syntax error. */
764 goto syntax;
765
766 ++cp;
767 }
768
769 if (cp - startp == 5 && startp[0] == 'U'
770 && isxdigit (startp[1]) && isxdigit (startp[2])
771 && isxdigit (startp[3]) && isxdigit (startp[4]))
772 {
773 unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
774 char *newstr;
775
776 newstr = (char *) xmalloc (10);
777 snprintf (newstr, 10, "U%08X", ucs4);
778 startp = newstr;
779
780 len = 9;
781 }
782 else
783 len = cp - startp;
784
785 charelem = find_element (ldfile, collate, startp, len);
786 ++cp;
787 }
788 else
789 {
790 /* People really shouldn't use characters directly in
791 the string. Especially since it's not really clear
792 what this means. We interpret all characters in the
793 string as if that would be bsymbols. Otherwise we
794 would have to match back to bsymbols somehow and this
795 is normally not what people normally expect. */
796 charelem = find_element (ldfile, collate, cp++, 1);
797 }
798
799 if (charelem == NULL)
800 {
801 /* We ignore the rest of the line. */
802 lr_ignore_rest (ldfile, 0);
803 break;
804 }
805
806 /* Add the pointer. */
807 if (cnt >= max)
808 {
809 struct element_t **newp;
810 max += 10;
811 newp = (struct element_t **)
812 alloca (max * sizeof (struct element_t *));
813 memcpy (newp, weights, cnt * sizeof (struct element_t *));
814 weights = newp;
815 }
816 weights[cnt++] = charelem;
817 }
818 while (*cp != '\0');
819
820 /* Now store the information. */
821 elem->weights[weight_cnt].w = (struct element_t **)
822 obstack_alloc (&collate->mempool,
823 cnt * sizeof (struct element_t *));
824 memcpy (elem->weights[weight_cnt].w, weights,
825 cnt * sizeof (struct element_t *));
826 elem->weights[weight_cnt].cnt = cnt;
827
828 /* We don't need the string anymore. */
829 free (arg->val.str.startmb);
830 }
831 else if (ellipsis != tok_none
832 && (arg->tok == tok_ellipsis2
833 || arg->tok == tok_ellipsis3
834 || arg->tok == tok_ellipsis4))
835 {
836 /* It must be the same ellipsis as used in the initial column. */
837 if (arg->tok != ellipsis)
838 lr_error (ldfile, _("\
839 %s: weights must use the same ellipsis symbol as the name"),
840 "LC_COLLATE");
841
842 /* The weight for this level has to be ignored. We use the
843 null pointer to indicate this. */
844 elem->weights[weight_cnt].w = (struct element_t **)
845 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
846 elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
847 elem->weights[weight_cnt].cnt = 1;
848 }
849 else
850 {
851 syntax:
852 /* It's a syntax error. */
853 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
854 lr_ignore_rest (ldfile, 0);
855 break;
856 }
857
858 arg = lr_token (ldfile, charmap, repertoire);
859 /* This better should be the end of the line or a semicolon. */
860 if (arg->tok == tok_semicolon)
861 /* OK, ignore this and read the next token. */
862 arg = lr_token (ldfile, charmap, repertoire);
863 else if (arg->tok != tok_eof && arg->tok != tok_eol)
864 {
865 /* It's a syntax error. */
866 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
867 lr_ignore_rest (ldfile, 0);
868 break;
869 }
870 }
871 while (++weight_cnt < nrules);
872
873 if (weight_cnt < nrules)
874 {
875 /* This means the rest of the line uses the current element as
876 the weight. */
877 do
878 {
879 elem->weights[weight_cnt].w = (struct element_t **)
880 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
881 if (ellipsis == tok_none)
882 elem->weights[weight_cnt].w[0] = elem;
883 else
884 elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
885 elem->weights[weight_cnt].cnt = 1;
886 }
887 while (++weight_cnt < nrules);
888 }
889 else
890 {
891 if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
892 {
893 /* Too many rule values. */
894 lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
895 lr_ignore_rest (ldfile, 0);
896 }
897 else
898 lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
899 }
900 }
901
902
903 static int
904 insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
905 struct charmap_t *charmap, struct repertoire_t *repertoire,
906 struct locale_collate_t *collate)
907 {
908 /* First find out what kind of symbol this is. */
909 struct charseq *seq;
910 uint32_t wc;
911 struct element_t *elem = NULL;
912
913 /* Try to find the character in the charmap. */
914 seq = charmap_find_value (charmap, symstr, symlen);
915
916 /* Determine the wide character. */
917 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
918 {
919 wc = repertoire_find_value (repertoire, symstr, symlen);
920 if (seq != NULL)
921 seq->ucs4 = wc;
922 }
923 else
924 wc = seq->ucs4;
925
926 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
927 {
928 /* It's no character, so look through the collation elements and
929 symbol list. */
930 void *result;
931
932 if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
933 {
934 /* It's a collation symbol. */
935 struct symbol_t *sym = (struct symbol_t *) result;
936 elem = sym->order;
937
938 if (elem == NULL)
939 elem = sym->order = new_element (collate, NULL, 0, NULL,
940 sym->name, strlen (sym->name), 0);
941 }
942 else if (find_entry (&collate->elem_table, symstr, symlen,
943 (void **) &elem) != 0)
944 {
945 /* It's also no collation element. Therefore ignore it. */
946 lr_ignore_rest (ldfile, 0);
947 return 1;
948 }
949 }
950 else
951 {
952 /* Otherwise the symbols stands for a character. */
953 if (find_entry (&collate->seq_table, symstr, symlen,
954 (void **) &elem) != 0)
955 {
956 uint32_t wcs[2] = { wc, 0 };
957
958 /* We have to allocate an entry. */
959 elem = new_element (collate, seq != NULL ? seq->bytes : NULL,
960 seq != NULL ? seq->nbytes : 0,
961 wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
962 symstr, symlen, 1);
963
964 /* And add it to the table. */
965 if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
966 /* This cannot happen. */
967 assert (! "Internal error");
968 }
969 else
970 {
971 /* Maybe the character was used before the definition. In this case
972 we have to insert the byte sequences now. */
973 if (elem->mbs == NULL && seq != NULL)
974 {
975 elem->mbs = obstack_copy0 (&collate->mempool,
976 seq->bytes, seq->nbytes);
977 elem->nmbs = seq->nbytes;
978 }
979
980 if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
981 {
982 uint32_t wcs[2] = { wc, 0 };
983
984 elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
985 elem->nwcs = 1;
986 }
987 }
988 }
989
990 /* Test whether this element is not already in the list. */
991 if (elem->next != NULL || (collate->cursor != NULL
992 && elem->next == collate->cursor))
993 {
994 lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
995 (int) symlen, symstr, elem->file, elem->line);
996 lr_ignore_rest (ldfile, 0);
997 return 1;
998 }
999
1000 insert_weights (ldfile, elem, charmap, repertoire, collate, tok_none);
1001
1002 return 0;
1003 }
1004
1005
1006 static void
1007 handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1008 enum token_t ellipsis, struct charmap_t *charmap,
1009 struct repertoire_t *repertoire,
1010 struct locale_collate_t *collate)
1011 {
1012 struct element_t *startp;
1013 struct element_t *endp;
1014
1015 /* Unlink the entry added for the ellipsis. */
1016 unlink_element (collate);
1017 startp = collate->cursor;
1018
1019 /* Process and add the end-entry. */
1020 if (symstr != NULL
1021 && insert_value (ldfile, symstr, symlen, charmap, repertoire, collate))
1022 /* Something went wrong with inserting the to-value. This means
1023 we cannot process the ellipsis. */
1024 return;
1025
1026 /* Reset the cursor. */
1027 collate->cursor = startp;
1028
1029 /* Now we have to handle many different situations:
1030 - we have to distinguish between the three different ellipsis forms
1031 - the is the ellipsis at the beginning, in the middle, or at the end.
1032 */
1033 endp = collate->cursor->next;
1034 assert (symstr == NULL || endp != NULL);
1035
1036 /* XXX The following is probably very wrong since also collating symbols
1037 can appear in ranges. But do we want/can refine the test for that? */
1038 #if 0
1039 /* Both, the start and the end symbol, must stand for characters. */
1040 if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1041 || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1042 {
1043 lr_error (ldfile, _("\
1044 %s: the start and the end symbol of a range must stand for characters"),
1045 "LC_COLLATE");
1046 return;
1047 }
1048 #endif
1049
1050 if (ellipsis == tok_ellipsis3)
1051 {
1052 /* One requirement we make here: the length of the byte
1053 sequences for the first and end character must be the same.
1054 This is mainly to prevent unwanted effects and this is often
1055 not what is wanted. */
1056 size_t len = (startp->mbs != NULL ? startp->nmbs
1057 : (endp->mbs != NULL ? endp->nmbs : 0));
1058 char mbcnt[len + 1];
1059 char mbend[len + 1];
1060
1061 /* Well, this should be caught somewhere else already. Just to
1062 make sure. */
1063 assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1064 assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1065
1066 if (startp != NULL && endp != NULL
1067 && startp->mbs != NULL && endp->mbs != NULL
1068 && startp->nmbs != endp->nmbs)
1069 {
1070 lr_error (ldfile, _("\
1071 %s: byte sequences of first and last character must have the same length"),
1072 "LC_COLLATE");
1073 return;
1074 }
1075
1076 /* Determine whether we have to generate multibyte sequences. */
1077 if ((startp == NULL || startp->mbs != NULL)
1078 && (endp == NULL || endp->mbs != NULL))
1079 {
1080 int cnt;
1081 int ret;
1082
1083 /* Prepare the beginning byte sequence. This is either from the
1084 beginning byte sequence or it is all nulls if it was an
1085 initial ellipsis. */
1086 if (startp == NULL || startp->mbs == NULL)
1087 memset (mbcnt, '\0', len);
1088 else
1089 {
1090 memcpy (mbcnt, startp->mbs, len);
1091
1092 /* And increment it so that the value is the first one we will
1093 try to insert. */
1094 for (cnt = len - 1; cnt >= 0; --cnt)
1095 if (++mbcnt[cnt] != '\0')
1096 break;
1097 }
1098 mbcnt[len] = '\0';
1099
1100 /* And the end sequence. */
1101 if (endp == NULL || endp->mbs == NULL)
1102 memset (mbend, '\0', len);
1103 else
1104 memcpy (mbend, endp->mbs, len);
1105 mbend[len] = '\0';
1106
1107 /* Test whether we have a correct range. */
1108 ret = memcmp (mbcnt, mbend, len);
1109 if (ret >= 0)
1110 {
1111 if (ret > 0)
1112 lr_error (ldfile, _("%s: byte sequence of first character of \
1113 sequence is not lower than that of the last character"), "LC_COLLATE");
1114 return;
1115 }
1116
1117 /* Generate the byte sequences data. */
1118 while (1)
1119 {
1120 struct charseq *seq;
1121
1122 /* Quite a bit of work ahead. We have to find the character
1123 definition for the byte sequence and then determine the
1124 wide character belonging to it. */
1125 seq = charmap_find_symbol (charmap, mbcnt, len);
1126 if (seq != NULL)
1127 {
1128 struct element_t *elem;
1129 size_t namelen;
1130
1131 /* I don't this this can ever happen. */
1132 assert (seq->name != NULL);
1133 namelen = strlen (seq->name);
1134
1135 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1136 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1137 namelen);
1138
1139 /* Now we are ready to insert the new value in the
1140 sequence. Find out whether the element is
1141 already known. */
1142 if (find_entry (&collate->seq_table, seq->name, namelen,
1143 (void **) &elem) != 0)
1144 {
1145 uint32_t wcs[2] = { seq->ucs4, 0 };
1146
1147 /* We have to allocate an entry. */
1148 elem = new_element (collate, mbcnt, len,
1149 seq->ucs4 == ILLEGAL_CHAR_VALUE
1150 ? NULL : wcs, seq->name,
1151 namelen, 1);
1152
1153 /* And add it to the table. */
1154 if (insert_entry (&collate->seq_table, seq->name,
1155 namelen, elem) != 0)
1156 /* This cannot happen. */
1157 assert (! "Internal error");
1158 }
1159
1160 /* Test whether this element is not already in the list. */
1161 if (elem->next != NULL || (collate->cursor != NULL
1162 && elem->next == collate->cursor))
1163 {
1164 lr_error (ldfile, _("\
1165 order for `%.*s' already defined at %s:%Zu"),
1166 (int) namelen, seq->name,
1167 elem->file, elem->line);
1168 goto increment;
1169 }
1170
1171 /* Enqueue the new element. */
1172 elem->last = collate->cursor;
1173 if (collate->cursor == NULL)
1174 elem->next = NULL;
1175 else
1176 {
1177 elem->next = collate->cursor->next;
1178 elem->last->next = elem;
1179 if (elem->next != NULL)
1180 elem->next->last = elem;
1181 }
1182 if (collate->start == NULL)
1183 {
1184 assert (collate->cursor == NULL);
1185 collate->start = elem;
1186 }
1187 collate->cursor = elem;
1188
1189 /* Add the weight value. We take them from the
1190 `ellipsis_weights' member of `collate'. */
1191 elem->weights = (struct element_list_t *)
1192 obstack_alloc (&collate->mempool,
1193 nrules * sizeof (struct element_list_t));
1194 for (cnt = 0; cnt < nrules; ++cnt)
1195 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1196 && (collate->ellipsis_weight.weights[cnt].w[0]
1197 == ELEMENT_ELLIPSIS2))
1198 {
1199 elem->weights[cnt].w = (struct element_t **)
1200 obstack_alloc (&collate->mempool,
1201 sizeof (struct element_t *));
1202 elem->weights[cnt].w[0] = elem;
1203 elem->weights[cnt].cnt = 1;
1204 }
1205 else
1206 {
1207 /* Simply use the weight from `ellipsis_weight'. */
1208 elem->weights[cnt].w =
1209 collate->ellipsis_weight.weights[cnt].w;
1210 elem->weights[cnt].cnt =
1211 collate->ellipsis_weight.weights[cnt].cnt;
1212 }
1213 }
1214
1215 /* Increment for the next round. */
1216 increment:
1217 for (cnt = len - 1; cnt >= 0; --cnt)
1218 if (++mbcnt[cnt] != '\0')
1219 break;
1220
1221 /* Find out whether this was all. */
1222 if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1223 /* Yep, that's all. */
1224 break;
1225 }
1226 }
1227 }
1228 else
1229 {
1230 /* For symbolic range we naturally must have a beginning and an
1231 end specified by the user. */
1232 if (startp == NULL)
1233 lr_error (ldfile, _("\
1234 %s: symbolic range ellipsis must not directly follow `order_start'"),
1235 "LC_COLLATE");
1236 else if (endp == NULL)
1237 lr_error (ldfile, _("\
1238 %s: symbolic range ellipsis must not be direct followed by `order_end'"),
1239 "LC_COLLATE");
1240 else
1241 {
1242 /* Determine the range. To do so we have to determine the
1243 common prefix of the both names and then the numeric
1244 values of both ends. */
1245 size_t lenfrom = strlen (startp->name);
1246 size_t lento = strlen (endp->name);
1247 char buf[lento + 1];
1248 int preflen = 0;
1249 long int from;
1250 long int to;
1251 char *cp;
1252 int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1253
1254 if (lenfrom != lento)
1255 {
1256 invalid_range:
1257 lr_error (ldfile, _("\
1258 `%s' and `%.*s' are no valid names for symbolic range"),
1259 startp->name, (int) lento, endp->name);
1260 return;
1261 }
1262
1263 while (startp->name[preflen] == endp->name[preflen])
1264 if (startp->name[preflen] == '\0')
1265 /* Nothing to be done. The start and end point are identical
1266 and while inserting the end point we have already given
1267 the user an error message. */
1268 return;
1269 else
1270 ++preflen;
1271
1272 errno = 0;
1273 from = strtol (startp->name + preflen, &cp, base);
1274 if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1275 goto invalid_range;
1276
1277 errno = 0;
1278 to = strtol (endp->name + preflen, &cp, base);
1279 if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1280 goto invalid_range;
1281
1282 /* Copy the prefix. */
1283 memcpy (buf, startp->name, preflen);
1284
1285 /* Loop over all values. */
1286 for (++from; from < to; ++from)
1287 {
1288 struct element_t *elem = NULL;
1289 struct charseq *seq;
1290 uint32_t wc;
1291 int cnt;
1292
1293 /* Generate the name. */
1294 sprintf (buf + preflen, base == 10 ? "%d" : "%x", from);
1295
1296 /* Look whether this name is already defined. */
1297 if (find_entry (&collate->seq_table, buf, symlen,
1298 (void **) &elem) == 0)
1299 {
1300 if (elem->next != NULL || (collate->cursor != NULL
1301 && elem->next == collate->cursor))
1302 {
1303 lr_error (ldfile, _("\
1304 %s: order for `%.*s' already defined at %s:%Zu"),
1305 "LC_COLLATE", (int) lenfrom, buf,
1306 elem->file, elem->line);
1307 continue;
1308 }
1309
1310 if (elem->name == NULL)
1311 {
1312 lr_error (ldfile, _("%s: `%s' must be a charater"),
1313 "LC_COLLATE", buf);
1314 continue;
1315 }
1316 }
1317
1318 if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1319 {
1320 /* Search for a character of this name. */
1321 seq = charmap_find_value (charmap, buf, lenfrom);
1322 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1323 {
1324 wc = repertoire_find_value (repertoire, buf, lenfrom);
1325
1326 if (seq != NULL)
1327 seq->ucs4 = wc;
1328 }
1329 else
1330 wc = seq->ucs4;
1331
1332 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1333 /* We don't know anything about a character with this
1334 name. XXX Should we warn? */
1335 continue;
1336
1337 if (elem == NULL)
1338 {
1339 uint32_t wcs[2] = { wc, 0 };
1340
1341 /* We have to allocate an entry. */
1342 elem = new_element (collate,
1343 seq != NULL ? seq->bytes : NULL,
1344 seq != NULL ? seq->nbytes : 0,
1345 wc == ILLEGAL_CHAR_VALUE
1346 ? NULL : wcs, buf, lenfrom, 1);
1347 }
1348 else
1349 {
1350 /* Update the element. */
1351 if (seq != NULL)
1352 {
1353 elem->mbs = obstack_copy0 (&collate->mempool,
1354 seq->bytes, seq->nbytes);
1355 elem->nmbs = seq->nbytes;
1356 }
1357
1358 if (wc != ILLEGAL_CHAR_VALUE)
1359 {
1360 uint32_t zero = 0;
1361
1362 obstack_grow (&collate->mempool,
1363 &wc, sizeof (uint32_t));
1364 obstack_grow (&collate->mempool,
1365 &zero, sizeof (uint32_t));
1366 elem->wcs = obstack_finish (&collate->mempool);
1367 elem->nwcs = 1;
1368 }
1369 }
1370
1371 elem->file = ldfile->fname;
1372 elem->line = ldfile->lineno;
1373 elem->section = collate->current_section;
1374 }
1375
1376 /* Enqueue the new element. */
1377 elem->last = collate->cursor;
1378 elem->next = collate->cursor->next;
1379 elem->last->next = elem;
1380 if (elem->next != NULL)
1381 elem->next->last = elem;
1382 collate->cursor = elem;
1383
1384 /* Now add the weights. They come from the `ellipsis_weights'
1385 member of `collate'. */
1386 elem->weights = (struct element_list_t *)
1387 obstack_alloc (&collate->mempool,
1388 nrules * sizeof (struct element_list_t));
1389 for (cnt = 0; cnt < nrules; ++cnt)
1390 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1391 && (collate->ellipsis_weight.weights[cnt].w[0]
1392 == ELEMENT_ELLIPSIS2))
1393 {
1394 elem->weights[cnt].w = (struct element_t **)
1395 obstack_alloc (&collate->mempool,
1396 sizeof (struct element_t *));
1397 elem->weights[cnt].w[0] = elem;
1398 elem->weights[cnt].cnt = 1;
1399 }
1400 else
1401 {
1402 /* Simply use the weight from `ellipsis_weight'. */
1403 elem->weights[cnt].w =
1404 collate->ellipsis_weight.weights[cnt].w;
1405 elem->weights[cnt].cnt =
1406 collate->ellipsis_weight.weights[cnt].cnt;
1407 }
1408 }
1409 }
1410 }
1411 }
1412
1413
1414 static void
1415 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1416 struct localedef_t *copy_locale, int ignore_content)
1417 {
1418 if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1419 {
1420 struct locale_collate_t *collate;
1421
1422 if (copy_locale == NULL)
1423 {
1424 collate = locale->categories[LC_COLLATE].collate =
1425 (struct locale_collate_t *)
1426 xcalloc (1, sizeof (struct locale_collate_t));
1427
1428 /* Init the various data structures. */
1429 init_hash (&collate->elem_table, 100);
1430 init_hash (&collate->sym_table, 100);
1431 init_hash (&collate->seq_table, 500);
1432 obstack_init (&collate->mempool);
1433
1434 collate->col_weight_max = -1;
1435 }
1436 else
1437 collate = locale->categories[LC_COLLATE].collate =
1438 copy_locale->categories[LC_COLLATE].collate;
1439 }
1440
1441 ldfile->translate_strings = 0;
1442 ldfile->return_widestr = 0;
1443 }
1444
1445
1446 void
1447 collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
1448 {
1449 /* Now is the time when we can assign the individual collation
1450 values for all the symbols. We have possibly different values
1451 for the wide- and the multibyte-character symbols. This is done
1452 since it might make a difference in the encoding if there is in
1453 some cases no multibyte-character but there are wide-characters.
1454 (The other way around it is not important since theencoded
1455 collation value in the wide-character case is 32 bits wide and
1456 therefore requires no encoding).
1457
1458 The lowest collation value assigned is 2. Zero is reserved for
1459 the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1460 functions and 1 is used to separate the individual passes for the
1461 different rules.
1462
1463 We also have to construct is list with all the bytes/words which
1464 can come first in a sequence, followed by all the elements which
1465 also start with this byte/word. The order is reverse which has
1466 among others the important effect that longer strings are located
1467 first in the list. This is required for the output data since
1468 the algorithm used in `strcoll' etc depends on this.
1469
1470 The multibyte case is easy. We simply sort into an array with
1471 256 elements. */
1472 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1473 int mbact[nrules];
1474 int wcact;
1475 int mbseqact;
1476 int wcseqact;
1477 struct element_t *runp;
1478 int i;
1479 int need_undefined = 0;
1480 struct section_list *sect;
1481 int ruleidx;
1482 int nr_wide_elems = 0;
1483
1484 if (collate == NULL)
1485 {
1486 /* No data, no check. */
1487 if (! be_quiet)
1488 error (0, 0, _("No definition for %s category found"), "LC_COLLATE");
1489 return;
1490 }
1491
1492 /* If this assertion is hit change the type in `element_t'. */
1493 assert (nrules <= sizeof (runp->used_in_level) * 8);
1494
1495 /* Make sure that the `position' rule is used either in all sections
1496 or in none. */
1497 for (i = 0; i < nrules; ++i)
1498 for (sect = collate->sections; sect != NULL; sect = sect->next)
1499 if (sect->rules != NULL
1500 && ((sect->rules[i] & sort_position)
1501 != (collate->sections->rules[i] & sort_position)))
1502 {
1503 error (0, 0, _("\
1504 %s: `position' must be used for a specific level in all sections or none"),
1505 "LC_COLLATE");
1506 break;
1507 }
1508
1509 /* Find out which elements are used at which level. At the same
1510 time we find out whether we have any undefined symbols. */
1511 runp = collate->start;
1512 while (runp != NULL)
1513 {
1514 if (runp->mbs != NULL)
1515 {
1516 for (i = 0; i < nrules; ++i)
1517 {
1518 int j;
1519
1520 for (j = 0; j < runp->weights[i].cnt; ++j)
1521 /* A NULL pointer as the weight means IGNORE. */
1522 if (runp->weights[i].w[j] != NULL)
1523 {
1524 if (runp->weights[i].w[j]->weights == NULL)
1525 {
1526 error_at_line (0, 0, runp->file, runp->line,
1527 _("symbol `%s' not defined"),
1528 runp->weights[i].w[j]->name);
1529
1530 need_undefined = 1;
1531 runp->weights[i].w[j] = &collate->undefined;
1532 }
1533 else
1534 /* Set the bit for the level. */
1535 runp->weights[i].w[j]->used_in_level |= 1 << i;
1536 }
1537 }
1538 }
1539
1540 /* Up to the next entry. */
1541 runp = runp->next;
1542 }
1543
1544 /* Walk through the list of defined sequences and assign weights. Also
1545 create the data structure which will allow generating the single byte
1546 character based tables.
1547
1548 Since at each time only the weights for each of the rules are
1549 only compared to other weights for this rule it is possible to
1550 assign more compact weight values than simply counting all
1551 weights in sequence. We can assign weights from 3, one for each
1552 rule individually and only for those elements, which are actually
1553 used for this rule.
1554
1555 Why is this important? It is not for the wide char table. But
1556 it is for the singlebyte output since here larger numbers have to
1557 be encoded to make it possible to emit the value as a byte
1558 string. */
1559 for (i = 0; i < nrules; ++i)
1560 mbact[i] = 2;
1561 wcact = 2;
1562 mbseqact = 0;
1563 wcseqact = 0;
1564 runp = collate->start;
1565 while (runp != NULL)
1566 {
1567 /* Determine the order. */
1568 if (runp->used_in_level != 0)
1569 {
1570 runp->mborder = (int *) obstack_alloc (&collate->mempool,
1571 nrules * sizeof (int));
1572
1573 for (i = 0; i < nrules; ++i)
1574 if ((runp->used_in_level & (1 << i)) != 0)
1575 runp->mborder[i] = mbact[i]++;
1576 else
1577 runp->mborder[i] = 0;
1578 }
1579
1580 if (runp->mbs != NULL)
1581 {
1582 struct element_t **eptr;
1583 struct element_t *lastp = NULL;
1584
1585 /* Find the point where to insert in the list. */
1586 eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1587 while (*eptr != NULL)
1588 {
1589 if ((*eptr)->nmbs < runp->nmbs)
1590 break;
1591
1592 if ((*eptr)->nmbs == runp->nmbs)
1593 {
1594 int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1595
1596 if (c == 0)
1597 {
1598 /* This should not happen. It means that we have
1599 to symbols with the same byte sequence. It is
1600 of course an error. */
1601 error_at_line (0, 0, (*eptr)->file, (*eptr)->line,
1602 _("symbol `%s' has the same encoding as"),
1603 (*eptr)->name);
1604 error_at_line (0, 0, runp->file, runp->line,
1605 _("symbol `%s'"), runp->name);
1606 goto dont_insert;
1607 }
1608 else if (c < 0)
1609 /* Insert it here. */
1610 break;
1611 }
1612
1613 /* To the next entry. */
1614 lastp = *eptr;
1615 eptr = &(*eptr)->mbnext;
1616 }
1617
1618 /* Set the pointers. */
1619 runp->mbnext = *eptr;
1620 runp->mblast = lastp;
1621 if (*eptr != NULL)
1622 (*eptr)->mblast = runp;
1623 *eptr = runp;
1624 dont_insert:
1625 }
1626
1627 if (runp->used_in_level)
1628 {
1629 runp->wcorder = wcact++;
1630
1631 /* We take the opportunity to count the elements which have
1632 wide characters. */
1633 ++nr_wide_elems;
1634 }
1635
1636 if (runp->is_character)
1637 {
1638 if (runp->nmbs == 1)
1639 collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
1640
1641 runp->wcseqorder = wcseqact++;
1642 }
1643
1644 /* Up to the next entry. */
1645 runp = runp->next;
1646 }
1647
1648 /* Find out whether any of the `mbheads' entries is unset. In this
1649 case we use the UNDEFINED entry. */
1650 for (i = 1; i < 256; ++i)
1651 if (collate->mbheads[i] == NULL)
1652 {
1653 need_undefined = 1;
1654 collate->mbheads[i] = &collate->undefined;
1655 }
1656
1657 /* Now to the wide character case. */
1658 collate->wcheads.p = 6;
1659 collate->wcheads.q = 10;
1660 wchead_table_init (&collate->wcheads);
1661
1662 collate->wcseqorder.p = 6;
1663 collate->wcseqorder.q = 10;
1664 collseq_table_init (&collate->wcseqorder);
1665
1666 /* Start adding. */
1667 runp = collate->start;
1668 while (runp != NULL)
1669 {
1670 if (runp->wcs != NULL)
1671 {
1672 struct element_t *e;
1673 struct element_t **eptr;
1674 struct element_t *lastp;
1675
1676 /* Insert the collation sequence value. */
1677 collseq_table_add (&collate->wcseqorder, runp->wcs[0],
1678 runp->wcseqorder);
1679
1680 /* Find the point where to insert in the list. */
1681 e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
1682 eptr = &e;
1683 lastp = NULL;
1684 while (*eptr != NULL)
1685 {
1686 if ((*eptr)->nwcs < runp->nwcs)
1687 break;
1688
1689 if ((*eptr)->nwcs == runp->nwcs)
1690 {
1691 int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
1692 (wchar_t *) runp->wcs, runp->nwcs);
1693
1694 if (c == 0)
1695 {
1696 /* This should not happen. It means that we have
1697 two symbols with the same byte sequence. It is
1698 of course an error. */
1699 error_at_line (0, 0, (*eptr)->file, (*eptr)->line,
1700 _("symbol `%s' has the same encoding as"),
1701 (*eptr)->name);
1702 error_at_line (0, 0, runp->file, runp->line,
1703 _("symbol `%s'"), runp->name);
1704 goto dont_insertwc;
1705 }
1706 else if (c < 0)
1707 /* Insert it here. */
1708 break;
1709 }
1710
1711 /* To the next entry. */
1712 lastp = *eptr;
1713 eptr = &(*eptr)->wcnext;
1714 }
1715
1716 /* Set the pointers. */
1717 runp->wcnext = *eptr;
1718 runp->wclast = lastp;
1719 if (*eptr != NULL)
1720 (*eptr)->wclast = runp;
1721 *eptr = runp;
1722 if (eptr == &e)
1723 wchead_table_add (&collate->wcheads, runp->wcs[0], e);
1724 dont_insertwc:
1725 }
1726
1727 /* Up to the next entry. */
1728 runp = runp->next;
1729 }
1730
1731 collseq_table_finalize (&collate->wcseqorder);
1732
1733 /* Now determine whether the UNDEFINED entry is needed and if yes,
1734 whether it was defined. */
1735 collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1736 if (collate->undefined.file == NULL)
1737 {
1738 if (need_undefined)
1739 {
1740 /* This seems not to be enforced by recent standards. Don't
1741 emit an error, simply append UNDEFINED at the end. */
1742 if (0)
1743 error (0, 0, _("no definition of `UNDEFINED'"));
1744
1745 /* Add UNDEFINED at the end. */
1746 collate->undefined.mborder =
1747 (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1748
1749 for (i = 0; i < nrules; ++i)
1750 collate->undefined.mborder[i] = mbact[i]++;
1751 }
1752
1753 /* In any case we will need the definition for the wide character
1754 case. But we will not complain that it is missing since the
1755 specification strangely enough does not seem to account for
1756 this. */
1757 collate->undefined.wcorder = wcact++;
1758 }
1759
1760 /* Finally, try to unify the rules for the sections. Whenever the rules
1761 for a section are the same as those for another section give the
1762 ruleset the same index. Since there are never many section we can
1763 use an O(n^2) algorithm here. */
1764 sect = collate->sections;
1765 while (sect != NULL && sect->rules == NULL)
1766 sect = sect->next;
1767 assert (sect != NULL);
1768 ruleidx = 0;
1769 do
1770 {
1771 struct section_list *osect = collate->sections;
1772
1773 while (osect != sect)
1774 if (osect->rules != NULL
1775 && memcmp (osect->rules, sect->rules, nrules) == 0)
1776 break;
1777 else
1778 osect = osect->next;
1779
1780 if (osect == sect)
1781 sect->ruleidx = ruleidx++;
1782 else
1783 sect->ruleidx = osect->ruleidx;
1784
1785 /* Next section. */
1786 do
1787 sect = sect->next;
1788 while (sect != NULL && sect->rules == NULL);
1789 }
1790 while (sect != NULL);
1791 /* We are currently not prepared for more than 256 rulesets. But this
1792 should never really be a problem. */
1793 assert (ruleidx <= 256);
1794 }
1795
1796
1797 static int32_t
1798 output_weight (struct obstack *pool, struct locale_collate_t *collate,
1799 struct element_t *elem)
1800 {
1801 size_t cnt;
1802 int32_t retval;
1803
1804 /* Optimize the use of UNDEFINED. */
1805 if (elem == &collate->undefined)
1806 /* The weights are already inserted. */
1807 return 0;
1808
1809 /* This byte can start exactly one collation element and this is
1810 a single byte. We can directly give the index to the weights. */
1811 retval = obstack_object_size (pool);
1812
1813 /* Construct the weight. */
1814 for (cnt = 0; cnt < nrules; ++cnt)
1815 {
1816 char buf[elem->weights[cnt].cnt * 7];
1817 int len = 0;
1818 int i;
1819
1820 for (i = 0; i < elem->weights[cnt].cnt; ++i)
1821 /* Encode the weight value. We do nothing for IGNORE entries. */
1822 if (elem->weights[cnt].w[i] != NULL)
1823 len += utf8_encode (&buf[len],
1824 elem->weights[cnt].w[i]->mborder[cnt]);
1825
1826 /* And add the buffer content. */
1827 obstack_1grow (pool, len);
1828 obstack_grow (pool, buf, len);
1829 }
1830
1831 return retval | ((elem->section->ruleidx & 0x7f) << 24);
1832 }
1833
1834
1835 static int32_t
1836 output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1837 struct element_t *elem)
1838 {
1839 size_t cnt;
1840 int32_t retval;
1841
1842 /* Optimize the use of UNDEFINED. */
1843 if (elem == &collate->undefined)
1844 /* The weights are already inserted. */
1845 return 0;
1846
1847 /* This byte can start exactly one collation element and this is
1848 a single byte. We can directly give the index to the weights. */
1849 retval = obstack_object_size (pool) / sizeof (int32_t);
1850
1851 /* Construct the weight. */
1852 for (cnt = 0; cnt < nrules; ++cnt)
1853 {
1854 int32_t buf[elem->weights[cnt].cnt];
1855 int i;
1856 int32_t j;
1857
1858 for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1859 if (elem->weights[cnt].w[i] != NULL)
1860 buf[j++] = elem->weights[cnt].w[i]->wcorder;
1861
1862 /* And add the buffer content. */
1863 obstack_int32_grow (pool, j);
1864
1865 obstack_grow (pool, buf, j * sizeof (int32_t));
1866 }
1867
1868 return retval | ((elem->section->ruleidx & 0x7f) << 24);
1869 }
1870
1871
1872 void
1873 collate_output (struct localedef_t *locale, struct charmap_t *charmap,
1874 const char *output_path)
1875 {
1876 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1877 const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
1878 struct iovec iov[2 + nelems];
1879 struct locale_file data;
1880 uint32_t idx[nelems];
1881 size_t cnt;
1882 size_t ch;
1883 int32_t tablemb[256];
1884 struct obstack weightpool;
1885 struct obstack extrapool;
1886 struct obstack indirectpool;
1887 struct section_list *sect;
1888 struct collidx_table tablewc;
1889 uint32_t elem_size;
1890 uint32_t *elem_table;
1891 int i;
1892 struct element_t *runp;
1893
1894 data.magic = LIMAGIC (LC_COLLATE);
1895 data.n = nelems;
1896 iov[0].iov_base = (void *) &data;
1897 iov[0].iov_len = sizeof (data);
1898
1899 iov[1].iov_base = (void *) idx;
1900 iov[1].iov_len = sizeof (idx);
1901
1902 idx[0] = iov[0].iov_len + iov[1].iov_len;
1903 cnt = 0;
1904
1905 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NRULES));
1906 iov[2 + cnt].iov_base = &nrules;
1907 iov[2 + cnt].iov_len = sizeof (uint32_t);
1908 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1909 ++cnt;
1910
1911 /* If we have no LC_COLLATE data emit only the number of rules as zero. */
1912 if (collate == NULL)
1913 {
1914 int32_t dummy = 0;
1915
1916 while (cnt < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
1917 {
1918 /* The words have to be handled specially. */
1919 if (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
1920 {
1921 iov[2 + cnt].iov_base = &dummy;
1922 iov[2 + cnt].iov_len = sizeof (int32_t);
1923 }
1924 else
1925 {
1926 iov[2 + cnt].iov_base = NULL;
1927 iov[2 + cnt].iov_len = 0;
1928 }
1929
1930 if (cnt + 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
1931 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1932 ++cnt;
1933 }
1934
1935 assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
1936
1937 write_locale_data (output_path, "LC_COLLATE", 2 + cnt, iov);
1938
1939 return;
1940 }
1941
1942 obstack_init (&weightpool);
1943 obstack_init (&extrapool);
1944 obstack_init (&indirectpool);
1945
1946 /* Since we are using the sign of an integer to mark indirection the
1947 offsets in the arrays we are indirectly referring to must not be
1948 zero since -0 == 0. Therefore we add a bit of dummy content. */
1949 obstack_int32_grow (&extrapool, 0);
1950 obstack_int32_grow (&indirectpool, 0);
1951
1952 /* Prepare the ruleset table. */
1953 for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
1954 if (sect->rules != NULL && sect->ruleidx == i)
1955 {
1956 int j;
1957
1958 obstack_make_room (&weightpool, nrules);
1959
1960 for (j = 0; j < nrules; ++j)
1961 obstack_1grow_fast (&weightpool, sect->rules[j]);
1962 ++i;
1963 }
1964 /* And align the output. */
1965 i = (nrules * i) % __alignof__ (int32_t);
1966 if (i > 0)
1967 do
1968 obstack_1grow (&weightpool, '\0');
1969 while (++i < __alignof__ (int32_t));
1970
1971 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_RULESETS));
1972 iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
1973 iov[2 + cnt].iov_base = obstack_finish (&weightpool);
1974 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1975 ++cnt;
1976
1977 /* Generate the 8-bit table. Walk through the lists of sequences
1978 starting with the same byte and add them one after the other to
1979 the table. In case we have more than one sequence starting with
1980 the same byte we have to use extra indirection.
1981
1982 First add a record for the NUL byte. This entry will never be used
1983 so it does not matter. */
1984 tablemb[0] = 0;
1985
1986 /* Now insert the `UNDEFINED' value if it is used. Since this value
1987 will probably be used more than once it is good to store the
1988 weights only once. */
1989 if (collate->undefined.used_in_level != 0)
1990 output_weight (&weightpool, collate, &collate->undefined);
1991
1992 for (ch = 1; ch < 256; ++ch)
1993 if (collate->mbheads[ch]->mbnext == NULL
1994 && collate->mbheads[ch]->nmbs <= 1)
1995 {
1996 tablemb[ch] = output_weight (&weightpool, collate,
1997 collate->mbheads[ch]);
1998 }
1999 else
2000 {
2001 /* The entries in the list are sorted by length and then
2002 alphabetically. This is the order in which we will add the
2003 elements to the collation table. This allows simply walking
2004 the table in sequence and stopping at the first matching
2005 entry. Since the longer sequences are coming first in the
2006 list they have the possibility to match first, just as it
2007 has to be. In the worst case we are walking to the end of
2008 the list where we put, if no singlebyte sequence is defined
2009 in the locale definition, the weights for UNDEFINED.
2010
2011 To reduce the length of the search list we compress them a bit.
2012 This happens by collecting sequences of consecutive byte
2013 sequences in one entry (having and begin and end byte sequence)
2014 and add only one index into the weight table. We can find the
2015 consecutive entries since they are also consecutive in the list. */
2016 struct element_t *runp = collate->mbheads[ch];
2017 struct element_t *lastp;
2018
2019 assert ((obstack_object_size (&extrapool)
2020 & (__alignof__ (int32_t) - 1)) == 0);
2021
2022 tablemb[ch] = -obstack_object_size (&extrapool);
2023
2024 do
2025 {
2026 /* Store the current index in the weight table. We know that
2027 the current position in the `extrapool' is aligned on a
2028 32-bit address. */
2029 int32_t weightidx;
2030 int added;
2031
2032 /* Find out whether this is a single entry or we have more than
2033 one consecutive entry. */
2034 if (runp->mbnext != NULL
2035 && runp->nmbs == runp->mbnext->nmbs
2036 && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
2037 && (runp->mbs[runp->nmbs - 1]
2038 == runp->mbnext->mbs[runp->nmbs - 1] + 1))
2039 {
2040 int i;
2041 struct element_t *series_startp = runp;
2042 struct element_t *curp;
2043
2044 /* Compute how much space we will need. */
2045 added = ((sizeof (int32_t) + 1 + 2 * (runp->nmbs - 1)
2046 + __alignof__ (int32_t) - 1)
2047 & ~(__alignof__ (int32_t) - 1));
2048 assert ((obstack_object_size (&extrapool)
2049 & (__alignof__ (int32_t) - 1)) == 0);
2050 obstack_make_room (&extrapool, added);
2051
2052 /* More than one consecutive entry. We mark this by having
2053 a negative index into the indirect table. */
2054 obstack_int32_grow_fast (&extrapool,
2055 -(obstack_object_size (&indirectpool)
2056 / sizeof (int32_t)));
2057
2058 /* Now search first the end of the series. */
2059 do
2060 runp = runp->mbnext;
2061 while (runp->mbnext != NULL
2062 && runp->nmbs == runp->mbnext->nmbs
2063 && memcmp (runp->mbs, runp->mbnext->mbs,
2064 runp->nmbs - 1) == 0
2065 && (runp->mbs[runp->nmbs - 1]
2066 == runp->mbnext->mbs[runp->nmbs - 1] + 1));
2067
2068 /* Now walk backward from here to the beginning. */
2069 curp = runp;
2070
2071 assert (runp->nmbs <= 256);
2072 obstack_1grow_fast (&extrapool, curp->nmbs - 1);
2073 for (i = 1; i < curp->nmbs; ++i)
2074 obstack_1grow_fast (&extrapool, curp->mbs[i]);
2075
2076 /* Now find the end of the consecutive sequence and
2077 add all the indices in the indirect pool. */
2078 do
2079 {
2080 weightidx = output_weight (&weightpool, collate, curp);
2081 obstack_int32_grow (&indirectpool, weightidx);
2082
2083 curp = curp->mblast;
2084 }
2085 while (curp != series_startp);
2086
2087 /* Add the final weight. */
2088 weightidx = output_weight (&weightpool, collate, curp);
2089 obstack_int32_grow (&indirectpool, weightidx);
2090
2091 /* And add the end byte sequence. Without length this
2092 time. */
2093 for (i = 1; i < curp->nmbs; ++i)
2094 obstack_1grow_fast (&extrapool, curp->mbs[i]);
2095 }
2096 else
2097 {
2098 /* A single entry. Simply add the index and the length and
2099 string (except for the first character which is already
2100 tested for). */
2101 int i;
2102
2103 /* Output the weight info. */
2104 weightidx = output_weight (&weightpool, collate, runp);
2105
2106 added = ((sizeof (int32_t) + 1 + runp->nmbs - 1
2107 + __alignof__ (int32_t) - 1)
2108 & ~(__alignof__ (int32_t) - 1));
2109 assert ((obstack_object_size (&extrapool)
2110 & (__alignof__ (int32_t) - 1)) == 0);
2111 obstack_make_room (&extrapool, added);
2112
2113 obstack_int32_grow_fast (&extrapool, weightidx);
2114 assert (runp->nmbs <= 256);
2115 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
2116
2117 for (i = 1; i < runp->nmbs; ++i)
2118 obstack_1grow_fast (&extrapool, runp->mbs[i]);
2119 }
2120
2121 /* Add alignment bytes if necessary. */
2122 while ((obstack_object_size (&extrapool)
2123 & (__alignof__ (int32_t) - 1)) != 0)
2124 obstack_1grow_fast (&extrapool, '\0');
2125
2126 /* Next entry. */
2127 lastp = runp;
2128 runp = runp->mbnext;
2129 }
2130 while (runp != NULL);
2131
2132 assert ((obstack_object_size (&extrapool)
2133 & (__alignof__ (int32_t) - 1)) == 0);
2134
2135 /* If the final entry in the list is not a single character we
2136 add an UNDEFINED entry here. */
2137 if (lastp->nmbs != 1)
2138 {
2139 int added = ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t) - 1)
2140 & ~(__alignof__ (int32_t) - 1));
2141 obstack_make_room (&extrapool, added);
2142
2143 obstack_int32_grow_fast (&extrapool, 0);
2144 /* XXX What rule? We just pick the first. */
2145 obstack_1grow_fast (&extrapool, 0);
2146 /* Length is zero. */
2147 obstack_1grow_fast (&extrapool, 0);
2148
2149 /* Add alignment bytes if necessary. */
2150 while ((obstack_object_size (&extrapool)
2151 & (__alignof__ (int32_t) - 1)) != 0)
2152 obstack_1grow_fast (&extrapool, '\0');
2153 }
2154 }
2155
2156 /* Add padding to the tables if necessary. */
2157 while ((obstack_object_size (&weightpool) & (__alignof__ (int32_t) - 1))
2158 != 0)
2159 obstack_1grow (&weightpool, 0);
2160
2161 /* Now add the four tables. */
2162 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB));
2163 iov[2 + cnt].iov_base = tablemb;
2164 iov[2 + cnt].iov_len = sizeof (tablemb);
2165 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2166 assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2167 ++cnt;
2168
2169 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB));
2170 iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2171 iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2172 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2173 ++cnt;
2174
2175 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB));
2176 iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2177 iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2178 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2179 ++cnt;
2180
2181 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB));
2182 iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2183 iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2184 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2185 assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2186 ++cnt;
2187
2188
2189 /* Now the same for the wide character table. We need to store some
2190 more information here. */
2191 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP1));
2192 iov[2 + cnt].iov_base = NULL;
2193 iov[2 + cnt].iov_len = 0;
2194 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2195 assert (idx[cnt] % 4 == 0);
2196 ++cnt;
2197
2198 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP2));
2199 iov[2 + cnt].iov_base = NULL;
2200 iov[2 + cnt].iov_len = 0;
2201 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2202 assert (idx[cnt] % 4 == 0);
2203 ++cnt;
2204
2205 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP3));
2206 iov[2 + cnt].iov_base = NULL;
2207 iov[2 + cnt].iov_len = 0;
2208 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2209 assert (idx[cnt] % 4 == 0);
2210 ++cnt;
2211
2212 /* Since we are using the sign of an integer to mark indirection the
2213 offsets in the arrays we are indirectly referring to must not be
2214 zero since -0 == 0. Therefore we add a bit of dummy content. */
2215 obstack_int32_grow (&extrapool, 0);
2216 obstack_int32_grow (&indirectpool, 0);
2217
2218 /* Now insert the `UNDEFINED' value if it is used. Since this value
2219 will probably be used more than once it is good to store the
2220 weights only once. */
2221 if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
2222 abort ();
2223
2224 /* Generate the table. Walk through the lists of sequences starting
2225 with the same wide character and add them one after the other to
2226 the table. In case we have more than one sequence starting with
2227 the same byte we have to use extra indirection. */
2228 {
2229 void add_to_tablewc (uint32_t ch, struct element_t *runp)
2230 {
2231 if (runp->wcnext == NULL && runp->nwcs == 1)
2232 {
2233 int32_t weigthidx = output_weightwc (&weightpool, collate, runp);
2234 collidx_table_add (&tablewc, ch, weigthidx);
2235 }
2236 else
2237 {
2238 /* As for the singlebyte table, we recognize sequences and
2239 compress them. */
2240 struct element_t *lastp;
2241
2242 collidx_table_add (&tablewc, ch,
2243 -(obstack_object_size (&extrapool) / sizeof (uint32_t)));
2244
2245 do
2246 {
2247 /* Store the current index in the weight table. We know that
2248 the current position in the `extrapool' is aligned on a
2249 32-bit address. */
2250 int32_t weightidx;
2251 int added;
2252
2253 /* Find out wether this is a single entry or we have more than
2254 one consecutive entry. */
2255 if (runp->wcnext != NULL
2256 && runp->nwcs == runp->wcnext->nwcs
2257 && wmemcmp ((wchar_t *) runp->wcs,
2258 (wchar_t *)runp->wcnext->wcs,
2259 runp->nwcs - 1) == 0
2260 && (runp->wcs[runp->nwcs - 1]
2261 == runp->wcnext->wcs[runp->nwcs - 1] + 1))
2262 {
2263 int i;
2264 struct element_t *series_startp = runp;
2265 struct element_t *curp;
2266
2267 /* Now add first the initial byte sequence. */
2268 added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
2269 if (sizeof (int32_t) == sizeof (int))
2270 obstack_make_room (&extrapool, added);
2271
2272 /* More than one consecutive entry. We mark this by having
2273 a negative index into the indirect table. */
2274 obstack_int32_grow_fast (&extrapool,
2275 -(obstack_object_size (&indirectpool)
2276 / sizeof (int32_t)));
2277 obstack_int32_grow_fast (&extrapool, runp->nwcs - 1);
2278
2279 do
2280 runp = runp->wcnext;
2281 while (runp->wcnext != NULL
2282 && runp->nwcs == runp->wcnext->nwcs
2283 && wmemcmp ((wchar_t *) runp->wcs,
2284 (wchar_t *)runp->wcnext->wcs,
2285 runp->nwcs - 1) == 0
2286 && (runp->wcs[runp->nwcs - 1]
2287 == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2288
2289 /* Now walk backward from here to the beginning. */
2290 curp = runp;
2291
2292 for (i = 1; i < runp->nwcs; ++i)
2293 obstack_int32_grow_fast (&extrapool, curp->wcs[i]);
2294
2295 /* Now find the end of the consecutive sequence and
2296 add all the indeces in the indirect pool. */
2297 do
2298 {
2299 weightidx = output_weightwc (&weightpool, collate,
2300 curp);
2301 obstack_int32_grow (&indirectpool, weightidx);
2302
2303 curp = curp->wclast;
2304 }
2305 while (curp != series_startp);
2306
2307 /* Add the final weight. */
2308 weightidx = output_weightwc (&weightpool, collate, curp);
2309 obstack_int32_grow (&indirectpool, weightidx);
2310
2311 /* And add the end byte sequence. Without length this
2312 time. */
2313 for (i = 1; i < curp->nwcs; ++i)
2314 obstack_int32_grow (&extrapool, curp->wcs[i]);
2315 }
2316 else
2317 {
2318 /* A single entry. Simply add the index and the length and
2319 string (except for the first character which is already
2320 tested for). */
2321 int i;
2322
2323 /* Output the weight info. */
2324 weightidx = output_weightwc (&weightpool, collate, runp);
2325
2326 added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2327 if (sizeof (int) == sizeof (int32_t))
2328 obstack_make_room (&extrapool, added);
2329
2330 obstack_int32_grow_fast (&extrapool, weightidx);
2331 obstack_int32_grow_fast (&extrapool, runp->nwcs - 1);
2332 for (i = 1; i < runp->nwcs; ++i)
2333 obstack_int32_grow_fast (&extrapool, runp->wcs[i]);
2334 }
2335
2336 /* Next entry. */
2337 lastp = runp;
2338 runp = runp->wcnext;
2339 }
2340 while (runp != NULL);
2341 }
2342 }
2343
2344 tablewc.p = 6;
2345 tablewc.q = 10;
2346 collidx_table_init (&tablewc);
2347
2348 wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2349
2350 collidx_table_finalize (&tablewc);
2351 }
2352
2353 /* Now add the four tables. */
2354 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEWC));
2355 iov[2 + cnt].iov_base = tablewc.result;
2356 iov[2 + cnt].iov_len = tablewc.result_size;
2357 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2358 assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2359 assert (idx[cnt] % 4 == 0);
2360 ++cnt;
2361
2362 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTWC));
2363 iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2364 iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2365 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2366 assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2367 assert (idx[cnt] % 4 == 0);
2368 ++cnt;
2369
2370 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAWC));
2371 iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2372 iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2373 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2374 assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2375 assert (idx[cnt] % 4 == 0);
2376 ++cnt;
2377
2378 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTWC));
2379 iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2380 iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2381 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2382 assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2383 assert (idx[cnt] % 4 == 0);
2384 ++cnt;
2385
2386
2387 /* Finally write the table with collation element names out. It is
2388 a hash table with a simple function which gets the name of the
2389 character as the input. One character might have many names. The
2390 value associated with the name is an index into the weight table
2391 where we are then interested in the first-level weight value.
2392
2393 To determine how large the table should be we are counting the
2394 elements we have to put in. Since we are using internal chaining
2395 using a secondary hash function we have to make the table a bit
2396 larger to avoid extremely long search times. We can achieve
2397 good results with a 40% larger table than there are entries. */
2398 elem_size = 0;
2399 runp = collate->start;
2400 while (runp != NULL)
2401 {
2402 if (runp->mbs != NULL && runp->weights != NULL)
2403 /* Yep, the element really counts. */
2404 ++elem_size;
2405
2406 runp = runp->next;
2407 }
2408 /* Add 40% and find the next prime number. */
2409 elem_size = MIN (next_prime (elem_size * 1.4), 257);
2410
2411 /* Allocate the table. Each entry consists of two words: the hash
2412 value and an index in a secondary table which provides the index
2413 into the weight table and the string itself (so that a match can
2414 be determined). */
2415 elem_table = (uint32_t *) obstack_alloc (&extrapool,
2416 elem_size * 2 * sizeof (uint32_t));
2417 memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
2418
2419 /* Now add the elements. */
2420 runp = collate->start;
2421 while (runp != NULL)
2422 {
2423 if (runp->mbs != NULL && runp->weights != NULL)
2424 {
2425 /* Compute the hash value of the name. */
2426 uint32_t namelen = strlen (runp->name);
2427 uint32_t hash = elem_hash (runp->name, namelen);
2428 size_t idx = hash % elem_size;
2429
2430 if (elem_table[idx * 2] != 0)
2431 {
2432 /* The spot is already taken. Try iterating using the value
2433 from the secondary hashing function. */
2434 size_t iter = hash % (elem_size - 2);
2435
2436 do
2437 {
2438 idx += iter;
2439 if (idx >= elem_size)
2440 idx -= elem_size;
2441 }
2442 while (elem_table[idx * 2] != 0);
2443
2444 /* This is the spot where we will insert the value. */
2445 elem_table[idx * 2] = hash;
2446 elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
2447
2448 /* Then the string itself including length. */
2449 obstack_1grow (&extrapool, namelen);
2450 obstack_grow (&extrapool, runp->name, namelen);
2451
2452 /* And the multibyte representation. */
2453 obstack_1grow (&extrapool, runp->nmbs);
2454 obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2455
2456 /* And align again to 32 bits. */
2457 if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
2458 obstack_grow (&extrapool, "\0\0",
2459 (sizeof (int32_t)
2460 - ((1 + namelen + 1 + runp->nmbs)
2461 % sizeof (int32_t))));
2462
2463 /* Now some 32-bit values: multibyte collation sequence,
2464 wide char string (including length), and wide char
2465 collation sequence. */
2466 obstack_int32_grow (&extrapool, runp->mbseqorder);
2467
2468 obstack_int32_grow (&extrapool, runp->nwcs);
2469 obstack_grow (&extrapool, runp->wcs,
2470 runp->nwcs * sizeof (uint32_t));
2471
2472 obstack_int32_grow (&extrapool, runp->wcseqorder);
2473 }
2474 }
2475
2476 runp = runp->next;
2477 }
2478
2479 /* Prepare to write out this data. */
2480 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB));
2481 iov[2 + cnt].iov_base = &elem_size;
2482 iov[2 + cnt].iov_len = sizeof (int32_t);
2483 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2484 assert (idx[cnt] % 4 == 0);
2485 ++cnt;
2486
2487 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_TABLEMB));
2488 iov[2 + cnt].iov_base = elem_table;
2489 iov[2 + cnt].iov_len = elem_size * 2 * sizeof (int32_t);
2490 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2491 assert (idx[cnt] % 4 == 0);
2492 ++cnt;
2493
2494 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB));
2495 iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2496 iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2497 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2498 ++cnt;
2499
2500 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB));
2501 iov[2 + cnt].iov_base = collate->mbseqorder;
2502 iov[2 + cnt].iov_len = 256;
2503 idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2504 ++cnt;
2505
2506 assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC));
2507 iov[2 + cnt].iov_base = collate->wcseqorder.result;
2508 iov[2 + cnt].iov_len = collate->wcseqorder.result_size;
2509 assert (idx[cnt] % 4 == 0);
2510 ++cnt;
2511
2512 assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
2513
2514 write_locale_data (output_path, "LC_COLLATE", 2 + cnt, iov);
2515
2516 obstack_free (&weightpool, NULL);
2517 obstack_free (&extrapool, NULL);
2518 obstack_free (&indirectpool, NULL);
2519 }
2520
2521
2522 void
2523 collate_read (struct linereader *ldfile, struct localedef_t *result,
2524 struct charmap_t *charmap, const char *repertoire_name,
2525 int ignore_content)
2526 {
2527 struct repertoire_t *repertoire = NULL;
2528 struct locale_collate_t *collate;
2529 struct token *now;
2530 struct token *arg = NULL;
2531 enum token_t nowtok;
2532 int state = 0;
2533 enum token_t was_ellipsis = tok_none;
2534 struct localedef_t *copy_locale = NULL;
2535
2536 /* Get the repertoire we have to use. */
2537 if (repertoire_name != NULL)
2538 repertoire = repertoire_read (repertoire_name);
2539
2540 /* The rest of the line containing `LC_COLLATE' must be free. */
2541 lr_ignore_rest (ldfile, 1);
2542
2543 do
2544 {
2545 now = lr_token (ldfile, charmap, NULL);
2546 nowtok = now->tok;
2547 }
2548 while (nowtok == tok_eol);
2549
2550 if (nowtok == tok_copy)
2551 {
2552 state = 2;
2553 now = lr_token (ldfile, charmap, NULL);
2554 if (now->tok != tok_string)
2555 {
2556 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2557
2558 skip_category:
2559 do
2560 now = lr_token (ldfile, charmap, NULL);
2561 while (now->tok != tok_eof && now->tok != tok_end);
2562
2563 if (now->tok != tok_eof
2564 || (now = lr_token (ldfile, charmap, NULL), now->tok == tok_eof))
2565 lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2566 else if (now->tok != tok_lc_collate)
2567 {
2568 lr_error (ldfile, _("\
2569 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2570 lr_ignore_rest (ldfile, 0);
2571 }
2572 else
2573 lr_ignore_rest (ldfile, 1);
2574
2575 return;
2576 }
2577
2578 if (! ignore_content)
2579 {
2580 /* Get the locale definition. */
2581 copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2582 repertoire_name, charmap, NULL);
2583 if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2584 {
2585 /* Not yet loaded. So do it now. */
2586 if (locfile_read (copy_locale, charmap) != 0)
2587 goto skip_category;
2588 }
2589 }
2590
2591 lr_ignore_rest (ldfile, 1);
2592
2593 now = lr_token (ldfile, charmap, NULL);
2594 nowtok = now->tok;
2595 }
2596
2597 /* Prepare the data structures. */
2598 collate_startup (ldfile, result, copy_locale, ignore_content);
2599 collate = result->categories[LC_COLLATE].collate;
2600
2601 while (1)
2602 {
2603 char ucs4buf[10];
2604 char *symstr;
2605 size_t symlen;
2606
2607 /* Of course we don't proceed beyond the end of file. */
2608 if (nowtok == tok_eof)
2609 break;
2610
2611 /* Ignore empty lines. */
2612 if (nowtok == tok_eol)
2613 {
2614 now = lr_token (ldfile, charmap, NULL);
2615 nowtok = now->tok;
2616 continue;
2617 }
2618
2619 switch (nowtok)
2620 {
2621 case tok_copy:
2622 /* Allow copying other locales. */
2623 now = lr_token (ldfile, charmap, NULL);
2624 if (now->tok != tok_string)
2625 goto err_label;
2626
2627 if (! ignore_content)
2628 load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2629 charmap, result);
2630
2631 lr_ignore_rest (ldfile, 1);
2632 break;
2633
2634 case tok_coll_weight_max:
2635 /* Ignore the rest of the line if we don't need the input of
2636 this line. */
2637 if (ignore_content)
2638 {
2639 lr_ignore_rest (ldfile, 0);
2640 break;
2641 }
2642
2643 if (state != 0)
2644 goto err_label;
2645
2646 arg = lr_token (ldfile, charmap, NULL);
2647 if (arg->tok != tok_number)
2648 goto err_label;
2649 if (collate->col_weight_max != -1)
2650 lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2651 "LC_COLLATE", "col_weight_max");
2652 else
2653 collate->col_weight_max = arg->val.num;
2654 lr_ignore_rest (ldfile, 1);
2655 break;
2656
2657 case tok_section_symbol:
2658 /* Ignore the rest of the line if we don't need the input of
2659 this line. */
2660 if (ignore_content)
2661 {
2662 lr_ignore_rest (ldfile, 0);
2663 break;
2664 }
2665
2666 if (state != 0)
2667 goto err_label;
2668
2669 arg = lr_token (ldfile, charmap, repertoire);
2670 if (arg->tok != tok_bsymbol)
2671 goto err_label;
2672 else if (!ignore_content)
2673 {
2674 /* Check whether this section is already known. */
2675 struct section_list *known = collate->sections;
2676 while (known != NULL)
2677 {
2678 if (strcmp (known->name, arg->val.str.startmb) == 0)
2679 break;
2680 known = known->next;
2681 }
2682
2683 if (known != NULL)
2684 {
2685 lr_error (ldfile,
2686 _("%s: duplicate declaration of section `%s'"),
2687 "LC_COLLATE", arg->val.str.startmb);
2688 free (arg->val.str.startmb);
2689 }
2690 else
2691 collate->sections = make_seclist_elem (collate,
2692 arg->val.str.startmb,
2693 collate->sections);
2694
2695 lr_ignore_rest (ldfile, known == NULL);
2696 }
2697 else
2698 {
2699 free (arg->val.str.startmb);
2700 lr_ignore_rest (ldfile, 0);
2701 }
2702 break;
2703
2704 case tok_collating_element:
2705 /* Ignore the rest of the line if we don't need the input of
2706 this line. */
2707 if (ignore_content)
2708 {
2709 lr_ignore_rest (ldfile, 0);
2710 break;
2711 }
2712
2713 if (state != 0)
2714 goto err_label;
2715
2716 arg = lr_token (ldfile, charmap, repertoire);
2717 if (arg->tok != tok_bsymbol)
2718 goto err_label;
2719 else
2720 {
2721 const char *symbol = arg->val.str.startmb;
2722 size_t symbol_len = arg->val.str.lenmb;
2723
2724 /* Next the `from' keyword. */
2725 arg = lr_token (ldfile, charmap, repertoire);
2726 if (arg->tok != tok_from)
2727 {
2728 free ((char *) symbol);
2729 goto err_label;
2730 }
2731
2732 ldfile->return_widestr = 1;
2733 ldfile->translate_strings = 1;
2734
2735 /* Finally the string with the replacement. */
2736 arg = lr_token (ldfile, charmap, repertoire);
2737
2738 ldfile->return_widestr = 0;
2739 ldfile->translate_strings = 0;
2740
2741 if (arg->tok != tok_string)
2742 goto err_label;
2743
2744 if (!ignore_content && symbol != NULL)
2745 {
2746 /* The name is already defined. */
2747 if (check_duplicate (ldfile, collate, charmap,
2748 repertoire, symbol, symbol_len))
2749 goto col_elem_free;
2750
2751 if (arg->val.str.startmb != NULL)
2752 insert_entry (&collate->elem_table, symbol, symbol_len,
2753 new_element (collate,
2754 arg->val.str.startmb,
2755 arg->val.str.lenmb - 1,
2756 arg->val.str.startwc,
2757 symbol, symbol_len, 0));
2758 }
2759 else
2760 {
2761 col_elem_free:
2762 if (symbol != NULL)
2763 free ((char *) symbol);
2764 if (arg->val.str.startmb != NULL)
2765 free (arg->val.str.startmb);
2766 if (arg->val.str.startwc != NULL)
2767 free (arg->val.str.startwc);
2768 }
2769 lr_ignore_rest (ldfile, 1);
2770 }
2771 break;
2772
2773 case tok_collating_symbol:
2774 /* Ignore the rest of the line if we don't need the input of
2775 this line. */
2776 if (ignore_content)
2777 {
2778 lr_ignore_rest (ldfile, 0);
2779 break;
2780 }
2781
2782 if (state != 0 && state != 2)
2783 goto err_label;
2784
2785 arg = lr_token (ldfile, charmap, repertoire);
2786 if (arg->tok != tok_bsymbol)
2787 goto err_label;
2788 else
2789 {
2790 char *symbol = arg->val.str.startmb;
2791 size_t symbol_len = arg->val.str.lenmb;
2792 char *endsymbol = NULL;
2793 size_t endsymbol_len = 0;
2794 enum token_t ellipsis = tok_none;
2795
2796 arg = lr_token (ldfile, charmap, repertoire);
2797 if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
2798 {
2799 ellipsis = arg->tok;
2800
2801 arg = lr_token (ldfile, charmap, repertoire);
2802 if (arg->tok != tok_bsymbol)
2803 {
2804 free (symbol);
2805 goto err_label;
2806 }
2807
2808 endsymbol = arg->val.str.startmb;
2809 endsymbol_len = arg->val.str.lenmb;
2810
2811 lr_ignore_rest (ldfile, 1);
2812 }
2813 else if (arg->tok != tok_eol)
2814 {
2815 free (symbol);
2816 goto err_label;
2817 }
2818
2819 if (!ignore_content)
2820 {
2821 if (symbol == NULL
2822 || (ellipsis != tok_none && endsymbol == NULL))
2823 {
2824 lr_error (ldfile, _("\
2825 %s: unknown character in collating symbol name"),
2826 "LC_COLLATE");
2827 goto col_sym_free;
2828 }
2829 else if (ellipsis == tok_none)
2830 {
2831 /* The name is already defined. */
2832 if (check_duplicate (ldfile, collate, charmap,
2833 repertoire, symbol, symbol_len))
2834 goto col_sym_free;
2835
2836 insert_entry (&collate->sym_table, symbol, symbol_len,
2837 new_symbol (collate, symbol, symbol_len));
2838 }
2839 else if (symbol_len != endsymbol_len)
2840 {
2841 col_sym_inv_range:
2842 lr_error (ldfile,
2843 _("invalid names for character range"));
2844 goto col_sym_free;
2845 }
2846 else
2847 {
2848 /* Oh my, we have to handle an ellipsis. First, as
2849 usual, determine the common prefix and then
2850 convert the rest into a range. */
2851 size_t prefixlen;
2852 unsigned long int from;
2853 unsigned long int to;
2854 char *endp;
2855
2856 for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
2857 if (symbol[prefixlen] != endsymbol[prefixlen])
2858 break;
2859
2860 /* Convert the rest into numbers. */
2861 symbol[symbol_len] = '\0';
2862 from = strtoul (&symbol[prefixlen], &endp,
2863 ellipsis == tok_ellipsis2 ? 16 : 10);
2864 if (*endp != '\0')
2865 goto col_sym_inv_range;
2866
2867 endsymbol[symbol_len] = '\0';
2868 to = strtoul (&endsymbol[prefixlen], &endp,
2869 ellipsis == tok_ellipsis2 ? 16 : 10);
2870 if (*endp != '\0')
2871 goto col_sym_inv_range;
2872
2873 if (from > to)
2874 goto col_sym_inv_range;
2875
2876 /* Now loop over all entries. */
2877 while (from <= to)
2878 {
2879 char *symbuf;
2880
2881 symbuf = (char *) obstack_alloc (&collate->mempool,
2882 symbol_len + 1);
2883
2884 /* Create the name. */
2885 sprintf (symbuf,
2886 ellipsis == tok_ellipsis2
2887 ? "%.*s%.*lX" : "%.*s%.*lX",
2888 (int) prefixlen, symbol,
2889 (int) (symbol_len - prefixlen), from);
2890
2891 /* The name is already defined. */
2892 if (check_duplicate (ldfile, collate, charmap,
2893 repertoire, symbuf, symbol_len))
2894 goto col_sym_free;
2895
2896 insert_entry (&collate->sym_table, symbuf,
2897 symbol_len,
2898 new_symbol (collate, symbuf,
2899 symbol_len));
2900
2901 /* Increment the counter. */
2902 ++from;
2903 }
2904
2905 goto col_sym_free;
2906 }
2907 }
2908 else
2909 {
2910 col_sym_free:
2911 if (symbol != NULL)
2912 free (symbol);
2913 if (endsymbol != NULL)
2914 free (endsymbol);
2915 }
2916 }
2917 break;
2918
2919 case tok_symbol_equivalence:
2920 /* Ignore the rest of the line if we don't need the input of
2921 this line. */
2922 if (ignore_content)
2923 {
2924 lr_ignore_rest (ldfile, 0);
2925 break;
2926 }
2927
2928 if (state != 0)
2929 goto err_label;
2930
2931 arg = lr_token (ldfile, charmap, repertoire);
2932 if (arg->tok != tok_bsymbol)
2933 goto err_label;
2934 else
2935 {
2936 const char *newname = arg->val.str.startmb;
2937 size_t newname_len = arg->val.str.lenmb;
2938 const char *symname;
2939 size_t symname_len;
2940 struct symbol_t *symval;
2941
2942 arg = lr_token (ldfile, charmap, repertoire);
2943 if (arg->tok != tok_bsymbol)
2944 {
2945 if (newname != NULL)
2946 free ((char *) newname);
2947 goto err_label;
2948 }
2949
2950 symname = arg->val.str.startmb;
2951 symname_len = arg->val.str.lenmb;
2952
2953 if (newname == NULL)
2954 {
2955 lr_error (ldfile, _("\
2956 %s: unknown character in equivalent definition name"),
2957 "LC_COLLATE");
2958
2959 sym_equiv_free:
2960 if (newname != NULL)
2961 free ((char *) newname);
2962 if (symname != NULL)
2963 free ((char *) symname);
2964 break;
2965 }
2966 if (symname == NULL)
2967 {
2968 lr_error (ldfile, _("\
2969 %s: unknown character in equivalent definition value"),
2970 "LC_COLLATE");
2971 goto sym_equiv_free;
2972 }
2973
2974 /* See whether the symbol name is already defined. */
2975 if (find_entry (&collate->sym_table, symname, symname_len,
2976 (void **) &symval) != 0)
2977 {
2978 lr_error (ldfile, _("\
2979 %s: unknown symbol `%s' in equivalent definition"),
2980 "LC_COLLATE", symname);
2981 goto col_sym_free;
2982 }
2983
2984 if (insert_entry (&collate->sym_table,
2985 newname, newname_len, symval) < 0)
2986 {
2987 lr_error (ldfile, _("\
2988 error while adding equivalent collating symbol"));
2989 goto sym_equiv_free;
2990 }
2991
2992 free ((char *) symname);
2993 }
2994 lr_ignore_rest (ldfile, 1);
2995 break;
2996
2997 case tok_script:
2998 /* We get told about the scripts we know. */
2999 arg = lr_token (ldfile, charmap, repertoire);
3000 if (arg->tok != tok_bsymbol)
3001 goto err_label;
3002 else
3003 {
3004 struct section_list *runp = collate->known_sections;
3005 char *name;
3006
3007 while (runp != NULL)
3008 if (strncmp (runp->name, arg->val.str.startmb,
3009 arg->val.str.lenmb) == 0
3010 && runp->name[arg->val.str.lenmb] == '\0')
3011 break;
3012 else
3013 runp = runp->def_next;
3014
3015 if (runp != NULL)
3016 {
3017 lr_error (ldfile, _("duplicate definition of script `%s'"),
3018 runp->name);
3019 lr_ignore_rest (ldfile, 0);
3020 break;
3021 }
3022
3023 runp = (struct section_list *) xcalloc (1, sizeof (*runp));
3024 name = strncpy (xmalloc (arg->val.str.lenmb + 1),
3025 arg->val.str.startmb, arg->val.str.lenmb);
3026 name[arg->val.str.lenmb] = '\0';
3027 runp->name = name;
3028
3029 runp->def_next = collate->known_sections;
3030 collate->known_sections = runp;
3031 }
3032 lr_ignore_rest (ldfile, 1);
3033 break;
3034
3035 case tok_order_start:
3036 /* Ignore the rest of the line if we don't need the input of
3037 this line. */
3038 if (ignore_content)
3039 {
3040 lr_ignore_rest (ldfile, 0);
3041 break;
3042 }
3043
3044 if (state != 0 && state != 1)
3045 goto err_label;
3046 state = 1;
3047
3048 /* The 14652 draft does not specify whether all `order_start' lines
3049 must contain the same number of sort-rules, but 14651 does. So
3050 we require this here as well. */
3051 arg = lr_token (ldfile, charmap, repertoire);
3052 if (arg->tok == tok_bsymbol)
3053 {
3054 /* This better should be a section name. */
3055 struct section_list *sp = collate->known_sections;
3056 while (sp != NULL
3057 && (sp->name == NULL
3058 || strncmp (sp->name, arg->val.str.startmb,
3059 arg->val.str.lenmb) != 0
3060 || sp->name[arg->val.str.lenmb] != '\0'))
3061 sp = sp->def_next;
3062
3063 if (sp == NULL)
3064 {
3065 lr_error (ldfile, _("\
3066 %s: unknown section name `%s'"),
3067 "LC_COLLATE", arg->val.str.startmb);
3068 /* We use the error section. */
3069 collate->current_section = &collate->error_section;
3070
3071 if (collate->error_section.first == NULL)
3072 {
3073 if (collate->sections == NULL)
3074 collate->sections = &collate->error_section;
3075 else
3076 {
3077 sp = collate->sections;
3078 while (sp->next != NULL)
3079 sp = sp->next;
3080
3081 collate->error_section.next = NULL;
3082 sp->next = &collate->error_section;
3083 }
3084 }
3085 }
3086 else
3087 {
3088 /* One should not be allowed to open the same
3089 section twice. */
3090 if (sp->first != NULL)
3091 lr_error (ldfile, _("\
3092 %s: multiple order definitions for section `%s'"),
3093 "LC_COLLATE", sp->name);
3094 else
3095 {
3096 if (collate->current_section == NULL)
3097 collate->current_section = sp;
3098 else
3099 {
3100 sp->next = collate->current_section->next;
3101 collate->current_section->next = sp;
3102 }
3103 }
3104
3105 /* Next should come the end of the line or a semicolon. */
3106 arg = lr_token (ldfile, charmap, repertoire);
3107 if (arg->tok == tok_eol)
3108 {
3109 uint32_t cnt;
3110
3111 /* This means we have exactly one rule: `forward'. */
3112 if (nrules > 1)
3113 lr_error (ldfile, _("\
3114 %s: invalid number of sorting rules"),
3115 "LC_COLLATE");
3116 else
3117 nrules = 1;
3118 sp->rules = obstack_alloc (&collate->mempool,
3119 (sizeof (enum coll_sort_rule)
3120 * nrules));
3121 for (cnt = 0; cnt < nrules; ++cnt)
3122 sp->rules[cnt] = sort_forward;
3123
3124 /* Next line. */
3125 break;
3126 }
3127
3128 /* Get the next token. */
3129 arg = lr_token (ldfile, charmap, repertoire);
3130 }
3131 }
3132 else
3133 {
3134 /* There is no section symbol. Therefore we use the unnamed
3135 section. */
3136 collate->current_section = &collate->unnamed_section;
3137
3138 if (collate->unnamed_section.first != NULL)
3139 lr_error (ldfile, _("\
3140 %s: multiple order definitions for unnamed section"),
3141 "LC_COLLATE");
3142 else
3143 {
3144 collate->unnamed_section.next = collate->sections;
3145 collate->sections = &collate->unnamed_section;
3146 }
3147 }
3148
3149 /* Now read the direction names. */
3150 read_directions (ldfile, arg, charmap, repertoire, collate);
3151
3152 /* From now on we need the strings untranslated. */
3153 ldfile->translate_strings = 0;
3154 break;
3155
3156 case tok_order_end:
3157 /* Ignore the rest of the line if we don't need the input of
3158 this line. */
3159 if (ignore_content)
3160 {
3161 lr_ignore_rest (ldfile, 0);
3162 break;
3163 }
3164
3165 if (state != 1)
3166 goto err_label;
3167
3168 /* Handle ellipsis at end of list. */
3169 if (was_ellipsis != tok_none)
3170 {
3171 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3172 repertoire, collate);
3173 was_ellipsis = tok_none;
3174 }
3175
3176 state = 2;
3177 lr_ignore_rest (ldfile, 1);
3178 break;
3179
3180 case tok_reorder_after:
3181 /* Ignore the rest of the line if we don't need the input of
3182 this line. */
3183 if (ignore_content)
3184 {
3185 lr_ignore_rest (ldfile, 0);
3186 break;
3187 }
3188
3189 if (state == 1)
3190 {
3191 lr_error (ldfile, _("%s: missing `order_end' keyword"),
3192 "LC_COLLATE");
3193 state = 2;
3194
3195 /* Handle ellipsis at end of list. */
3196 if (was_ellipsis != tok_none)
3197 {
3198 handle_ellipsis (ldfile, arg->val.str.startmb,
3199 arg->val.str.lenmb, was_ellipsis, charmap,
3200 repertoire, collate);
3201 was_ellipsis = tok_none;
3202 }
3203 }
3204 else if (state != 2 && state != 3)
3205 goto err_label;
3206 state = 3;
3207
3208 arg = lr_token (ldfile, charmap, repertoire);
3209 if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3210 {
3211 /* Find this symbol in the sequence table. */
3212 char ucsbuf[10];
3213 char *startmb;
3214 size_t lenmb;
3215 struct element_t *insp;
3216 int no_error = 1;
3217
3218 if (arg->tok == tok_bsymbol)
3219 {
3220 startmb = arg->val.str.startmb;
3221 lenmb = arg->val.str.lenmb;
3222 }
3223 else
3224 {
3225 sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3226 startmb = ucsbuf;
3227 lenmb = 9;
3228 }
3229
3230 if (find_entry (&collate->seq_table, startmb, lenmb,
3231 (void **) &insp) == 0)
3232 /* Yes, the symbol exists. Simply point the cursor
3233 to it. */
3234 collate->cursor = insp;
3235 else
3236 {
3237 struct symbol_t *symbp;
3238
3239 if (find_entry (&collate->sym_table, startmb, lenmb,
3240 (void **) &symbp) == 0)
3241 {
3242 if (symbp->order->last != NULL
3243 || symbp->order->next != NULL)
3244 collate->cursor = symbp->order;
3245 else
3246 {
3247 /* This is a collating symbol but its position
3248 is not yet defined. */
3249 lr_error (ldfile, _("\
3250 %s: order for collating symbol %.*s not yet defined"),
3251 "LC_COLLATE", (int) lenmb, startmb);
3252 collate->cursor = NULL;
3253 no_error = 0;
3254 }
3255 }
3256 else if (find_entry (&collate->elem_table, startmb, lenmb,
3257 (void **) &insp) == 0)
3258 {
3259 if (insp->last != NULL || insp->next != NULL)
3260 collate->cursor = insp;
3261 else
3262 {
3263 /* This is a collating element but its position
3264 is not yet defined. */
3265 lr_error (ldfile, _("\
3266 %s: order for collating element %.*s not yet defined"),
3267 "LC_COLLATE", (int) lenmb, startmb);
3268 collate->cursor = NULL;
3269 no_error = 0;
3270 }
3271 }
3272 else
3273 {
3274 /* This is bad. The symbol after which we have to
3275 insert does not exist. */
3276 lr_error (ldfile, _("\
3277 %s: cannot reorder after %.*s: symbol not known"),
3278 "LC_COLLATE", (int) lenmb, startmb);
3279 collate->cursor = NULL;
3280 no_error = 0;
3281 }
3282 }
3283
3284 lr_ignore_rest (ldfile, no_error);
3285 }
3286 else
3287 /* This must not happen. */
3288 goto err_label;
3289 break;
3290
3291 case tok_reorder_end:
3292 /* Ignore the rest of the line if we don't need the input of
3293 this line. */
3294 if (ignore_content)
3295 break;
3296
3297 if (state != 3)
3298 goto err_label;
3299 state = 4;
3300 lr_ignore_rest (ldfile, 1);
3301 break;
3302
3303 case tok_reorder_sections_after:
3304 /* Ignore the rest of the line if we don't need the input of
3305 this line. */
3306 if (ignore_content)
3307 {
3308 lr_ignore_rest (ldfile, 0);
3309 break;
3310 }
3311
3312 if (state == 1)
3313 {
3314 lr_error (ldfile, _("%s: missing `order_end' keyword"),
3315 "LC_COLLATE");
3316 state = 2;
3317
3318 /* Handle ellipsis at end of list. */
3319 if (was_ellipsis != tok_none)
3320 {
3321 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3322 repertoire, collate);
3323 was_ellipsis = tok_none;
3324 }
3325 }
3326 else if (state == 3)
3327 {
3328 error (0, 0, _("%s: missing `reorder-end' keyword"),
3329 "LC_COLLATE");
3330 state = 4;
3331 }
3332 else if (state != 2 && state != 4)
3333 goto err_label;
3334 state = 5;
3335
3336 /* Get the name of the sections we are adding after. */
3337 arg = lr_token (ldfile, charmap, repertoire);
3338 if (arg->tok == tok_bsymbol)
3339 {
3340 /* Now find a section with this name. */
3341 struct section_list *runp = collate->sections;
3342
3343 while (runp != NULL)
3344 {
3345 if (runp->name != NULL
3346 && strlen (runp->name) == arg->val.str.lenmb
3347 && memcmp (runp->name, arg->val.str.startmb,
3348 arg->val.str.lenmb) == 0)
3349 break;
3350
3351 runp = runp->next;
3352 }
3353
3354 if (runp != NULL)
3355 collate->current_section = runp;
3356 else
3357 {
3358 /* This is bad. The section after which we have to
3359 reorder does not exist. Therefore we cannot
3360 process the whole rest of this reorder
3361 specification. */
3362 lr_error (ldfile, _("%s: section `%.*s' not known"),
3363 "LC_COLLATE", (int) arg->val.str.lenmb,
3364 arg->val.str.startmb);
3365
3366 do
3367 {
3368 lr_ignore_rest (ldfile, 0);
3369
3370 now = lr_token (ldfile, charmap, NULL);
3371 }
3372 while (now->tok == tok_reorder_sections_after
3373 || now->tok == tok_reorder_sections_end
3374 || now->tok == tok_end);
3375
3376 /* Process the token we just saw. */
3377 nowtok = now->tok;
3378 continue;
3379 }
3380 }
3381 else
3382 /* This must not happen. */
3383 goto err_label;
3384 break;
3385
3386 case tok_reorder_sections_end:
3387 /* Ignore the rest of the line if we don't need the input of
3388 this line. */
3389 if (ignore_content)
3390 break;
3391
3392 if (state != 5)
3393 goto err_label;
3394 state = 6;
3395 lr_ignore_rest (ldfile, 1);
3396 break;
3397
3398 case tok_bsymbol:
3399 case tok_ucs4:
3400 /* Ignore the rest of the line if we don't need the input of
3401 this line. */
3402 if (ignore_content)
3403 {
3404 lr_ignore_rest (ldfile, 0);
3405 break;
3406 }
3407
3408 if (state != 0 && state != 1 && state != 3 && state != 5)
3409 goto err_label;
3410
3411 if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3412 goto err_label;
3413
3414 if (nowtok == tok_ucs4)
3415 {
3416 snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3417 symstr = ucs4buf;
3418 symlen = 9;
3419 }
3420 else
3421 {
3422 symstr = arg->val.str.startmb;
3423 symlen = arg->val.str.lenmb;
3424 }
3425
3426 if (state == 0)
3427 {
3428 /* We are outside an `order_start' region. This means
3429 we must only accept definitions of values for
3430 collation symbols since these are purely abstract
3431 values and don't need directions associated. */
3432 struct element_t *seqp;
3433
3434 if (find_entry (&collate->seq_table, symstr, symlen,
3435 (void **) &seqp) == 0)
3436 {
3437 /* It's already defined. First check whether this
3438 is really a collating symbol. */
3439 if (seqp->is_character)
3440 goto err_label;
3441
3442 goto move_entry;
3443 }
3444 else
3445 {
3446 void *result;
3447
3448 if (find_entry (&collate->sym_table, symstr, symlen,
3449 &result) != 0)
3450 /* No collating symbol, it's an error. */
3451 goto err_label;
3452
3453 /* Maybe this is the first time we define a symbol
3454 value and it is before the first actual section. */
3455 if (collate->sections == NULL)
3456 collate->sections = collate->current_section =
3457 &collate->symbol_section;
3458 }
3459
3460 if (was_ellipsis != tok_none)
3461 {
3462
3463 handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3464 charmap, repertoire, collate);
3465
3466 /* Remember that we processed the ellipsis. */
3467 was_ellipsis = tok_none;
3468
3469 /* And don't add the value a second time. */
3470 break;
3471 }
3472 }
3473 else if (state == 3)
3474 {
3475 /* It is possible that we already have this collation sequence.
3476 In this case we move the entry. */
3477 struct element_t *seqp;
3478 void *sym;
3479
3480 /* If the symbol after which we have to insert was not found
3481 ignore all entries. */
3482 if (collate->cursor == NULL)
3483 {
3484 lr_ignore_rest (ldfile, 0);
3485 break;
3486 }
3487
3488 if (find_entry (&collate->seq_table, symstr, symlen,
3489 (void **) &seqp) == 0)
3490 goto move_entry;
3491
3492 if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
3493 && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3494 goto move_entry;
3495
3496 if (find_entry (&collate->elem_table, symstr, symlen,
3497 (void **) &seqp) == 0)
3498 {
3499 move_entry:
3500 /* Remove the entry from the old position. */
3501 if (seqp->last == NULL)
3502 collate->start = seqp->next;
3503 else
3504 seqp->last->next = seqp->next;
3505 if (seqp->next != NULL)
3506 seqp->next->last = seqp->last;
3507
3508 /* We also have to check whether this entry is the
3509 first or last of a section. */
3510 if (seqp->section->first == seqp)
3511 {
3512 if (seqp->section->first == seqp->section->last)
3513 /* This section has no content anymore. */
3514 seqp->section->first = seqp->section->last = NULL;
3515 else
3516 seqp->section->first = seqp->next;
3517 }
3518 else if (seqp->section->last == seqp)
3519 seqp->section->last = seqp->last;
3520
3521 /* Now insert it in the new place. */
3522 insert_weights (ldfile, seqp, charmap, repertoire, collate,
3523 tok_none);
3524 break;
3525 }
3526
3527 /* Otherwise we just add a new entry. */
3528 }
3529 else if (state == 5)
3530 {
3531 /* We are reordering sections. Find the named section. */
3532 struct section_list *runp = collate->sections;
3533 struct section_list *prevp = NULL;
3534
3535 while (runp != NULL)
3536 {
3537 if (runp->name != NULL
3538 && strlen (runp->name) == symlen
3539 && memcmp (runp->name, symstr, symlen) == 0)
3540 break;
3541
3542 prevp = runp;
3543 runp = runp->next;
3544 }
3545
3546 if (runp == NULL)
3547 {
3548 lr_error (ldfile, _("%s: section `%.*s' not known"),
3549 "LC_COLLATE", (int) symlen, symstr);
3550 lr_ignore_rest (ldfile, 0);
3551 }
3552 else
3553 {
3554 if (runp != collate->current_section)
3555 {
3556 /* Remove the named section from the old place and
3557 insert it in the new one. */
3558 prevp->next = runp->next;
3559
3560 runp->next = collate->current_section->next;
3561 collate->current_section->next = runp;
3562 collate->current_section = runp;
3563 }
3564
3565 /* Process the rest of the line which might change
3566 the collation rules. */
3567 arg = lr_token (ldfile, charmap, repertoire);
3568 if (arg->tok != tok_eof && arg->tok != tok_eol)
3569 read_directions (ldfile, arg, charmap, repertoire,
3570 collate);
3571 }
3572 break;
3573 }
3574 else if (was_ellipsis != tok_none)
3575 {
3576 /* Using the information in the `ellipsis_weight'
3577 element and this and the last value we have to handle
3578 the ellipsis now. */
3579 assert (state == 1);
3580
3581 handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3582 repertoire, collate);
3583
3584 /* Remember that we processed the ellipsis. */
3585 was_ellipsis = tok_none;
3586
3587 /* And don't add the value a second time. */
3588 break;
3589 }
3590
3591 /* Now insert in the new place. */
3592 insert_value (ldfile, symstr, symlen, charmap, repertoire, collate);
3593 break;
3594
3595 case tok_undefined:
3596 /* Ignore the rest of the line if we don't need the input of
3597 this line. */
3598 if (ignore_content)
3599 {
3600 lr_ignore_rest (ldfile, 0);
3601 break;
3602 }
3603
3604 if (state != 1)
3605 goto err_label;
3606
3607 if (was_ellipsis != tok_none)
3608 {
3609 lr_error (ldfile,
3610 _("%s: cannot have `%s' as end of ellipsis range"),
3611 "LC_COLLATE", "UNDEFINED");
3612
3613 unlink_element (collate);
3614 was_ellipsis = tok_none;
3615 }
3616
3617 /* See whether UNDEFINED already appeared somewhere. */
3618 if (collate->undefined.next != NULL
3619 || (collate->cursor != NULL
3620 && collate->undefined.next == collate->cursor))
3621 {
3622 lr_error (ldfile,
3623 _("%s: order for `%.*s' already defined at %s:%Zu"),
3624 "LC_COLLATE", 9, "UNDEFINED",
3625 collate->undefined.file,
3626 collate->undefined.line);
3627 lr_ignore_rest (ldfile, 0);
3628 }
3629 else
3630 /* Parse the weights. */
3631 insert_weights (ldfile, &collate->undefined, charmap,
3632 repertoire, collate, tok_none);
3633 break;
3634
3635 case tok_ellipsis2:
3636 case tok_ellipsis3:
3637 case tok_ellipsis4:
3638 /* This is the symbolic (decimal or hexadecimal) or absolute
3639 ellipsis. */
3640 if (was_ellipsis != tok_none)
3641 goto err_label;
3642
3643 if (state != 0 && state != 1 && state != 3)
3644 goto err_label;
3645
3646 was_ellipsis = nowtok;
3647
3648 insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3649 repertoire, collate, nowtok);
3650 break;
3651
3652 case tok_end:
3653 /* Next we assume `LC_COLLATE'. */
3654 if (!ignore_content)
3655 {
3656 if (state == 0)
3657 /* We must either see a copy statement or have
3658 ordering values. */
3659 lr_error (ldfile,
3660 _("%s: empty category description not allowed"),
3661 "LC_COLLATE");
3662 else if (state == 1)
3663 {
3664 lr_error (ldfile, _("%s: missing `order_end' keyword"),
3665 "LC_COLLATE");
3666
3667 /* Handle ellipsis at end of list. */
3668 if (was_ellipsis != tok_none)
3669 {
3670 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3671 repertoire, collate);
3672 was_ellipsis = tok_none;
3673 }
3674 }
3675 else if (state == 3)
3676 error (0, 0, _("%s: missing `reorder-end' keyword"),
3677 "LC_COLLATE");
3678 else if (state == 5)
3679 error (0, 0, _("%s: missing `reorder-sections-end' keyword"),
3680 "LC_COLLATE");
3681 }
3682 arg = lr_token (ldfile, charmap, NULL);
3683 if (arg->tok == tok_eof)
3684 break;
3685 if (arg->tok == tok_eol)
3686 lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3687 else if (arg->tok != tok_lc_collate)
3688 lr_error (ldfile, _("\
3689 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3690 lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3691 return;
3692
3693 default:
3694 err_label:
3695 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3696 }
3697
3698 /* Prepare for the next round. */
3699 now = lr_token (ldfile, charmap, NULL);
3700 nowtok = now->tok;
3701 }
3702
3703 /* When we come here we reached the end of the file. */
3704 lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
3705 }