]> git.ipfire.org Git - thirdparty/glibc.git/blame - locale/programs/charmap.c
Update.
[thirdparty/glibc.git] / locale / programs / charmap.c
CommitLineData
74015205 1/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
df4ef2ab
UD
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
19bc17a9 4
df4ef2ab
UD
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
19bc17a9 9
df4ef2ab
UD
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
19bc17a9 14
df4ef2ab
UD
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
19bc17a9
RM
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
24#include <ctype.h>
51702635 25#include <dirent.h>
19bc17a9
RM
26#include <errno.h>
27#include <libintl.h>
28#include <obstack.h>
29#include <stdlib.h>
30#include <string.h>
31#include <unistd.h>
32
33#include "error.h"
34#include "linereader.h"
35#include "charset.h"
69f155d4
UD
36#include "locfile.h"
37#include "repertoire.h"
19bc17a9
RM
38
39
40/* Uncomment following line for production version. */
41/* define NDEBUG 1 */
42#include <assert.h>
43
44
45/* Define the lookup function. */
46#include "charmap-kw.h"
47
48
04be94a8 49extern void *xmalloc (size_t __n);
19bc17a9
RM
50
51/* Prototypes for local functions. */
52static struct charset_t *parse_charmap (const char *filename);
75cd5204
RM
53static void new_width (struct linereader *cmfile, struct charset_t *result,
54 const char *from, const char *to,
55 unsigned long int width);
19bc17a9
RM
56
57
58struct charset_t *
59charmap_read (const char *filename)
60{
61 const char *pathnfile;
62 struct charset_t *result = NULL;
63
64 if (filename != NULL)
65 {
66 if (euidaccess (filename, R_OK) >= 0)
67 pathnfile = filename;
04be94a8 68 else if (filename[0] != '/')
19bc17a9
RM
69 {
70 char *cp = xmalloc (strlen (filename) + sizeof CHARMAP_PATH + 1);
71 stpcpy (stpcpy (stpcpy (cp, CHARMAP_PATH), "/"), filename);
72
73 pathnfile = (const char *) cp;
74 }
04be94a8
UD
75 else
76 pathnfile = NULL;
19bc17a9 77
04be94a8
UD
78 if (pathnfile != NULL)
79 {
80 result = parse_charmap (pathnfile);
19bc17a9 81
04be94a8
UD
82 if (result == NULL && !be_quiet)
83 error (0, errno, _("character map file `%s' not found"), filename);
84 }
19bc17a9
RM
85 }
86
51702635
UD
87 if (result == NULL)
88 {
89 /* OK, one more try. We also accept the names given to the
90 character sets in the files. Sometimes they differ from the
91 file name. */
92 DIR *dir;
93 struct dirent *dirent;
94
95 dir = opendir (CHARMAP_PATH);
96 if (dir == NULL)
97 {
98 while ((dirent = readdir (dir)) != NULL)
99 if (strcmp (dirent->d_name, ".") != 0
100 && strcmp (dirent->d_name, "..") != 0)
101 {
102 char buf[sizeof (CHARMAP_PATH)
103 + strlen (dirent->d_name) + 1];
104 FILE *fp;
105#ifdef _DIRENT_HAVE_D_TYPE
106 if (dirent->d_type != DT_UNKNOWN && dirent->d_type != DT_REG)
107 continue;
108#endif
109 stpcpy (stpcpy (stpcpy (buf, CHARMAP_PATH), "/"),
110 dirent->d_name);
111
112 fp = fopen (buf, "r");
113 if (fp != NULL)
114 {
115 char *name = NULL;
116
117 while (!feof (fp))
118 {
119 char junk[BUFSIZ];
120
04be94a8
UD
121 if (fscanf (fp, " <code_set_name> %as", &name) == 1
122 || fscanf (fp, "%% alias %as", &name) == 1)
123 {
124 if (strcasecmp (name, filename) == 0)
125 break;
126
127 free (name);
128 name = NULL;
129 }
130
131 if (fgets (junk, sizeof junk, fp) != NULL)
132 {
133 if (strstr (junk, "CHARMAP") != NULL)
134 /* We cannot expect more aliases from now on. */
135 break;
51702635 136
04be94a8
UD
137 while (strchr (junk, '\n') == NULL
138 && fgets (junk, sizeof junk, fp) != NULL)
139 continue;
140 }
51702635
UD
141 }
142
143 fclose (fp);
144
145 if (name != NULL)
146 {
04be94a8 147 result = parse_charmap (buf);
51702635 148
04be94a8 149 free (buf);
51702635 150
04be94a8
UD
151 if (result)
152 return result;
51702635 153
04be94a8 154 break;
51702635
UD
155 }
156 }
157 }
158
159 closedir (dir);
160 }
161 }
162
19bc17a9
RM
163 if (result == NULL)
164 {
165 pathnfile = CHARMAP_PATH "/" DEFAULT_CHARMAP;
166
167 result = parse_charmap (pathnfile);
168
169 if (result == NULL)
170 error (4, errno, _("default character map file `%s' not found"),
171 DEFAULT_CHARMAP);
172 }
173
174 return result;
175}
176
177
178static struct charset_t *
179parse_charmap (const char *filename)
180{
181 struct linereader *cmfile;
182 struct charset_t *result;
183 int state;
184 enum token_t expected_tok = tok_error;
185 const char *expected_str = NULL;
186 char *from_name = NULL;
187 char *to_name = NULL;
188
189 /* Determine path. */
190 cmfile = lr_open (filename, charmap_hash);
191 if (cmfile == NULL)
192 {
193 if (strchr (filename, '/') == NULL)
194 {
195 /* Look in the systems charmap directory. */
196 char *buf = xmalloc (strlen (filename) + 1 + sizeof (CHARMAP_PATH));
197
198 stpcpy (stpcpy (stpcpy (buf, CHARMAP_PATH), "/"), filename);
199 cmfile = lr_open (buf, charmap_hash);
200
201 if (cmfile == NULL)
202 free (buf);
203 }
204
205 if (cmfile == NULL)
206 return NULL;
207 }
208
209 /* Allocate room for result. */
210 result = (struct charset_t *) xmalloc (sizeof (struct charset_t));
211 memset (result, '\0', sizeof (struct charset_t));
75cd5204
RM
212 /* The default DEFAULT_WIDTH is 1. */
213 result->width_default = 1;
69f155d4
UD
214 /* Let the user overwrite the repertoire map we use. */
215 result->repertoiremap = repertoiremap;
19bc17a9 216
df4ef2ab 217#define obstack_chunk_alloc malloc
19bc17a9
RM
218#define obstack_chunk_free free
219 obstack_init (&result->mem_pool);
220
221 if (init_hash (&result->char_table, 256))
222 {
223 free (result);
224 return NULL;
225 }
226
227 /* We use a state machine to describe the charmap description file
228 format. */
229 state = 1;
230 while (1)
231 {
232 /* What's on? */
233 struct token *now = lr_token (cmfile, NULL);
234 enum token_t nowtok = now->tok;
235 struct token *arg;
236
237 if (nowtok == tok_eof)
238 break;
239
240 switch (state)
241 {
242 case 1:
243 /* The beginning. We expect the special declarations, EOL or
244 `CHARMAP'. */
245 if (nowtok == tok_eol)
246 /* Ignore empty lines. */
247 continue;
248
249 if (nowtok == tok_charmap)
250 {
251 from_name = NULL;
252 to_name = NULL;
253
254 /* We have to set up the real work. Fill in some
255 default values. */
256 if (result->mb_cur_max == 0)
257 result->mb_cur_max = 1;
258 if (result->mb_cur_min == 0)
259 result->mb_cur_min = result->mb_cur_max;
880f421f 260 if (result->mb_cur_min > result->mb_cur_max)
19bc17a9 261 {
880f421f
UD
262 if (!be_quiet)
263 error (0, 0, _("\
19bc17a9 264%s: <mb_cur_max> must be greater than <mb_cur_min>\n"),
880f421f 265 cmfile->fname);
19bc17a9
RM
266
267 result->mb_cur_min = result->mb_cur_max;
268 }
269
270 lr_ignore_rest (cmfile, 1);
271
69f155d4
UD
272 /* Read the repertoire map now. */
273 if (result->repertoiremap == NULL)
274 /* This is fatal. */
275 error (4, 0, _("no repertoire map specified: cannot proceed"));
276
277 result->repertoire = repertoire_read (result->repertoiremap);
278 if (result->repertoire == NULL)
279 /* This is also fatal. */
280 error (4, errno, _("cannot read repertoire map `%s'"),
281 result->repertoiremap);
282
19bc17a9
RM
283 state = 2;
284 continue;
285 }
286
287 if (nowtok != tok_code_set_name && nowtok != tok_mb_cur_max
288 && nowtok != tok_mb_cur_min && nowtok != tok_escape_char
289 && nowtok != tok_comment_char && nowtok != tok_g0esc
290 && nowtok != tok_g1esc && nowtok != tok_g2esc
69f155d4 291 && nowtok != tok_g3esc && nowtok != tok_repertoiremap)
19bc17a9
RM
292 {
293 lr_error (cmfile, _("syntax error in prolog: %s"),
294 _("illegal definition"));
295
296 lr_ignore_rest (cmfile, 0);
297 continue;
298 }
299
300 /* We know that we need an argument. */
301 arg = lr_token (cmfile, NULL);
302
303 switch (nowtok)
304 {
305 case tok_code_set_name:
306 if (arg->tok != tok_ident)
307 {
308 badarg:
309 lr_error (cmfile, _("syntax error in prolog: %s"),
310 _("bad argument"));
311
312 lr_ignore_rest (cmfile, 0);
313 continue;
314 }
315
316 result->code_set_name = obstack_copy0 (&result->mem_pool,
317 arg->val.str.start,
318 arg->val.str.len);
319
320 lr_ignore_rest (cmfile, 1);
321 continue;
322
69f155d4
UD
323 case tok_repertoiremap:
324 if (arg->tok != tok_ident)
325 goto badarg;
326
327 if (result->repertoiremap == NULL)
328 result->repertoiremap = obstack_copy0 (&result->mem_pool,
329 arg->val.str.start,
330 arg->val.str.len);
331
332 lr_ignore_rest (cmfile, 1);
333 continue;
334
19bc17a9
RM
335 case tok_mb_cur_max:
336 case tok_mb_cur_min:
337 if (arg->tok != tok_number)
338 goto badarg;
339
340 if (arg->val.num < 1 || arg->val.num > 4)
341 {
342 lr_error (cmfile,
343 _("value for <%s> must lie between 1 and 4"),
344 nowtok == tok_mb_cur_min ? "mb_cur_min"
345 : "mb_cur_max");
346
347 lr_ignore_rest (cmfile, 0);
348 continue;
349 }
350 if ((nowtok == tok_mb_cur_max && result->mb_cur_min != 0
ba1ffaa1 351 && (int) arg->val.num < result->mb_cur_min)
19bc17a9 352 || (nowtok == tok_mb_cur_min && result->mb_cur_max != 0
ba1ffaa1 353 && (int) arg->val.num > result->mb_cur_max))
19bc17a9
RM
354 {
355 lr_error (cmfile, _("\
356value of <mb_cur_max> must be greater than the value of <mb_cur_min>"));
357
358 lr_ignore_rest (cmfile, 0);
359 continue;
360 }
361
362 if (nowtok == tok_mb_cur_max)
363 result->mb_cur_max = arg->val.num;
364 else
365 result->mb_cur_min = arg->val.num;
366
367 lr_ignore_rest (cmfile, 1);
368 continue;
369
370 case tok_escape_char:
371 case tok_comment_char:
372 if (arg->tok != tok_ident)
373 goto badarg;
374
375 if (arg->val.str.len != 1)
376 {
377 lr_error (cmfile, _("\
378argument to <%s> must be a single character"),
379 nowtok == tok_escape_char ? "escape_char"
380 : "comment_char");
381
382 lr_ignore_rest (cmfile, 0);
383 continue;
384 }
385
386 if (nowtok == tok_escape_char)
387 cmfile->escape_char = *arg->val.str.start;
388 else
389 cmfile->comment_char = *arg->val.str.start;
390
391 lr_ignore_rest (cmfile, 1);
392 continue;
393
394 case tok_g0esc:
395 case tok_g1esc:
396 case tok_g2esc:
397 case tok_g3esc:
398 lr_ignore_rest (cmfile, 0); /* XXX */
399 continue;
400
401 default:
402 /* Cannot happen. */
403 assert (! "Should not happen");
404 }
405 break;
406
407 case 2:
408 /* We have seen `CHARMAP' and now are in the body. Each line
409 must have the format "%s %s %s\n" or "%s...%s %s %s\n". */
410 if (nowtok == tok_eol)
411 /* Ignore empty lines. */
412 continue;
413
414 if (nowtok == tok_end)
415 {
416 expected_tok = tok_charmap;
417 expected_str = "CHARMAP";
418 state = 90;
419 continue;
420 }
421
422 if (nowtok != tok_bsymbol)
423 {
424 lr_error (cmfile, _("syntax error in %s definition: %s"),
425 "CHARMAP", _("no symbolic name given"));
426
427 lr_ignore_rest (cmfile, 0);
428 continue;
429 }
430
431 /* If the previous line was not completely correct free the
432 used memory. */
433 if (from_name != NULL)
434 obstack_free (&result->mem_pool, from_name);
435
436 from_name = (char *) obstack_copy0 (&result->mem_pool,
437 now->val.str.start,
438 now->val.str.len);
439 to_name = NULL;
440
441 state = 3;
442 continue;
443
444 case 3:
445 /* We have two possibilities: We can see an ellipsis or an
446 encoding value. */
447 if (nowtok == tok_ellipsis)
448 {
449 state = 4;
450 continue;
451 }
452 /* FALLTHROUGH */
453
454 case 5:
455 if (nowtok != tok_charcode && nowtok != tok_ucs2
456 && nowtok != tok_ucs4)
457 {
458 lr_error (cmfile, _("syntax error in %s definition: %s"),
459 "CHARMAP", _("illegal encoding given"));
460
461 lr_ignore_rest (cmfile, 0);
462
463 state = 2;
464 continue;
465 }
466
69f155d4
UD
467 if (now->val.charcode.nbytes < result->mb_cur_min)
468 lr_error (cmfile, _("too few bytes in character encoding"));
469 else if (now->val.charcode.nbytes > result->mb_cur_max)
470 lr_error (cmfile, _("too many bytes in character encoding"));
19bc17a9 471 else
69f155d4
UD
472 charset_new_char (cmfile, &result->char_table,
473 now->val.charcode.nbytes,
474 now->val.charcode.val, from_name, to_name);
19bc17a9
RM
475
476 /* Ignore trailing comment silently. */
477 lr_ignore_rest (cmfile, 0);
478
479 from_name = NULL;
480 to_name = NULL;
481
482 state = 2;
483 continue;
484
485 case 4:
486 if (nowtok != tok_bsymbol)
487 {
488 lr_error (cmfile, _("syntax error in %s definition: %s"),
489 "CHARMAP",
490 _("no symbolic name given for end of range"));
491
492 lr_ignore_rest (cmfile, 0);
493 continue;
494 }
495
69f155d4 496 /* Copy the to-name in a safe place. */
19bc17a9
RM
497 to_name = (char *) obstack_copy0 (&result->mem_pool,
498 cmfile->token.val.str.start,
499 cmfile->token.val.str.len);
500
74015205 501 state = 5;
19bc17a9
RM
502 continue;
503
504 case 90:
505 if (nowtok != expected_tok)
506 lr_error (cmfile, _("\
507`%1$s' definition does not end with `END %1$s'"), expected_str);
508
509 lr_ignore_rest (cmfile, nowtok == expected_tok);
510 state = 91;
511 continue;
512
513 case 91:
514 /* Waiting for WIDTH... */
75cd5204
RM
515 if (nowtok == tok_eol)
516 /* Ignore empty lines. */
517 continue;
518
19bc17a9
RM
519 if (nowtok == tok_width_default)
520 {
521 state = 92;
522 continue;
523 }
524
525 if (nowtok == tok_width)
526 {
527 lr_ignore_rest (cmfile, 1);
528 state = 93;
529 continue;
530 }
531
532 if (nowtok == tok_width_variable)
533 {
534 lr_ignore_rest (cmfile, 1);
535 state = 98;
536 continue;
537 }
538
539 lr_error (cmfile, _("\
540only WIDTH definitions are allowed to follow the CHARMAP definition"));
541
542 lr_ignore_rest (cmfile, 0);
543 continue;
544
545 case 92:
546 if (nowtok != tok_number)
547 lr_error (cmfile, _("value for %s must be an integer"),
548 "WIDTH_DEFAULT");
549 else
550 result->width_default = now->val.num;
551
552 lr_ignore_rest (cmfile, nowtok == tok_number);
553
554 state = 91;
555 continue;
556
557 case 93:
558 /* We now expect `END WIDTH' or lines of the format "%s %d\n" or
559 "%s...%s %d\n". */
560 if (nowtok == tok_eol)
561 /* ignore empty lines. */
562 continue;
563
564 if (nowtok == tok_end)
565 {
566 expected_tok = tok_width;
567 expected_str = "WIDTH";
568 state = 90;
569 continue;
570 }
571
572 if (nowtok != tok_bsymbol)
573 {
574 lr_error (cmfile, _("syntax error in %s definition: %s"),
575 "WIDTH", _("no symbolic name given"));
576
577 lr_ignore_rest (cmfile, 0);
578 continue;
579 }
580
581 if (from_name != NULL)
582 obstack_free (&result->mem_pool, from_name);
583
584 from_name = (char *) obstack_copy0 (&result->mem_pool,
585 now->val.str.start,
586 now->val.str.len);
587 to_name = NULL;
588
589 state = 94;
590 continue;
591
592 case 94:
593 if (nowtok == tok_ellipsis)
75cd5204
RM
594 {
595 state = 95;
596 continue;
597 }
19bc17a9
RM
598
599 case 96:
600 if (nowtok != tok_number)
601 lr_error (cmfile, _("value for %s must be an integer"),
602 "WIDTH");
603 else
604 {
75cd5204
RM
605 /* Store width for chars. */
606 new_width (cmfile, result, from_name, to_name, now->val.num);
607
19bc17a9 608 from_name = NULL;
75cd5204 609 to_name = NULL;
19bc17a9
RM
610 }
611
612 lr_ignore_rest (cmfile, nowtok == tok_number);
613
614 state = 93;
615 continue;
616
617 case 95:
618 if (nowtok != tok_bsymbol)
619 {
620 lr_error (cmfile, _("syntax error in %s definition: %s"),
621 "WIDTH", _("no symbolic name given for end of range"));
622
623 lr_ignore_rest (cmfile, 0);
624
625 state = 93;
626 continue;
627 }
628
629 to_name = (char *) obstack_copy0 (&result->mem_pool,
630 now->val.str.start,
631 now->val.str.len);
632
19bc17a9
RM
633 state = 96;
634 continue;
635
636 case 98:
637 /* We now expect `END WIDTH_VARIABLE' or lines of the format
638 "%s\n" or "%s...%s\n". */
639 if (nowtok == tok_eol)
640 /* ignore empty lines. */
641 continue;
642
643 if (nowtok == tok_end)
644 {
645 expected_tok = tok_width_variable;
646 expected_str = "WIDTH_VARIABLE";
647 state = 90;
648 continue;
649 }
650
651 if (nowtok != tok_bsymbol)
652 {
653 lr_error (cmfile, _("syntax error in %s definition: %s"),
654 "WIDTH_VARIABLE", _("no symbolic name given"));
655
656 lr_ignore_rest (cmfile, 0);
657
658 continue;
659 }
660
661 if (from_name != NULL)
662 obstack_free (&result->mem_pool, from_name);
663
664 from_name = (char *) obstack_copy0 (&result->mem_pool,
665 now->val.str.start,
666 now->val.str.len);
667 to_name = NULL;
668
669 state = 99;
670 continue;
671
672 case 99:
673 if (nowtok == tok_ellipsis)
674 state = 100;
675
676 /* Store info. */
677 from_name = NULL;
678
679 /* Warn */
680 state = 98;
681 continue;
682
683 case 100:
684 if (nowtok != tok_bsymbol)
685 lr_error (cmfile, _("syntax error in %s definition: %s"),
686 "WIDTH_VARIABLE",
687 _("no symbolic name given for end of range"));
688 else
689 {
690 to_name = (char *) obstack_copy0 (&result->mem_pool,
691 now->val.str.start,
692 now->val.str.len);
693 /* XXX Enter value into table. */
694 }
695
696 lr_ignore_rest (cmfile, nowtok == tok_bsymbol);
697
698 state = 98;
699 continue;
700
701 default:
702 error (5, 0, _("%s: error in state machine"), __FILE__);
703 /* NOTREACHED */
704 }
705 break;
706 }
707
c84142e8 708 if (state != 91 && !be_quiet)
19bc17a9
RM
709 error (0, 0, _("%s: premature end of file"), cmfile->fname);
710
711 lr_close (cmfile);
712
713 return result;
714}
75cd5204
RM
715
716
717static void
718new_width (struct linereader *cmfile, struct charset_t *result,
719 const char *from, const char *to, unsigned long int width)
720{
721 unsigned int from_val, to_val;
722
69f155d4 723 from_val = charset_find_value (&result->char_table, from, strlen (from));
ba1ffaa1 724 if ((wchar_t) from_val == ILLEGAL_CHAR_VALUE)
75cd5204
RM
725 {
726 lr_error (cmfile, _("unknown character `%s'"), from);
727 return;
728 }
729
730 if (to == NULL)
731 to_val = from_val;
732 else
733 {
69f155d4 734 to_val = charset_find_value (&result->char_table, to, strlen (to));
ba1ffaa1 735 if ((wchar_t) to_val == ILLEGAL_CHAR_VALUE)
75cd5204
RM
736 {
737 lr_error (cmfile, _("unknown character `%s'"), to);
738 return;
739 }
740 }
741
742 if (result->nwidth_rules >= result->nwidth_rules_max)
743 {
744 size_t new_size = result->nwidth_rules + 32;
745 struct width_rule *new_rules =
746 (struct width_rule *) obstack_alloc (&result->mem_pool,
747 (new_size
748 * sizeof (struct width_rule)));
749
750 memcpy (new_rules, result->width_rules,
751 result->nwidth_rules_max * sizeof (struct width_rule));
752
753 result->width_rules = new_rules;
754 result->nwidth_rules_max = new_size;
755 }
756
757 result->width_rules[result->nwidth_rules].from = from_val;
758 result->width_rules[result->nwidth_rules].to = to_val;
759 result->width_rules[result->nwidth_rules].width = (unsigned int) width;
760 ++result->nwidth_rules;
761}