]> git.ipfire.org Git - thirdparty/glibc.git/blob - locale/programs/charmap.c
Update.
[thirdparty/glibc.git] / locale / programs / charmap.c
1 /* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #include <ctype.h>
25 #include <dirent.h>
26 #include <errno.h>
27 #include <libintl.h>
28 #include <obstack.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <unistd.h>
32
33 #include "error.h"
34 #include "linereader.h"
35 #include "charset.h"
36
37
38 /* Uncomment following line for production version. */
39 /* define NDEBUG 1 */
40 #include <assert.h>
41
42
43 /* Define the lookup function. */
44 #include "charmap-kw.h"
45
46
47 extern void *xmalloc (size_t __n);
48
49 /* Prototypes for local functions. */
50 static struct charset_t *parse_charmap (const char *filename);
51 static void new_width (struct linereader *cmfile, struct charset_t *result,
52 const char *from, const char *to,
53 unsigned long int width);
54
55
56 struct charset_t *
57 charmap_read (const char *filename)
58 {
59 const char *pathnfile;
60 struct charset_t *result = NULL;
61
62 if (filename != NULL)
63 {
64 if (euidaccess (filename, R_OK) >= 0)
65 pathnfile = filename;
66 else if (filename[0] != '/')
67 {
68 char *cp = xmalloc (strlen (filename) + sizeof CHARMAP_PATH + 1);
69 stpcpy (stpcpy (stpcpy (cp, CHARMAP_PATH), "/"), filename);
70
71 pathnfile = (const char *) cp;
72 }
73 else
74 pathnfile = NULL;
75
76 if (pathnfile != NULL)
77 {
78 result = parse_charmap (pathnfile);
79
80 if (result == NULL && !be_quiet)
81 error (0, errno, _("character map file `%s' not found"), filename);
82 }
83 }
84
85 if (result == NULL)
86 {
87 /* OK, one more try. We also accept the names given to the
88 character sets in the files. Sometimes they differ from the
89 file name. */
90 DIR *dir;
91 struct dirent *dirent;
92
93 dir = opendir (CHARMAP_PATH);
94 if (dir == NULL)
95 {
96 while ((dirent = readdir (dir)) != NULL)
97 if (strcmp (dirent->d_name, ".") != 0
98 && strcmp (dirent->d_name, "..") != 0)
99 {
100 char buf[sizeof (CHARMAP_PATH)
101 + strlen (dirent->d_name) + 1];
102 FILE *fp;
103 #ifdef _DIRENT_HAVE_D_TYPE
104 if (dirent->d_type != DT_UNKNOWN && dirent->d_type != DT_REG)
105 continue;
106 #endif
107 stpcpy (stpcpy (stpcpy (buf, CHARMAP_PATH), "/"),
108 dirent->d_name);
109
110 fp = fopen (buf, "r");
111 if (fp != NULL)
112 {
113 char *name = NULL;
114
115 while (!feof (fp))
116 {
117 char junk[BUFSIZ];
118
119 if (fscanf (fp, " <code_set_name> %as", &name) == 1
120 || fscanf (fp, "%% alias %as", &name) == 1)
121 {
122 if (strcasecmp (name, filename) == 0)
123 break;
124
125 free (name);
126 name = NULL;
127 }
128
129 if (fgets (junk, sizeof junk, fp) != NULL)
130 {
131 if (strstr (junk, "CHARMAP") != NULL)
132 /* We cannot expect more aliases from now on. */
133 break;
134
135 while (strchr (junk, '\n') == NULL
136 && fgets (junk, sizeof junk, fp) != NULL)
137 continue;
138 }
139 }
140
141 fclose (fp);
142
143 if (name != NULL)
144 {
145 result = parse_charmap (buf);
146
147 free (buf);
148
149 if (result)
150 return result;
151
152 break;
153 }
154 }
155 }
156
157 closedir (dir);
158 }
159 }
160
161 if (result == NULL)
162 {
163 pathnfile = CHARMAP_PATH "/" DEFAULT_CHARMAP;
164
165 result = parse_charmap (pathnfile);
166
167 if (result == NULL)
168 error (4, errno, _("default character map file `%s' not found"),
169 DEFAULT_CHARMAP);
170 }
171
172 return result;
173 }
174
175
176 static struct charset_t *
177 parse_charmap (const char *filename)
178 {
179 struct linereader *cmfile;
180 struct charset_t *result;
181 int state;
182 enum token_t expected_tok = tok_error;
183 const char *expected_str = NULL;
184 char *from_name = NULL;
185 char *to_name = NULL;
186
187 /* Determine path. */
188 cmfile = lr_open (filename, charmap_hash);
189 if (cmfile == NULL)
190 {
191 if (strchr (filename, '/') == NULL)
192 {
193 /* Look in the systems charmap directory. */
194 char *buf = xmalloc (strlen (filename) + 1 + sizeof (CHARMAP_PATH));
195
196 stpcpy (stpcpy (stpcpy (buf, CHARMAP_PATH), "/"), filename);
197 cmfile = lr_open (buf, charmap_hash);
198
199 if (cmfile == NULL)
200 free (buf);
201 }
202
203 if (cmfile == NULL)
204 return NULL;
205 }
206
207 /* Allocate room for result. */
208 result = (struct charset_t *) xmalloc (sizeof (struct charset_t));
209 memset (result, '\0', sizeof (struct charset_t));
210 /* The default DEFAULT_WIDTH is 1. */
211 result->width_default = 1;
212
213 #define obstack_chunk_alloc malloc
214 #define obstack_chunk_free free
215 obstack_init (&result->mem_pool);
216
217 if (init_hash (&result->char_table, 256))
218 {
219 free (result);
220 return NULL;
221 }
222
223 /* We use a state machine to describe the charmap description file
224 format. */
225 state = 1;
226 while (1)
227 {
228 /* What's on? */
229 struct token *now = lr_token (cmfile, NULL);
230 enum token_t nowtok = now->tok;
231 struct token *arg;
232
233 if (nowtok == tok_eof)
234 break;
235
236 switch (state)
237 {
238 case 1:
239 /* The beginning. We expect the special declarations, EOL or
240 `CHARMAP'. */
241 if (nowtok == tok_eol)
242 /* Ignore empty lines. */
243 continue;
244
245 if (nowtok == tok_charmap)
246 {
247 from_name = NULL;
248 to_name = NULL;
249
250 /* We have to set up the real work. Fill in some
251 default values. */
252 if (result->mb_cur_max == 0)
253 result->mb_cur_max = 1;
254 if (result->mb_cur_min == 0)
255 result->mb_cur_min = result->mb_cur_max;
256 if (result->mb_cur_min > result->mb_cur_max && !be_quiet)
257 {
258 error (0, 0, _("\
259 %s: <mb_cur_max> must be greater than <mb_cur_min>\n"),
260 cmfile->fname);
261
262 result->mb_cur_min = result->mb_cur_max;
263 }
264
265 lr_ignore_rest (cmfile, 1);
266
267 state = 2;
268 continue;
269 }
270
271 if (nowtok != tok_code_set_name && nowtok != tok_mb_cur_max
272 && nowtok != tok_mb_cur_min && nowtok != tok_escape_char
273 && nowtok != tok_comment_char && nowtok != tok_g0esc
274 && nowtok != tok_g1esc && nowtok != tok_g2esc
275 && nowtok != tok_g3esc)
276 {
277 lr_error (cmfile, _("syntax error in prolog: %s"),
278 _("illegal definition"));
279
280 lr_ignore_rest (cmfile, 0);
281 continue;
282 }
283
284 /* We know that we need an argument. */
285 arg = lr_token (cmfile, NULL);
286
287 switch (nowtok)
288 {
289 case tok_code_set_name:
290 if (arg->tok != tok_ident)
291 {
292 badarg:
293 lr_error (cmfile, _("syntax error in prolog: %s"),
294 _("bad argument"));
295
296 lr_ignore_rest (cmfile, 0);
297 continue;
298 }
299
300 result->code_set_name = obstack_copy0 (&result->mem_pool,
301 arg->val.str.start,
302 arg->val.str.len);
303
304 lr_ignore_rest (cmfile, 1);
305 continue;
306
307 case tok_mb_cur_max:
308 case tok_mb_cur_min:
309 if (arg->tok != tok_number)
310 goto badarg;
311
312 if (arg->val.num < 1 || arg->val.num > 4)
313 {
314 lr_error (cmfile,
315 _("value for <%s> must lie between 1 and 4"),
316 nowtok == tok_mb_cur_min ? "mb_cur_min"
317 : "mb_cur_max");
318
319 lr_ignore_rest (cmfile, 0);
320 continue;
321 }
322 if ((nowtok == tok_mb_cur_max && result->mb_cur_min != 0
323 && (int) arg->val.num < result->mb_cur_min)
324 || (nowtok == tok_mb_cur_min && result->mb_cur_max != 0
325 && (int) arg->val.num > result->mb_cur_max))
326 {
327 lr_error (cmfile, _("\
328 value of <mb_cur_max> must be greater than the value of <mb_cur_min>"));
329
330 lr_ignore_rest (cmfile, 0);
331 continue;
332 }
333
334 if (nowtok == tok_mb_cur_max)
335 result->mb_cur_max = arg->val.num;
336 else
337 result->mb_cur_min = arg->val.num;
338
339 lr_ignore_rest (cmfile, 1);
340 continue;
341
342 case tok_escape_char:
343 case tok_comment_char:
344 if (arg->tok != tok_ident)
345 goto badarg;
346
347 if (arg->val.str.len != 1)
348 {
349 lr_error (cmfile, _("\
350 argument to <%s> must be a single character"),
351 nowtok == tok_escape_char ? "escape_char"
352 : "comment_char");
353
354 lr_ignore_rest (cmfile, 0);
355 continue;
356 }
357
358 if (nowtok == tok_escape_char)
359 cmfile->escape_char = *arg->val.str.start;
360 else
361 cmfile->comment_char = *arg->val.str.start;
362
363 lr_ignore_rest (cmfile, 1);
364 continue;
365
366 case tok_g0esc:
367 case tok_g1esc:
368 case tok_g2esc:
369 case tok_g3esc:
370 lr_ignore_rest (cmfile, 0); /* XXX */
371 continue;
372
373 default:
374 /* Cannot happen. */
375 assert (! "Should not happen");
376 }
377 break;
378
379 case 2:
380 /* We have seen `CHARMAP' and now are in the body. Each line
381 must have the format "%s %s %s\n" or "%s...%s %s %s\n". */
382 if (nowtok == tok_eol)
383 /* Ignore empty lines. */
384 continue;
385
386 if (nowtok == tok_end)
387 {
388 expected_tok = tok_charmap;
389 expected_str = "CHARMAP";
390 state = 90;
391 continue;
392 }
393
394 if (nowtok != tok_bsymbol)
395 {
396 lr_error (cmfile, _("syntax error in %s definition: %s"),
397 "CHARMAP", _("no symbolic name given"));
398
399 lr_ignore_rest (cmfile, 0);
400 continue;
401 }
402
403 /* If the previous line was not completely correct free the
404 used memory. */
405 if (from_name != NULL)
406 obstack_free (&result->mem_pool, from_name);
407
408 from_name = (char *) obstack_copy0 (&result->mem_pool,
409 now->val.str.start,
410 now->val.str.len);
411 to_name = NULL;
412
413 state = 3;
414 continue;
415
416 case 3:
417 /* We have two possibilities: We can see an ellipsis or an
418 encoding value. */
419 if (nowtok == tok_ellipsis)
420 {
421 state = 4;
422 continue;
423 }
424 /* FALLTHROUGH */
425
426 case 5:
427 if (nowtok != tok_charcode && nowtok != tok_ucs2
428 && nowtok != tok_ucs4)
429 {
430 lr_error (cmfile, _("syntax error in %s definition: %s"),
431 "CHARMAP", _("illegal encoding given"));
432
433 lr_ignore_rest (cmfile, 0);
434
435 state = 2;
436 continue;
437 }
438
439 if (nowtok == tok_charcode)
440 /* Write char value in table. */
441 charset_new_char (cmfile, result, now->val.charcode.nbytes,
442 now->val.charcode.val, from_name, to_name);
443 else
444 /* Determine ISO 10646 value and write into table. */
445 charset_new_unicode (cmfile, result, now->val.charcode.nbytes,
446 now->val.charcode.val, from_name, to_name);
447
448 /* Ignore trailing comment silently. */
449 lr_ignore_rest (cmfile, 0);
450
451 from_name = NULL;
452 to_name = NULL;
453
454 state = 2;
455 continue;
456
457 case 4:
458 if (nowtok != tok_bsymbol)
459 {
460 lr_error (cmfile, _("syntax error in %s definition: %s"),
461 "CHARMAP",
462 _("no symbolic name given for end of range"));
463
464 lr_ignore_rest (cmfile, 0);
465 continue;
466 }
467
468 /* If the previous line was not completely correct free the
469 used memory. */
470 to_name = (char *) obstack_copy0 (&result->mem_pool,
471 cmfile->token.val.str.start,
472 cmfile->token.val.str.len);
473
474 state = 5;
475 continue;
476
477 case 90:
478 if (nowtok != expected_tok)
479 lr_error (cmfile, _("\
480 `%1$s' definition does not end with `END %1$s'"), expected_str);
481
482 lr_ignore_rest (cmfile, nowtok == expected_tok);
483 state = 91;
484 continue;
485
486 case 91:
487 /* Waiting for WIDTH... */
488 if (nowtok == tok_eol)
489 /* Ignore empty lines. */
490 continue;
491
492 if (nowtok == tok_width_default)
493 {
494 state = 92;
495 continue;
496 }
497
498 if (nowtok == tok_width)
499 {
500 lr_ignore_rest (cmfile, 1);
501 state = 93;
502 continue;
503 }
504
505 if (nowtok == tok_width_variable)
506 {
507 lr_ignore_rest (cmfile, 1);
508 state = 98;
509 continue;
510 }
511
512 lr_error (cmfile, _("\
513 only WIDTH definitions are allowed to follow the CHARMAP definition"));
514
515 lr_ignore_rest (cmfile, 0);
516 continue;
517
518 case 92:
519 if (nowtok != tok_number)
520 lr_error (cmfile, _("value for %s must be an integer"),
521 "WIDTH_DEFAULT");
522 else
523 result->width_default = now->val.num;
524
525 lr_ignore_rest (cmfile, nowtok == tok_number);
526
527 state = 91;
528 continue;
529
530 case 93:
531 /* We now expect `END WIDTH' or lines of the format "%s %d\n" or
532 "%s...%s %d\n". */
533 if (nowtok == tok_eol)
534 /* ignore empty lines. */
535 continue;
536
537 if (nowtok == tok_end)
538 {
539 expected_tok = tok_width;
540 expected_str = "WIDTH";
541 state = 90;
542 continue;
543 }
544
545 if (nowtok != tok_bsymbol)
546 {
547 lr_error (cmfile, _("syntax error in %s definition: %s"),
548 "WIDTH", _("no symbolic name given"));
549
550 lr_ignore_rest (cmfile, 0);
551 continue;
552 }
553
554 if (from_name != NULL)
555 obstack_free (&result->mem_pool, from_name);
556
557 from_name = (char *) obstack_copy0 (&result->mem_pool,
558 now->val.str.start,
559 now->val.str.len);
560 to_name = NULL;
561
562 state = 94;
563 continue;
564
565 case 94:
566 if (nowtok == tok_ellipsis)
567 {
568 state = 95;
569 continue;
570 }
571
572 case 96:
573 if (nowtok != tok_number)
574 lr_error (cmfile, _("value for %s must be an integer"),
575 "WIDTH");
576 else
577 {
578 /* Store width for chars. */
579 new_width (cmfile, result, from_name, to_name, now->val.num);
580
581 from_name = NULL;
582 to_name = NULL;
583 }
584
585 lr_ignore_rest (cmfile, nowtok == tok_number);
586
587 state = 93;
588 continue;
589
590 case 95:
591 if (nowtok != tok_bsymbol)
592 {
593 lr_error (cmfile, _("syntax error in %s definition: %s"),
594 "WIDTH", _("no symbolic name given for end of range"));
595
596 lr_ignore_rest (cmfile, 0);
597
598 state = 93;
599 continue;
600 }
601
602 to_name = (char *) obstack_copy0 (&result->mem_pool,
603 now->val.str.start,
604 now->val.str.len);
605
606 state = 96;
607 continue;
608
609 case 98:
610 /* We now expect `END WIDTH_VARIABLE' or lines of the format
611 "%s\n" or "%s...%s\n". */
612 if (nowtok == tok_eol)
613 /* ignore empty lines. */
614 continue;
615
616 if (nowtok == tok_end)
617 {
618 expected_tok = tok_width_variable;
619 expected_str = "WIDTH_VARIABLE";
620 state = 90;
621 continue;
622 }
623
624 if (nowtok != tok_bsymbol)
625 {
626 lr_error (cmfile, _("syntax error in %s definition: %s"),
627 "WIDTH_VARIABLE", _("no symbolic name given"));
628
629 lr_ignore_rest (cmfile, 0);
630
631 continue;
632 }
633
634 if (from_name != NULL)
635 obstack_free (&result->mem_pool, from_name);
636
637 from_name = (char *) obstack_copy0 (&result->mem_pool,
638 now->val.str.start,
639 now->val.str.len);
640 to_name = NULL;
641
642 state = 99;
643 continue;
644
645 case 99:
646 if (nowtok == tok_ellipsis)
647 state = 100;
648
649 /* Store info. */
650 from_name = NULL;
651
652 /* Warn */
653 state = 98;
654 continue;
655
656 case 100:
657 if (nowtok != tok_bsymbol)
658 lr_error (cmfile, _("syntax error in %s definition: %s"),
659 "WIDTH_VARIABLE",
660 _("no symbolic name given for end of range"));
661 else
662 {
663 to_name = (char *) obstack_copy0 (&result->mem_pool,
664 now->val.str.start,
665 now->val.str.len);
666 /* XXX Enter value into table. */
667 }
668
669 lr_ignore_rest (cmfile, nowtok == tok_bsymbol);
670
671 state = 98;
672 continue;
673
674 default:
675 error (5, 0, _("%s: error in state machine"), __FILE__);
676 /* NOTREACHED */
677 }
678 break;
679 }
680
681 if (state != 91 && !be_quiet)
682 error (0, 0, _("%s: premature end of file"), cmfile->fname);
683
684 lr_close (cmfile);
685
686 return result;
687 }
688
689
690 static void
691 new_width (struct linereader *cmfile, struct charset_t *result,
692 const char *from, const char *to, unsigned long int width)
693 {
694 unsigned int from_val, to_val;
695
696 from_val = charset_find_value (result, from, strlen (from));
697 if ((wchar_t) from_val == ILLEGAL_CHAR_VALUE)
698 {
699 lr_error (cmfile, _("unknown character `%s'"), from);
700 return;
701 }
702
703 if (to == NULL)
704 to_val = from_val;
705 else
706 {
707 to_val = charset_find_value (result, to, strlen (to));
708 if ((wchar_t) to_val == ILLEGAL_CHAR_VALUE)
709 {
710 lr_error (cmfile, _("unknown character `%s'"), to);
711 return;
712 }
713 }
714
715 if (result->nwidth_rules >= result->nwidth_rules_max)
716 {
717 size_t new_size = result->nwidth_rules + 32;
718 struct width_rule *new_rules =
719 (struct width_rule *) obstack_alloc (&result->mem_pool,
720 (new_size
721 * sizeof (struct width_rule)));
722
723 memcpy (new_rules, result->width_rules,
724 result->nwidth_rules_max * sizeof (struct width_rule));
725
726 result->width_rules = new_rules;
727 result->nwidth_rules_max = new_size;
728 }
729
730 result->width_rules[result->nwidth_rules].from = from_val;
731 result->width_rules[result->nwidth_rules].to = to_val;
732 result->width_rules[result->nwidth_rules].width = (unsigned int) width;
733 ++result->nwidth_rules;
734 }