]>
git.ipfire.org Git - thirdparty/glibc.git/blob - locale/programs/charmap.c
1 /* Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
35 #include "linereader.h"
38 #include "repertoire.h"
43 /* Define the lookup function. */
44 #include "charmap-kw.h"
47 extern void *xmalloc (size_t __n
);
49 /* Prototypes for local functions. */
50 static struct charmap_t
*parse_charmap (const char *filename
);
51 static void new_width (struct linereader
*cmfile
, struct charmap_t
*result
,
52 const char *from
, const char *to
,
53 unsigned long int width
);
54 static void charmap_new_char (struct linereader
*lr
, struct charmap_t
*cm
,
55 int nbytes
, char *bytes
, const char *from
,
56 const char *to
, int decimal_ellipsis
);
60 charmap_read (const char *filename
)
62 const char *pathnfile
;
63 struct charmap_t
*result
= NULL
;
67 if (euidaccess (filename
, R_OK
) >= 0)
69 else if (filename
[0] != '/')
71 char *cp
= xmalloc (strlen (filename
) + sizeof CHARMAP_PATH
+ 1);
72 stpcpy (stpcpy (stpcpy (cp
, CHARMAP_PATH
), "/"), filename
);
74 pathnfile
= (const char *) cp
;
79 if (pathnfile
!= NULL
)
81 result
= parse_charmap (pathnfile
);
83 if (result
== NULL
&& !be_quiet
)
84 error (0, errno
, _("character map file `%s' not found"), filename
);
90 /* OK, one more try. We also accept the names given to the
91 character sets in the files. Sometimes they differ from the
94 struct dirent
*dirent
;
96 dir
= opendir (CHARMAP_PATH
);
99 while ((dirent
= readdir (dir
)) != NULL
)
100 if (strcmp (dirent
->d_name
, ".") != 0
101 && strcmp (dirent
->d_name
, "..") != 0)
103 char buf
[sizeof (CHARMAP_PATH
)
104 + strlen (dirent
->d_name
) + 1];
106 #ifdef _DIRENT_HAVE_D_TYPE
107 if (dirent
->d_type
!= DT_UNKNOWN
&& dirent
->d_type
!= DT_REG
)
110 stpcpy (stpcpy (stpcpy (buf
, CHARMAP_PATH
), "/"),
113 fp
= fopen (buf
, "r");
122 if (fscanf (fp
, " <code_set_name> %as", &name
) == 1
123 || fscanf (fp
, "%% alias %as", &name
) == 1)
125 if (strcasecmp (name
, filename
) == 0)
132 if (fgets (junk
, sizeof junk
, fp
) != NULL
)
134 if (strstr (junk
, "CHARMAP") != NULL
)
135 /* We cannot expect more aliases from now on. */
138 while (strchr (junk
, '\n') == NULL
139 && fgets (junk
, sizeof junk
, fp
) != NULL
)
148 result
= parse_charmap (buf
);
166 pathnfile
= CHARMAP_PATH
"/" DEFAULT_CHARMAP
;
168 result
= parse_charmap (pathnfile
);
171 error (4, errno
, _("default character map file `%s' not found"),
179 static struct charmap_t
*
180 parse_charmap (const char *filename
)
182 struct linereader
*cmfile
;
183 struct charmap_t
*result
;
185 enum token_t expected_tok
= tok_error
;
186 const char *expected_str
= NULL
;
187 char *from_name
= NULL
;
188 char *to_name
= NULL
;
189 enum token_t ellipsis
= 0;
191 /* Determine path. */
192 cmfile
= lr_open (filename
, charmap_hash
);
195 if (strchr (filename
, '/') == NULL
)
197 /* Look in the system's charmap directory. */
198 char *buf
= xmalloc (strlen (filename
) + 1 + sizeof (CHARMAP_PATH
));
200 stpcpy (stpcpy (stpcpy (buf
, CHARMAP_PATH
), "/"), filename
);
201 cmfile
= lr_open (buf
, charmap_hash
);
211 /* We don't want symbolic names in strings to be translated. */
212 cmfile
->translate_strings
= 0;
214 /* Allocate room for result. */
215 result
= (struct charmap_t
*) xmalloc (sizeof (struct charmap_t
));
216 memset (result
, '\0', sizeof (struct charmap_t
));
217 /* The default DEFAULT_WIDTH is 1. */
218 result
->width_default
= 1;
220 #define obstack_chunk_alloc malloc
221 #define obstack_chunk_free free
222 obstack_init (&result
->mem_pool
);
224 if (init_hash (&result
->char_table
, 256)
225 || init_hash (&result
->byte_table
, 256))
231 /* We use a state machine to describe the charmap description file
237 struct token
*now
= lr_token (cmfile
, NULL
, NULL
);
238 enum token_t nowtok
= now
->tok
;
241 if (nowtok
== tok_eof
)
247 /* The beginning. We expect the special declarations, EOL or
249 if (nowtok
== tok_eol
)
250 /* Ignore empty lines. */
253 if (nowtok
== tok_charmap
)
258 /* We have to set up the real work. Fill in some
260 if (result
->mb_cur_max
== 0)
261 result
->mb_cur_max
= 1;
262 if (result
->mb_cur_min
== 0)
263 result
->mb_cur_min
= result
->mb_cur_max
;
264 if (result
->mb_cur_min
> result
->mb_cur_max
)
268 %s: <mb_cur_max> must be greater than <mb_cur_min>\n"),
271 result
->mb_cur_min
= result
->mb_cur_max
;
274 lr_ignore_rest (cmfile
, 1);
280 if (nowtok
!= tok_code_set_name
&& nowtok
!= tok_mb_cur_max
281 && nowtok
!= tok_mb_cur_min
&& nowtok
!= tok_escape_char
282 && nowtok
!= tok_comment_char
&& nowtok
!= tok_g0esc
283 && nowtok
!= tok_g1esc
&& nowtok
!= tok_g2esc
284 && nowtok
!= tok_g3esc
&& nowtok
!= tok_repertoiremap
285 && nowtok
!= tok_include
)
287 lr_error (cmfile
, _("syntax error in prolog: %s"),
288 _("invalid definition"));
290 lr_ignore_rest (cmfile
, 0);
294 /* We know that we need an argument. */
295 arg
= lr_token (cmfile
, NULL
, NULL
);
299 case tok_code_set_name
:
300 case tok_repertoiremap
:
301 if (arg
->tok
!= tok_ident
)
304 lr_error (cmfile
, _("syntax error in prolog: %s"),
307 lr_ignore_rest (cmfile
, 0);
311 if (nowtok
== tok_code_set_name
)
312 result
->code_set_name
= obstack_copy0 (&result
->mem_pool
,
313 arg
->val
.str
.startmb
,
316 result
->repertoiremap
= obstack_copy0 (&result
->mem_pool
,
317 arg
->val
.str
.startmb
,
320 lr_ignore_rest (cmfile
, 1);
325 if (arg
->tok
!= tok_number
)
329 && ((nowtok
== tok_mb_cur_max
330 && result
->mb_cur_max
!= 0)
331 || (nowtok
== tok_mb_cur_max
332 && result
->mb_cur_max
!= 0)))
333 lr_error (cmfile
, _("duplicate definition of <%s>"),
334 nowtok
== tok_mb_cur_min
335 ? "mb_cur_min" : "mb_cur_max");
337 if (arg
->val
.num
< 1)
340 _("value for <%s> must be 1 or greater"),
341 nowtok
== tok_mb_cur_min
342 ? "mb_cur_min" : "mb_cur_max");
344 lr_ignore_rest (cmfile
, 0);
347 if ((nowtok
== tok_mb_cur_max
&& result
->mb_cur_min
!= 0
348 && (int) arg
->val
.num
< result
->mb_cur_min
)
349 || (nowtok
== tok_mb_cur_min
&& result
->mb_cur_max
!= 0
350 && (int) arg
->val
.num
> result
->mb_cur_max
))
352 lr_error (cmfile
, _("\
353 value of <%s> must be greater or equal than the value of <%s>"),
354 "mb_cur_max", "mb_cur_min");
356 lr_ignore_rest (cmfile
, 0);
360 if (nowtok
== tok_mb_cur_max
)
361 result
->mb_cur_max
= arg
->val
.num
;
363 result
->mb_cur_min
= arg
->val
.num
;
365 lr_ignore_rest (cmfile
, 1);
368 case tok_escape_char
:
369 case tok_comment_char
:
370 if (arg
->tok
!= tok_ident
)
373 if (arg
->val
.str
.lenmb
!= 1)
375 lr_error (cmfile
, _("\
376 argument to <%s> must be a single character"),
377 nowtok
== tok_escape_char
? "escape_char"
380 lr_ignore_rest (cmfile
, 0);
384 if (nowtok
== tok_escape_char
)
385 cmfile
->escape_char
= *arg
->val
.str
.startmb
;
387 cmfile
->comment_char
= *arg
->val
.str
.startmb
;
389 lr_ignore_rest (cmfile
, 1);
397 lr_ignore_rest (cmfile
, 0); /* XXX */
401 lr_error (cmfile
, _("\
402 character sets with locking states are not supported"));
407 assert (! "Should not happen");
412 /* We have seen `CHARMAP' and now are in the body. Each line
413 must have the format "%s %s %s\n" or "%s...%s %s %s\n". */
414 if (nowtok
== tok_eol
)
415 /* Ignore empty lines. */
418 if (nowtok
== tok_end
)
420 expected_tok
= tok_charmap
;
421 expected_str
= "CHARMAP";
426 if (nowtok
!= tok_bsymbol
)
428 lr_error (cmfile
, _("syntax error in %s definition: %s"),
429 "CHARMAP", _("no symbolic name given"));
431 lr_ignore_rest (cmfile
, 0);
435 /* If the previous line was not completely correct free the
437 if (from_name
!= NULL
)
438 obstack_free (&result
->mem_pool
, from_name
);
440 from_name
= (char *) obstack_copy0 (&result
->mem_pool
,
441 now
->val
.str
.startmb
,
449 /* We have two possibilities: We can see an ellipsis or an
451 if (nowtok
== tok_ellipsis3
|| nowtok
== tok_ellipsis4
452 || nowtok
== tok_ellipsis2
)
461 if (nowtok
!= tok_charcode
)
463 lr_error (cmfile
, _("syntax error in %s definition: %s"),
464 "CHARMAP", _("invalid encoding given"));
466 lr_ignore_rest (cmfile
, 0);
472 if (now
->val
.charcode
.nbytes
< result
->mb_cur_min
)
473 lr_error (cmfile
, _("too few bytes in character encoding"));
474 else if (now
->val
.charcode
.nbytes
> result
->mb_cur_max
)
475 lr_error (cmfile
, _("too many bytes in character encoding"));
477 charmap_new_char (cmfile
, result
, now
->val
.charcode
.nbytes
,
478 now
->val
.charcode
.bytes
, from_name
, to_name
,
479 ellipsis
!= tok_ellipsis2
);
481 /* Ignore trailing comment silently. */
482 lr_ignore_rest (cmfile
, 0);
491 if (nowtok
!= tok_bsymbol
)
493 lr_error (cmfile
, _("syntax error in %s definition: %s"),
495 _("no symbolic name given for end of range"));
497 lr_ignore_rest (cmfile
, 0);
501 /* Copy the to-name to a safe place. */
502 to_name
= (char *) obstack_copy0 (&result
->mem_pool
,
503 cmfile
->token
.val
.str
.startmb
,
504 cmfile
->token
.val
.str
.lenmb
);
510 if (nowtok
!= expected_tok
)
511 lr_error (cmfile
, _("\
512 `%1$s' definition does not end with `END %1$s'"), expected_str
);
514 lr_ignore_rest (cmfile
, nowtok
== expected_tok
);
519 /* Waiting for WIDTH... */
520 if (nowtok
== tok_eol
)
521 /* Ignore empty lines. */
524 if (nowtok
== tok_width_default
)
530 if (nowtok
== tok_width
)
532 lr_ignore_rest (cmfile
, 1);
537 if (nowtok
== tok_width_variable
)
539 lr_ignore_rest (cmfile
, 1);
544 lr_error (cmfile
, _("\
545 only WIDTH definitions are allowed to follow the CHARMAP definition"));
547 lr_ignore_rest (cmfile
, 0);
551 if (nowtok
!= tok_number
)
552 lr_error (cmfile
, _("value for %s must be an integer"),
555 result
->width_default
= now
->val
.num
;
557 lr_ignore_rest (cmfile
, nowtok
== tok_number
);
563 /* We now expect `END WIDTH' or lines of the format "%s %d\n" or
565 if (nowtok
== tok_eol
)
566 /* ignore empty lines. */
569 if (nowtok
== tok_end
)
571 expected_tok
= tok_width
;
572 expected_str
= "WIDTH";
577 if (nowtok
!= tok_bsymbol
)
579 lr_error (cmfile
, _("syntax error in %s definition: %s"),
580 "WIDTH", _("no symbolic name given"));
582 lr_ignore_rest (cmfile
, 0);
586 if (from_name
!= NULL
)
587 obstack_free (&result
->mem_pool
, from_name
);
589 from_name
= (char *) obstack_copy0 (&result
->mem_pool
,
590 now
->val
.str
.startmb
,
598 if (nowtok
== tok_ellipsis3
)
605 if (nowtok
!= tok_number
)
606 lr_error (cmfile
, _("value for %s must be an integer"),
610 /* Store width for chars. */
611 new_width (cmfile
, result
, from_name
, to_name
, now
->val
.num
);
617 lr_ignore_rest (cmfile
, nowtok
== tok_number
);
623 if (nowtok
!= tok_bsymbol
)
625 lr_error (cmfile
, _("syntax error in %s definition: %s"),
626 "WIDTH", _("no symbolic name given for end of range"));
628 lr_ignore_rest (cmfile
, 0);
634 to_name
= (char *) obstack_copy0 (&result
->mem_pool
,
635 now
->val
.str
.startmb
,
642 /* We now expect `END WIDTH_VARIABLE' or lines of the format
643 "%s\n" or "%s...%s\n". */
644 if (nowtok
== tok_eol
)
645 /* ignore empty lines. */
648 if (nowtok
== tok_end
)
650 expected_tok
= tok_width_variable
;
651 expected_str
= "WIDTH_VARIABLE";
656 if (nowtok
!= tok_bsymbol
)
658 lr_error (cmfile
, _("syntax error in %s definition: %s"),
659 "WIDTH_VARIABLE", _("no symbolic name given"));
661 lr_ignore_rest (cmfile
, 0);
666 if (from_name
!= NULL
)
667 obstack_free (&result
->mem_pool
, from_name
);
669 from_name
= (char *) obstack_copy0 (&result
->mem_pool
,
670 now
->val
.str
.startmb
,
678 if (nowtok
== tok_ellipsis3
)
689 if (nowtok
!= tok_bsymbol
)
690 lr_error (cmfile
, _("syntax error in %s definition: %s"),
692 _("no symbolic name given for end of range"));
695 to_name
= (char *) obstack_copy0 (&result
->mem_pool
,
696 now
->val
.str
.startmb
,
698 /* XXX Enter value into table. */
701 lr_ignore_rest (cmfile
, nowtok
== tok_bsymbol
);
707 error (5, 0, _("%s: error in state machine"), __FILE__
);
713 if (state
!= 91 && !be_quiet
)
714 error (0, 0, _("%s: premature end of file"), cmfile
->fname
);
723 new_width (struct linereader
*cmfile
, struct charmap_t
*result
,
724 const char *from
, const char *to
, unsigned long int width
)
726 struct charseq
*from_val
;
727 struct charseq
*to_val
;
729 from_val
= charmap_find_value (result
, from
, strlen (from
));
730 if (from_val
== NULL
)
732 lr_error (cmfile
, _("unknown character `%s'"), from
);
740 to_val
= charmap_find_value (result
, to
, strlen (to
));
743 lr_error (cmfile
, _("unknown character `%s'"), to
);
748 if (result
->nwidth_rules
>= result
->nwidth_rules_max
)
750 size_t new_size
= result
->nwidth_rules
+ 32;
751 struct width_rule
*new_rules
=
752 (struct width_rule
*) obstack_alloc (&result
->mem_pool
,
754 * sizeof (struct width_rule
)));
756 memcpy (new_rules
, result
->width_rules
,
757 result
->nwidth_rules_max
* sizeof (struct width_rule
));
759 result
->width_rules
= new_rules
;
760 result
->nwidth_rules_max
= new_size
;
763 result
->width_rules
[result
->nwidth_rules
].from
= from_val
;
764 result
->width_rules
[result
->nwidth_rules
].to
= to_val
;
765 result
->width_rules
[result
->nwidth_rules
].width
= (unsigned int) width
;
766 ++result
->nwidth_rules
;
771 charmap_find_value (const struct charmap_t
*cm
, const char *name
, size_t len
)
775 return (find_entry ((hash_table
*) &cm
->char_table
, name
, len
, &result
)
776 < 0 ? NULL
: (struct charseq
*) result
);
781 charmap_new_char (struct linereader
*lr
, struct charmap_t
*cm
,
782 int nbytes
, char *bytes
, const char *from
, const char *to
,
783 int decimal_ellipsis
)
785 hash_table
*ht
= &cm
->char_table
;
786 hash_table
*bt
= &cm
->byte_table
;
787 struct obstack
*ob
= &cm
->mem_pool
;
791 int prefix_len
, len1
, len2
;
792 unsigned int from_nr
, to_nr
, cnt
;
793 struct charseq
*newp
;
795 len1
= strlen (from
);
799 newp
= (struct charseq
*) obstack_alloc (ob
, sizeof (*newp
) + nbytes
);
800 newp
->nbytes
= nbytes
;
801 memcpy (newp
->bytes
, bytes
, nbytes
);
802 newp
->name
= obstack_copy (ob
, from
, len1
+ 1);
803 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
805 insert_entry (ht
, from
, len1
, newp
);
806 insert_entry (bt
, newp
->bytes
, nbytes
, newp
);
807 /* Please note that it isn't a bug if a symbol is defined more
808 than once. All later definitions are simply discarded. */
812 /* We have a range: the two names must share the same prefix and have
813 an equal number of digits, and the second number must be greater than
814 or equal to the first. */
820 lr_error (lr
, _("invalid names for character range"));
824 cp
= &from
[len1
- 1];
825 if (decimal_ellipsis
)
826 while (isdigit (*cp
) && cp
>= from
)
829 while (isxdigit (*cp
) && cp
>= from
)
831 if (!isdigit (*cp
) && !isupper (*cp
))
833 hexadecimal range format should use only capital characters"));
837 prefix_len
= (cp
- from
) + 1;
839 if (cp
== &from
[len1
- 1] || strncmp (from
, to
, prefix_len
) != 0)
843 from_nr
= strtoul (&from
[prefix_len
], &from_end
, decimal_ellipsis
? 10 : 16);
844 if (*from_end
!= '\0' || (from_nr
== ULONG_MAX
&& errno
== ERANGE
)
845 || ((to_nr
= strtoul (&to
[prefix_len
], &to_end
,
846 decimal_ellipsis
? 10 : 16)) == ULONG_MAX
850 lr_error (lr
, _("<%s> and <%s> are illegal names for range"));
856 lr_error (lr
, _("upper limit in range is not higher then lower limit"));
860 for (cnt
= from_nr
; cnt
<= to_nr
; ++cnt
)
863 obstack_printf (ob
, decimal_ellipsis
? "%.*s%0*d" : "%.*s%0*X",
864 prefix_len
, from
, len1
- prefix_len
, cnt
);
865 name_end
= obstack_finish (ob
);
867 newp
= (struct charseq
*) obstack_alloc (ob
, sizeof (*newp
) + nbytes
);
868 newp
->nbytes
= nbytes
;
869 memcpy (newp
->bytes
, bytes
, nbytes
);
870 newp
->name
= name_end
;
871 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
873 insert_entry (ht
, name_end
, len1
, newp
);
874 insert_entry (bt
, newp
->bytes
, nbytes
, newp
);
875 /* Please note we don't examine the return value since it is not an
876 error if we have two definitions for a symbol. */
878 /* Increment the value in the byte sequence. */
879 if (++bytes
[nbytes
- 1] == '\0')
887 _("resulting bytes for range not representable."));
890 while (++bytes
[b
--] == 0);
897 charmap_find_symbol (const struct charmap_t
*cm
, const char *bytes
,
902 return (find_entry ((hash_table
*) &cm
->byte_table
, bytes
, nbytes
, &result
)
903 < 0 ? NULL
: (struct charseq
*) result
);