]> git.ipfire.org Git - thirdparty/glibc.git/blame - locale/programs/repertoire.c
* iconvdata/gbk.c (BODY): Make buf and cp char instead of unsigned
[thirdparty/glibc.git] / locale / programs / repertoire.c
CommitLineData
/* Copyright (C) 1998-2002,2004,2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
69f155d4
UD
18
19#ifdef HAVE_CONFIG_H
20# include <config.h>
21#endif
22
23#include <errno.h>
24#include <error.h>
25#include <limits.h>
4b10dd6c
UD
26#include <obstack.h>
27#include <search.h>
69f155d4
UD
28#include <stdlib.h>
29#include <string.h>
30#include <unistd.h>
31
f2b98f97 32#include "localedef.h"
69f155d4 33#include "linereader.h"
4b10dd6c 34#include "charmap.h"
69f155d4
UD
35#include "repertoire.h"
36#include "simple-hash.h"
69f155d4
UD
37
38
39/* Simple keyword hashing for the repertoiremap. */
4b10dd6c
UD
40static const struct keyword_t *repertoiremap_hash (const char *str,
41 unsigned int len);
42static void repertoire_new_char (struct linereader *lr, hash_table *ht,
43 hash_table *rt, struct obstack *ob,
44 uint32_t value, const char *from,
45 const char *to, int decimal_ellipsis);
46static int repertoire_compare (const void *p1, const void *p2);
47
48/* Already known repertoire maps. */
49static void *known;
69f155d4 50
3c833378
UD
51/* List of repertoire maps which are not available and which have been
52 reported to not be. */
53static void *unavailable;
54
69f155d4
UD
55
56struct repertoire_t *
57repertoire_read (const char *filename)
58{
12a9fabe 59 struct linereader *repfile;
69f155d4 60 struct repertoire_t *result;
4b10dd6c
UD
61 struct repertoire_t **resultp;
62 struct repertoire_t search;
69f155d4
UD
63 int state;
64 char *from_name = NULL;
65 char *to_name = NULL;
4b10dd6c
UD
66 enum token_t ellipsis = tok_none;
67
68 search.name = filename;
69 resultp = tfind (&search, &known, &repertoire_compare);
70 if (resultp != NULL)
71 return *resultp;
69f155d4
UD
72
73 /* Determine path. */
12a9fabe
UD
74 repfile = lr_open (filename, repertoiremap_hash);
75 if (repfile == NULL)
69f155d4
UD
76 {
77 if (strchr (filename, '/') == NULL)
78 {
4b10dd6c 79 char *i18npath = getenv ("I18NPATH");
12a9fabe
UD
80 if (i18npath != NULL && *i18npath != '\0')
81 {
db2f05ba
RM
82 const size_t pathlen = strlen (i18npath);
83 char i18npathbuf[pathlen + 1];
84 char path[strlen (filename) + 1 + pathlen
12a9fabe
UD
85 + sizeof ("/repertoiremaps/") - 1];
86 char *next;
db2f05ba 87 i18npath = memcpy (i18npathbuf, i18npath, pathlen + 1);
69f155d4 88
12a9fabe
UD
89 while (repfile == NULL
90 && (next = strsep (&i18npath, ":")) != NULL)
91 {
92 stpcpy (stpcpy (stpcpy (path, next), "/repertoiremaps/"),
93 filename);
94
95 repfile = lr_open (path, repertoiremap_hash);
4b10dd6c
UD
96
97 if (repfile == NULL)
98 {
99 stpcpy (stpcpy (path, next), filename);
100
101 repfile = lr_open (path, repertoiremap_hash);
102 }
12a9fabe
UD
103 }
104 }
105
106 if (repfile == NULL)
107 {
108 /* Look in the systems charmap directory. */
109 char *buf = xmalloc (strlen (filename) + 1
110 + sizeof (REPERTOIREMAP_PATH));
111
112 stpcpy (stpcpy (stpcpy (buf, REPERTOIREMAP_PATH), "/"),
113 filename);
114 repfile = lr_open (buf, repertoiremap_hash);
115
116 if (repfile == NULL)
117 free (buf);
118 }
69f155d4
UD
119 }
120
12a9fabe 121 if (repfile == NULL)
3c833378 122 return NULL;
69f155d4
UD
123 }
124
4b10dd6c
UD
125 /* We don't want symbolic names in string to be translated. */
126 repfile->translate_strings = 0;
127
69f155d4
UD
128 /* Allocate room for result. */
129 result = (struct repertoire_t *) xmalloc (sizeof (struct repertoire_t));
130 memset (result, '\0', sizeof (struct repertoire_t));
131
4b10dd6c
UD
132 result->name = xstrdup (filename);
133
69f155d4
UD
134#define obstack_chunk_alloc malloc
135#define obstack_chunk_free free
136 obstack_init (&result->mem_pool);
137
4b10dd6c
UD
138 if (init_hash (&result->char_table, 256)
139 || init_hash (&result->reverse_table, 256)
140 || init_hash (&result->seq_table, 256))
69f155d4
UD
141 {
142 free (result);
143 return NULL;
144 }
145
146 /* We use a state machine to describe the charmap description file
147 format. */
148 state = 1;
149 while (1)
150 {
151 /* What's on? */
47e8b443 152 struct token *now = lr_token (repfile, NULL, NULL, NULL, verbose);
69f155d4
UD
153 enum token_t nowtok = now->tok;
154 struct token *arg;
155
156 if (nowtok == tok_eof)
157 break;
158
159 switch (state)
160 {
161 case 1:
162 /* We haven't yet read any character definition. This is where
163 we accept escape_char and comment_char definitions. */
164 if (nowtok == tok_eol)
165 /* Ignore empty lines. */
166 continue;
167
168 if (nowtok == tok_escape_char || nowtok == tok_comment_char)
169 {
170 /* We know that we need an argument. */
47e8b443 171 arg = lr_token (repfile, NULL, NULL, NULL, verbose);
69f155d4
UD
172
173 if (arg->tok != tok_ident)
174 {
12a9fabe 175 lr_error (repfile, _("syntax error in prolog: %s"),
69f155d4
UD
176 _("bad argument"));
177
12a9fabe 178 lr_ignore_rest (repfile, 0);
69f155d4
UD
179 continue;
180 }
181
4b10dd6c 182 if (arg->val.str.lenmb != 1)
69f155d4 183 {
12a9fabe 184 lr_error (repfile, _("\
69f155d4
UD
185argument to <%s> must be a single character"),
186 nowtok == tok_escape_char ? "escape_char"
187 : "comment_char");
188
12a9fabe 189 lr_ignore_rest (repfile, 0);
69f155d4
UD
190 continue;
191 }
192
193 if (nowtok == tok_escape_char)
4b10dd6c 194 repfile->escape_char = *arg->val.str.startmb;
69f155d4 195 else
4b10dd6c 196 repfile->comment_char = *arg->val.str.startmb;
69f155d4 197
12a9fabe 198 lr_ignore_rest (repfile, 1);
69f155d4
UD
199 continue;
200 }
201
202 if (nowtok == tok_charids)
203 {
12a9fabe 204 lr_ignore_rest (repfile, 1);
69f155d4
UD
205
206 state = 2;
207 continue;
208 }
209
210 /* Otherwise we start reading the character definitions. */
211 state = 2;
212 /* FALLTHROUGH */
213
214 case 2:
215 /* We are now are in the body. Each line
216 must have the format "%s %s %s\n" or "%s...%s %s %s\n". */
217 if (nowtok == tok_eol)
218 /* Ignore empty lines. */
219 continue;
220
221 if (nowtok == tok_end)
222 {
223 state = 90;
224 continue;
225 }
226
227 if (nowtok != tok_bsymbol)
228 {
12a9fabe 229 lr_error (repfile,
69f155d4
UD
230 _("syntax error in repertoire map definition: %s"),
231 _("no symbolic name given"));
232
12a9fabe 233 lr_ignore_rest (repfile, 0);
69f155d4
UD
234 continue;
235 }
236
237 /* If the previous line was not completely correct free the
238 used memory. */
239 if (from_name != NULL)
240 obstack_free (&result->mem_pool, from_name);
241
242 from_name = (char *) obstack_copy0 (&result->mem_pool,
4b10dd6c
UD
243 now->val.str.startmb,
244 now->val.str.lenmb);
69f155d4
UD
245 to_name = NULL;
246
247 state = 3;
248 continue;
249
250 case 3:
251 /* We have two possibilities: We can see an ellipsis or an
252 encoding value. */
4b10dd6c
UD
253 if (nowtok == tok_ellipsis3 || nowtok == tok_ellipsis4
254 || nowtok == tok_ellipsis2)
69f155d4 255 {
4b10dd6c 256 ellipsis = nowtok;
69f155d4
UD
257 state = 4;
258 continue;
259 }
260 /* FALLTHROUGH */
261
262 case 5:
263 /* We expect a value of the form <Uxxxx> or <Uxxxxxxxx> where
264 the xxx mean a hexadecimal value. */
265 state = 2;
266
267 errno = 0;
4b10dd6c 268 if (nowtok != tok_ucs4)
69f155d4 269 {
12a9fabe 270 lr_error (repfile,
69f155d4
UD
271 _("syntax error in repertoire map definition: %s"),
272 _("no <Uxxxx> or <Uxxxxxxxx> value given"));
273
12a9fabe 274 lr_ignore_rest (repfile, 0);
69f155d4
UD
275 continue;
276 }
277
278 /* We've found a new valid definition. */
4b10dd6c
UD
279 repertoire_new_char (repfile, &result->char_table,
280 &result->reverse_table, &result->mem_pool,
281 now->val.ucs4, from_name, to_name,
282 ellipsis != tok_ellipsis2);
69f155d4
UD
283
284 /* Ignore the rest of the line. */
12a9fabe 285 lr_ignore_rest (repfile, 0);
69f155d4
UD
286
287 from_name = NULL;
288 to_name = NULL;
289
290 continue;
291
292 case 4:
293 if (nowtok != tok_bsymbol)
294 {
12a9fabe 295 lr_error (repfile,
69f155d4
UD
296 _("syntax error in repertoire map definition: %s"),
297 _("no symbolic name given for end of range"));
298
12a9fabe 299 lr_ignore_rest (repfile, 0);
69f155d4
UD
300 state = 2;
301 continue;
302 }
303
304 /* Copy the to-name in a safe place. */
305 to_name = (char *) obstack_copy0 (&result->mem_pool,
4b10dd6c
UD
306 repfile->token.val.str.startmb,
307 repfile->token.val.str.lenmb);
69f155d4
UD
308
309 state = 5;
310 continue;
311
312 case 90:
313 if (nowtok != tok_charids)
12a9fabe 314 lr_error (repfile, _("\
11bf311e 315%1$s: definition does not end with `END %1$s'"), "CHARIDS");
69f155d4 316
12a9fabe 317 lr_ignore_rest (repfile, nowtok == tok_charids);
69f155d4
UD
318 break;
319 }
320
321 break;
322 }
323
324 if (state != 2 && state != 90 && !be_quiet)
f2b98f97
UD
325 WITH_CUR_LOCALE (error (0, 0, _("%s: premature end of file"),
326 repfile->fname));
69f155d4 327
12a9fabe 328 lr_close (repfile);
69f155d4 329
4b10dd6c
UD
330 if (tsearch (result, &known, &repertoire_compare) == NULL)
331 /* Something went wrong. */
c69136ae 332 WITH_CUR_LOCALE (error (0, errno, _("cannot save new repertoire map")));
4b10dd6c 333
69f155d4
UD
334 return result;
335}
336
337
3c833378
UD
338void
339repertoire_complain (const char *name)
340{
341 if (tfind (name, &unavailable, (__compar_fn_t) strcmp) == NULL)
342 {
f2b98f97
UD
343 WITH_CUR_LOCALE (error (0, errno, _("\
344repertoire map file `%s' not found"), name));
3c833378
UD
345
346 /* Remember that we reported this map. */
347 tsearch (name, &unavailable, (__compar_fn_t) strcmp);
348 }
349}
350
351
4b10dd6c
UD
352static int
353repertoire_compare (const void *p1, const void *p2)
354{
355 struct repertoire_t *r1 = (struct repertoire_t *) p1;
356 struct repertoire_t *r2 = (struct repertoire_t *) p2;
357
358 return strcmp (r1->name, r2->name);
359}
360
361
69f155d4 362static const struct keyword_t *
4b10dd6c 363repertoiremap_hash (const char *str, unsigned int len)
69f155d4 364{
646b6970 365 static const struct keyword_t wordlist[] =
69f155d4 366 {
12a9fabe
UD
367 {"escape_char", tok_escape_char, 0},
368 {"comment_char", tok_comment_char, 0},
69f155d4
UD
369 {"CHARIDS", tok_charids, 0},
370 {"END", tok_end, 0},
371 };
372
373 if (len == 11 && memcmp (wordlist[0].name, str, 11) == 0)
374 return &wordlist[0];
375 if (len == 12 && memcmp (wordlist[1].name, str, 12) == 0)
376 return &wordlist[1];
377 if (len == 7 && memcmp (wordlist[2].name, str, 7) == 0)
378 return &wordlist[2];
379 if (len == 3 && memcmp (wordlist[3].name, str, 3) == 0)
380 return &wordlist[3];
381
382 return NULL;
383}
4b10dd6c
UD
384
385
386static void
387repertoire_new_char (struct linereader *lr, hash_table *ht, hash_table *rt,
388 struct obstack *ob, uint32_t value, const char *from,
389 const char *to, int decimal_ellipsis)
390{
391 char *from_end;
392 char *to_end;
393 const char *cp;
394 char *buf = NULL;
395 int prefix_len, len1, len2;
396 unsigned int from_nr, to_nr, cnt;
397
398 if (to == NULL)
399 {
400 insert_entry (ht, from, strlen (from),
401 (void *) (unsigned long int) value);
402 /* Please note that it isn't a bug if a symbol is defined more
403 than once. All later definitions are simply discarded. */
404
405 insert_entry (rt, obstack_copy (ob, &value, sizeof (value)),
406 sizeof (value), (void *) from);
407
408 return;
409 }
410
411 /* We have a range: the names must have names with equal prefixes
412 and an equal number of digits, where the second number is greater
413 or equal than the first. */
414 len1 = strlen (from);
415 len2 = strlen (to);
416
417 if (len1 != len2)
418 {
419 invalid_range:
420 lr_error (lr, _("invalid names for character range"));
421 return;
422 }
423
424 cp = &from[len1 - 1];
425 if (decimal_ellipsis)
426 while (isdigit (*cp) && cp >= from)
427 --cp;
428 else
429 while (isxdigit (*cp) && cp >= from)
430 {
431 if (!isdigit (*cp) && !isupper (*cp))
432 lr_error (lr, _("\
433hexadecimal range format should use only capital characters"));
434 --cp;
435 }
436
437 prefix_len = (cp - from) + 1;
438
439 if (cp == &from[len1 - 1] || strncmp (from, to, prefix_len) != 0)
440 goto invalid_range;
441
442 errno = 0;
443 from_nr = strtoul (&from[prefix_len], &from_end, decimal_ellipsis ? 10 : 16);
444 if (*from_end != '\0' || (from_nr == ULONG_MAX && errno == ERANGE)
445 || ((to_nr = strtoul (&to[prefix_len], &to_end,
446 decimal_ellipsis ? 10 : 16)) == ULONG_MAX
447 && errno == ERANGE)
448 || *to_end != '\0')
449 {
70e51ab9
UD
450 lr_error (lr, _("<%s> and <%s> are invalid names for range"),
451 from, to);
4b10dd6c
UD
452 return;
453 }
454
455 if (from_nr > to_nr)
456 {
11bf311e 457 lr_error (lr, _("upper limit in range is smaller than lower limit"));
4b10dd6c
UD
458 return;
459 }
460
461 for (cnt = from_nr; cnt <= to_nr; ++cnt)
462 {
463 uint32_t this_value = value + (cnt - from_nr);
464
465 obstack_printf (ob, decimal_ellipsis ? "%.*s%0*d" : "%.*s%0*X",
466 prefix_len, from, len1 - prefix_len, cnt);
1c95408c 467 obstack_1grow (ob, '\0');
4b10dd6c
UD
468
469 insert_entry (ht, buf, len1,
470 (void *) (unsigned long int) this_value);
471 /* Please note we don't examine the return value since it is no error
472 if we have two definitions for a symbol. */
473
474 insert_entry (rt, obstack_copy (ob, &this_value, sizeof (this_value)),
475 sizeof (this_value), (void *) from);
476 }
477}
478
479
480uint32_t
481repertoire_find_value (const struct repertoire_t *rep, const char *name,
482 size_t len)
483{
484 void *result;
485
5b5255f1 486 if (rep == NULL)
f0a4b6b1 487 return ILLEGAL_CHAR_VALUE;
5b5255f1 488
4b10dd6c
UD
489 if (find_entry ((hash_table *) &rep->char_table, name, len, &result) < 0)
490 return ILLEGAL_CHAR_VALUE;
491
492 return (uint32_t) ((unsigned long int) result);
493}
494
495
496const char *
497repertoire_find_symbol (const struct repertoire_t *rep, uint32_t ucs)
498{
499 void *result;
500
5b5255f1 501 if (rep == NULL)
f0a4b6b1 502 return NULL;
5b5255f1 503
4b10dd6c
UD
504 if (find_entry ((hash_table *) &rep->reverse_table, &ucs, sizeof (ucs),
505 &result) < 0)
506 return NULL;
507
508 return (const char *) result;
509}
510
511
512struct charseq *
513repertoire_find_seq (const struct repertoire_t *rep, uint32_t ucs)
514{
515 void *result;
516
5b5255f1 517 if (rep == NULL)
f0a4b6b1 518 return NULL;
5b5255f1 519
4b10dd6c
UD
520 if (find_entry ((hash_table *) &rep->seq_table, &ucs, sizeof (ucs),
521 &result) < 0)
522 return NULL;
523
524 return (struct charseq *) result;
525}