]> git.ipfire.org Git - thirdparty/glibc.git/blame - locale/programs/repertoire.c
Update.
[thirdparty/glibc.git] / locale / programs / repertoire.c
CommitLineData
4b10dd6c 1/* Copyright (C) 1998, 1999 Free Software Foundation, Inc.
69f155d4
UD
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
24#include <errno.h>
25#include <error.h>
26#include <limits.h>
4b10dd6c
UD
27#include <obstack.h>
28#include <search.h>
69f155d4
UD
29#include <stdlib.h>
30#include <string.h>
31#include <unistd.h>
32
33#include "linereader.h"
4b10dd6c 34#include "charmap.h"
69f155d4
UD
35#include "repertoire.h"
36#include "simple-hash.h"
4b10dd6c 37#include "localedef.h"
69f155d4
UD
38
39
40/* Simple keyword hashing for the repertoiremap. */
4b10dd6c
UD
41static const struct keyword_t *repertoiremap_hash (const char *str,
42 unsigned int len);
43static void repertoire_new_char (struct linereader *lr, hash_table *ht,
44 hash_table *rt, struct obstack *ob,
45 uint32_t value, const char *from,
46 const char *to, int decimal_ellipsis);
47static int repertoire_compare (const void *p1, const void *p2);
48
49/* Already known repertoire maps. */
50static void *known;
69f155d4
UD
51
52
53struct repertoire_t *
54repertoire_read (const char *filename)
55{
12a9fabe 56 struct linereader *repfile;
69f155d4 57 struct repertoire_t *result;
4b10dd6c
UD
58 struct repertoire_t **resultp;
59 struct repertoire_t search;
69f155d4
UD
60 int state;
61 char *from_name = NULL;
62 char *to_name = NULL;
4b10dd6c
UD
63 enum token_t ellipsis = tok_none;
64
65 search.name = filename;
66 resultp = tfind (&search, &known, &repertoire_compare);
67 if (resultp != NULL)
68 return *resultp;
69f155d4
UD
69
70 /* Determine path. */
12a9fabe
UD
71 repfile = lr_open (filename, repertoiremap_hash);
72 if (repfile == NULL)
69f155d4
UD
73 {
74 if (strchr (filename, '/') == NULL)
75 {
4b10dd6c 76 char *i18npath = getenv ("I18NPATH");
12a9fabe
UD
77 if (i18npath != NULL && *i18npath != '\0')
78 {
79 char path[strlen (filename) + 1 + strlen (i18npath)
80 + sizeof ("/repertoiremaps/") - 1];
81 char *next;
82 i18npath = strdupa (i18npath);
69f155d4 83
69f155d4 84
12a9fabe
UD
85 while (repfile == NULL
86 && (next = strsep (&i18npath, ":")) != NULL)
87 {
88 stpcpy (stpcpy (stpcpy (path, next), "/repertoiremaps/"),
89 filename);
90
91 repfile = lr_open (path, repertoiremap_hash);
4b10dd6c
UD
92
93 if (repfile == NULL)
94 {
95 stpcpy (stpcpy (path, next), filename);
96
97 repfile = lr_open (path, repertoiremap_hash);
98 }
12a9fabe
UD
99 }
100 }
101
102 if (repfile == NULL)
103 {
104 /* Look in the systems charmap directory. */
105 char *buf = xmalloc (strlen (filename) + 1
106 + sizeof (REPERTOIREMAP_PATH));
107
108 stpcpy (stpcpy (stpcpy (buf, REPERTOIREMAP_PATH), "/"),
109 filename);
110 repfile = lr_open (buf, repertoiremap_hash);
111
112 if (repfile == NULL)
113 free (buf);
114 }
69f155d4
UD
115 }
116
12a9fabe
UD
117 if (repfile == NULL)
118 {
119 error (0, errno, _("repertoire map file `%s' not found"), filename);
120 return NULL;
121 }
69f155d4
UD
122 }
123
4b10dd6c
UD
124 /* We don't want symbolic names in string to be translated. */
125 repfile->translate_strings = 0;
126
69f155d4
UD
127 /* Allocate room for result. */
128 result = (struct repertoire_t *) xmalloc (sizeof (struct repertoire_t));
129 memset (result, '\0', sizeof (struct repertoire_t));
130
4b10dd6c
UD
131 result->name = xstrdup (filename);
132
69f155d4
UD
133#define obstack_chunk_alloc malloc
134#define obstack_chunk_free free
135 obstack_init (&result->mem_pool);
136
4b10dd6c
UD
137 if (init_hash (&result->char_table, 256)
138 || init_hash (&result->reverse_table, 256)
139 || init_hash (&result->seq_table, 256))
69f155d4
UD
140 {
141 free (result);
142 return NULL;
143 }
144
145 /* We use a state machine to describe the charmap description file
146 format. */
147 state = 1;
148 while (1)
149 {
150 /* What's on? */
4b10dd6c 151 struct token *now = lr_token (repfile, NULL, NULL);
69f155d4
UD
152 enum token_t nowtok = now->tok;
153 struct token *arg;
154
155 if (nowtok == tok_eof)
156 break;
157
158 switch (state)
159 {
160 case 1:
161 /* We haven't yet read any character definition. This is where
162 we accept escape_char and comment_char definitions. */
163 if (nowtok == tok_eol)
164 /* Ignore empty lines. */
165 continue;
166
167 if (nowtok == tok_escape_char || nowtok == tok_comment_char)
168 {
169 /* We know that we need an argument. */
4b10dd6c 170 arg = lr_token (repfile, NULL, NULL);
69f155d4
UD
171
172 if (arg->tok != tok_ident)
173 {
12a9fabe 174 lr_error (repfile, _("syntax error in prolog: %s"),
69f155d4
UD
175 _("bad argument"));
176
12a9fabe 177 lr_ignore_rest (repfile, 0);
69f155d4
UD
178 continue;
179 }
180
4b10dd6c 181 if (arg->val.str.lenmb != 1)
69f155d4 182 {
12a9fabe 183 lr_error (repfile, _("\
69f155d4
UD
184argument to <%s> must be a single character"),
185 nowtok == tok_escape_char ? "escape_char"
186 : "comment_char");
187
12a9fabe 188 lr_ignore_rest (repfile, 0);
69f155d4
UD
189 continue;
190 }
191
192 if (nowtok == tok_escape_char)
4b10dd6c 193 repfile->escape_char = *arg->val.str.startmb;
69f155d4 194 else
4b10dd6c 195 repfile->comment_char = *arg->val.str.startmb;
69f155d4 196
12a9fabe 197 lr_ignore_rest (repfile, 1);
69f155d4
UD
198 continue;
199 }
200
201 if (nowtok == tok_charids)
202 {
12a9fabe 203 lr_ignore_rest (repfile, 1);
69f155d4
UD
204
205 state = 2;
206 continue;
207 }
208
209 /* Otherwise we start reading the character definitions. */
210 state = 2;
211 /* FALLTHROUGH */
212
213 case 2:
214 /* We are now are in the body. Each line
215 must have the format "%s %s %s\n" or "%s...%s %s %s\n". */
216 if (nowtok == tok_eol)
217 /* Ignore empty lines. */
218 continue;
219
220 if (nowtok == tok_end)
221 {
222 state = 90;
223 continue;
224 }
225
226 if (nowtok != tok_bsymbol)
227 {
12a9fabe 228 lr_error (repfile,
69f155d4
UD
229 _("syntax error in repertoire map definition: %s"),
230 _("no symbolic name given"));
231
12a9fabe 232 lr_ignore_rest (repfile, 0);
69f155d4
UD
233 continue;
234 }
235
236 /* If the previous line was not completely correct free the
237 used memory. */
238 if (from_name != NULL)
239 obstack_free (&result->mem_pool, from_name);
240
241 from_name = (char *) obstack_copy0 (&result->mem_pool,
4b10dd6c
UD
242 now->val.str.startmb,
243 now->val.str.lenmb);
69f155d4
UD
244 to_name = NULL;
245
246 state = 3;
247 continue;
248
249 case 3:
250 /* We have two possibilities: We can see an ellipsis or an
251 encoding value. */
4b10dd6c
UD
252 if (nowtok == tok_ellipsis3 || nowtok == tok_ellipsis4
253 || nowtok == tok_ellipsis2)
69f155d4 254 {
4b10dd6c 255 ellipsis = nowtok;
69f155d4
UD
256 state = 4;
257 continue;
258 }
259 /* FALLTHROUGH */
260
261 case 5:
262 /* We expect a value of the form <Uxxxx> or <Uxxxxxxxx> where
263 the xxx mean a hexadecimal value. */
264 state = 2;
265
266 errno = 0;
4b10dd6c 267 if (nowtok != tok_ucs4)
69f155d4 268 {
12a9fabe 269 lr_error (repfile,
69f155d4
UD
270 _("syntax error in repertoire map definition: %s"),
271 _("no <Uxxxx> or <Uxxxxxxxx> value given"));
272
12a9fabe 273 lr_ignore_rest (repfile, 0);
69f155d4
UD
274 continue;
275 }
276
277 /* We've found a new valid definition. */
4b10dd6c
UD
278 repertoire_new_char (repfile, &result->char_table,
279 &result->reverse_table, &result->mem_pool,
280 now->val.ucs4, from_name, to_name,
281 ellipsis != tok_ellipsis2);
69f155d4
UD
282
283 /* Ignore the rest of the line. */
12a9fabe 284 lr_ignore_rest (repfile, 0);
69f155d4
UD
285
286 from_name = NULL;
287 to_name = NULL;
288
289 continue;
290
291 case 4:
292 if (nowtok != tok_bsymbol)
293 {
12a9fabe 294 lr_error (repfile,
69f155d4
UD
295 _("syntax error in repertoire map definition: %s"),
296 _("no symbolic name given for end of range"));
297
12a9fabe 298 lr_ignore_rest (repfile, 0);
69f155d4
UD
299 state = 2;
300 continue;
301 }
302
303 /* Copy the to-name in a safe place. */
304 to_name = (char *) obstack_copy0 (&result->mem_pool,
4b10dd6c
UD
305 repfile->token.val.str.startmb,
306 repfile->token.val.str.lenmb);
69f155d4
UD
307
308 state = 5;
309 continue;
310
311 case 90:
312 if (nowtok != tok_charids)
12a9fabe 313 lr_error (repfile, _("\
69f155d4
UD
314`%1$s' definition does not end with `END %1$s'"), "CHARIDS");
315
12a9fabe 316 lr_ignore_rest (repfile, nowtok == tok_charids);
69f155d4
UD
317 break;
318 }
319
320 break;
321 }
322
323 if (state != 2 && state != 90 && !be_quiet)
12a9fabe 324 error (0, 0, _("%s: premature end of file"), repfile->fname);
69f155d4 325
12a9fabe 326 lr_close (repfile);
69f155d4 327
4b10dd6c
UD
328 if (tsearch (result, &known, &repertoire_compare) == NULL)
329 /* Something went wrong. */
330 error (0, errno, _("cannot safe new repertoire map"));
331
69f155d4
UD
332 return result;
333}
334
335
4b10dd6c
UD
336static int
337repertoire_compare (const void *p1, const void *p2)
338{
339 struct repertoire_t *r1 = (struct repertoire_t *) p1;
340 struct repertoire_t *r2 = (struct repertoire_t *) p2;
341
342 return strcmp (r1->name, r2->name);
343}
344
345
69f155d4 346static const struct keyword_t *
4b10dd6c 347repertoiremap_hash (const char *str, unsigned int len)
69f155d4
UD
348{
349 static const struct keyword_t wordlist[0] =
350 {
12a9fabe
UD
351 {"escape_char", tok_escape_char, 0},
352 {"comment_char", tok_comment_char, 0},
69f155d4
UD
353 {"CHARIDS", tok_charids, 0},
354 {"END", tok_end, 0},
355 };
356
357 if (len == 11 && memcmp (wordlist[0].name, str, 11) == 0)
358 return &wordlist[0];
359 if (len == 12 && memcmp (wordlist[1].name, str, 12) == 0)
360 return &wordlist[1];
361 if (len == 7 && memcmp (wordlist[2].name, str, 7) == 0)
362 return &wordlist[2];
363 if (len == 3 && memcmp (wordlist[3].name, str, 3) == 0)
364 return &wordlist[3];
365
366 return NULL;
367}
4b10dd6c
UD
368
369
370static void
371repertoire_new_char (struct linereader *lr, hash_table *ht, hash_table *rt,
372 struct obstack *ob, uint32_t value, const char *from,
373 const char *to, int decimal_ellipsis)
374{
375 char *from_end;
376 char *to_end;
377 const char *cp;
378 char *buf = NULL;
379 int prefix_len, len1, len2;
380 unsigned int from_nr, to_nr, cnt;
381
382 if (to == NULL)
383 {
384 insert_entry (ht, from, strlen (from),
385 (void *) (unsigned long int) value);
386 /* Please note that it isn't a bug if a symbol is defined more
387 than once. All later definitions are simply discarded. */
388
389 insert_entry (rt, obstack_copy (ob, &value, sizeof (value)),
390 sizeof (value), (void *) from);
391
392 return;
393 }
394
395 /* We have a range: the names must have names with equal prefixes
396 and an equal number of digits, where the second number is greater
397 or equal than the first. */
398 len1 = strlen (from);
399 len2 = strlen (to);
400
401 if (len1 != len2)
402 {
403 invalid_range:
404 lr_error (lr, _("invalid names for character range"));
405 return;
406 }
407
408 cp = &from[len1 - 1];
409 if (decimal_ellipsis)
410 while (isdigit (*cp) && cp >= from)
411 --cp;
412 else
413 while (isxdigit (*cp) && cp >= from)
414 {
415 if (!isdigit (*cp) && !isupper (*cp))
416 lr_error (lr, _("\
417hexadecimal range format should use only capital characters"));
418 --cp;
419 }
420
421 prefix_len = (cp - from) + 1;
422
423 if (cp == &from[len1 - 1] || strncmp (from, to, prefix_len) != 0)
424 goto invalid_range;
425
426 errno = 0;
427 from_nr = strtoul (&from[prefix_len], &from_end, decimal_ellipsis ? 10 : 16);
428 if (*from_end != '\0' || (from_nr == ULONG_MAX && errno == ERANGE)
429 || ((to_nr = strtoul (&to[prefix_len], &to_end,
430 decimal_ellipsis ? 10 : 16)) == ULONG_MAX
431 && errno == ERANGE)
432 || *to_end != '\0')
433 {
434 lr_error (lr, _("<%s> and <%s> are invalid names for range"));
435 return;
436 }
437
438 if (from_nr > to_nr)
439 {
440 lr_error (lr, _("upper limit in range is not smaller then lower limit"));
441 return;
442 }
443
444 for (cnt = from_nr; cnt <= to_nr; ++cnt)
445 {
446 uint32_t this_value = value + (cnt - from_nr);
447
448 obstack_printf (ob, decimal_ellipsis ? "%.*s%0*d" : "%.*s%0*X",
449 prefix_len, from, len1 - prefix_len, cnt);
450
451 insert_entry (ht, buf, len1,
452 (void *) (unsigned long int) this_value);
453 /* Please note we don't examine the return value since it is no error
454 if we have two definitions for a symbol. */
455
456 insert_entry (rt, obstack_copy (ob, &this_value, sizeof (this_value)),
457 sizeof (this_value), (void *) from);
458 }
459}
460
461
462uint32_t
463repertoire_find_value (const struct repertoire_t *rep, const char *name,
464 size_t len)
465{
466 void *result;
467
468 if (find_entry ((hash_table *) &rep->char_table, name, len, &result) < 0)
469 return ILLEGAL_CHAR_VALUE;
470
471 return (uint32_t) ((unsigned long int) result);
472}
473
474
475const char *
476repertoire_find_symbol (const struct repertoire_t *rep, uint32_t ucs)
477{
478 void *result;
479
480 if (find_entry ((hash_table *) &rep->reverse_table, &ucs, sizeof (ucs),
481 &result) < 0)
482 return NULL;
483
484 return (const char *) result;
485}
486
487
488struct charseq *
489repertoire_find_seq (const struct repertoire_t *rep, uint32_t ucs)
490{
491 void *result;
492
493 if (find_entry ((hash_table *) &rep->seq_table, &ucs, sizeof (ucs),
494 &result) < 0)
495 return NULL;
496
497 return (struct charseq *) result;
498}