]> git.ipfire.org Git - thirdparty/glibc.git/blame - iconv/gconv_conf.c
Update.
[thirdparty/glibc.git] / iconv / gconv_conf.c
CommitLineData
/* Handle configuration data.
   Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with the GNU C Library; see the file COPYING.LIB.  If not,
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */
20
d6204268 21#include <assert.h>
6973fc01 22#include <ctype.h>
e34b0f29 23#include <errno.h>
fab6d621 24#include <limits.h>
6973fc01
UD
25#include <search.h>
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29#include <unistd.h>
30#include <sys/param.h>
31
e62c19f1
UD
32#include <gconv_int.h>
33
6973fc01
UD
34
35/* This is the default path where we look for module lists. */
36static const char default_gconv_path[] = GCONV_PATH;
37
d6204268
UD
38/* The path element in use. */
39const struct path_elem *__gconv_path_elem;
40/* Maximum length of a single path element. */
41size_t __gconv_max_path_elem_len;
42
43/* We use the following struct if we couldn't allocate memory. */
44static const struct path_elem empty_path_elem;
45
6973fc01
UD
46/* Name of the file containing the module information in the directories
47 along the path. */
48static const char gconv_conf_filename[] = "gconv-modules";
49
e34b0f29
UD
50/* Filename extension for the modules. */
51#ifndef MODULE_EXT
52# define MODULE_EXT ".so"
53#endif
54static const char gconv_module_ext[] = MODULE_EXT;
55
6973fc01
UD
56/* We have a few builtin transformations. */
57static struct gconv_module builtin_modules[] =
58{
59#define BUILTIN_TRANSFORMATION(From, ConstPfx, ConstLen, To, Cost, Name, \
8619129f 60 Fct, Init, End, MinF, MaxF, MinT, MaxT) \
6973fc01
UD
61 { \
62 from_pattern: From, \
63 from_constpfx: ConstPfx, \
64 from_constpfx_len: ConstLen, \
65 from_regex: NULL, \
66 to_string: To, \
fab6d621
UD
67 cost_hi: Cost, \
68 cost_lo: INT_MAX, \
6973fc01
UD
69 module_name: Name \
70 },
5891046a
UD
71#define BUILTIN_ALIAS(From, To)
72
73#include "gconv_builtin.h"
74};
75
76#undef BUILTIN_TRANSFORMATION
77#undef BUILTIN_ALIAS
78
390500b1 79static const char *builtin_aliases[] =
5891046a
UD
80{
81#define BUILTIN_TRANSFORMATION(From, ConstPfx, ConstLen, To, Cost, Name, \
8619129f 82 Fct, Init, End, MinF, MaxF, MinT, MaxT)
5891046a 83#define BUILTIN_ALIAS(From, To) From " " To,
6973fc01
UD
84
85#include "gconv_builtin.h"
86};
87
50304ef0 88#ifdef USE_IN_LIBIO
08a0d60a 89# include <libio/libioP.h>
50304ef0
UD
90# define __getdelim(line, len, c, fp) _IO_getdelim (line, len, c, fp)
91#endif
92
6973fc01 93
2bd60880 94/* Test whether there is already a matching module known. */
6973fc01 95static int
2bd60880
UD
96internal_function
97detect_conflict (const char *alias, size_t alias_len)
6973fc01 98{
2bd60880 99 struct gconv_module *node = __gconv_modules_db;
6973fc01 100
2bd60880 101 while (node != NULL)
6973fc01 102 {
2bd60880
UD
103 int cmpres = strncmp (alias, node->from_constpfx,
104 MIN (alias_len, node->from_constpfx_len));
8c479619 105
2bd60880 106 if (cmpres == 0)
8c479619 107 {
2bd60880
UD
108 struct gconv_module *runp;
109
110 if (alias_len < node->from_constpfx_len)
111 /* Cannot possibly match. */
112 return 0;
113
114 /* This means the prefix and the alias are identical. If
115 there is now a simple extry or a regular expression
116 matching this name we have found a conflict. If there is
117 no conflict with the elements in the `same' list there
118 cannot be a conflict. */
119 runp = node;
120 do
8c479619 121 {
2bd60880
UD
122 if (runp->from_pattern == NULL)
123 {
124 /* This is a simple entry and therefore we have a
125 conflict if the strings are really the same. */
126 if (alias_len == node->from_constpfx_len)
127 return 1;
128 }
8c479619 129 else
2bd60880
UD
130 {
131 /* Compile the regular expression if necessary. */
132 if (runp->from_regex == NULL)
133 {
134 if (__regcomp (&runp->from_regex_mem,
135 runp->from_pattern,
136 REG_EXTENDED | REG_ICASE) != 0)
137 /* Something is wrong. Remember this. */
138 runp->from_regex = (regex_t *) -1L;
139 else
140 runp->from_regex = &runp->from_regex_mem;
141 }
142
143 if (runp->from_regex != (regex_t *) -1L)
144 {
145 regmatch_t match[1];
146
147 /* Try to match the regular expression. */
148 if (__regexec (runp->from_regex, alias, 1, match, 0) == 0
149 && match[0].rm_so == 0
150 && alias[match[0].rm_eo] == '\0')
151 /* They match, therefore it is a conflict. */
152 return 1;
153 }
154 }
155
156 runp = runp->same;
8c479619 157 }
2bd60880 158 while (runp != NULL);
8c479619 159
2bd60880
UD
160 if (alias_len == node->from_constpfx_len)
161 return 0;
8c479619 162
2bd60880 163 node = node->matching;
8c479619 164 }
2bd60880
UD
165 else if (cmpres < 0)
166 node = node->left;
167 else
168 node = node->right;
8c479619 169 }
2bd60880
UD
170
171 return node != NULL;
8c479619
UD
172}
173
174
6973fc01
UD
175/* Add new alias. */
176static inline void
8c479619 177add_alias (char *rp, void *modules)
6973fc01
UD
178{
179 /* We now expect two more string. The strings are normalized
180 (converted to UPPER case) and strored in the alias database. */
181 struct gconv_alias *new_alias;
182 char *from, *to, *wp;
183
184 while (isspace (*rp))
185 ++rp;
186 from = wp = rp;
187 while (*rp != '\0' && !isspace (*rp))
256846bb 188 *wp++ = toupper (*rp++);
6973fc01
UD
189 if (*rp == '\0')
190 /* There is no `to' string on the line. Ignore it. */
191 return;
256846bb
UD
192 *wp++ = '\0';
193 to = ++rp;
6973fc01
UD
194 while (isspace (*rp))
195 ++rp;
196 while (*rp != '\0' && !isspace (*rp))
bd4848fb 197 *wp++ = toupper (*rp++);
6973fc01
UD
198 if (to == wp)
199 /* No `to' string, ignore the line. */
200 return;
201 *wp++ = '\0';
202
2bd60880
UD
203 /* Test whether this alias conflicts with any available module. */
204 if (detect_conflict (from, to - from - 1))
8c479619
UD
205 /* It does conflict, don't add the alias. */
206 return;
207
6973fc01
UD
208 new_alias = (struct gconv_alias *)
209 malloc (sizeof (struct gconv_alias) + (wp - from));
e34b0f29
UD
210 if (new_alias != NULL)
211 {
390500b1
UD
212 void **inserted;
213
e34b0f29
UD
214 new_alias->fromname = memcpy ((char *) new_alias
215 + sizeof (struct gconv_alias),
216 from, wp - from);
217 new_alias->toname = new_alias->fromname + (to - from);
218
390500b1
UD
219 inserted = (void **) __tsearch (new_alias, &__gconv_alias_db,
220 __gconv_alias_compare);
23f5f62d 221 if (inserted == NULL || *inserted != new_alias)
e34b0f29
UD
222 /* Something went wrong, free this entry. */
223 free (new_alias);
224 }
6973fc01
UD
225}
226
227
2bd60880
UD
228/* Insert a data structure for a new module in the search tree. */
229static inline void
230internal_function
231insert_module (struct gconv_module *newp)
232{
233 struct gconv_module **rootp = &__gconv_modules_db;
234
235 while (*rootp != NULL)
236 {
237 struct gconv_module *root = *rootp;
238 size_t minlen = MIN (newp->from_constpfx_len, root->from_constpfx_len);
239 int cmpres;
240
241 cmpres = strncmp (newp->from_constpfx, root->from_constpfx, minlen);
242 if (cmpres == 0)
243 {
244 /* This can mean two things: the prefix is entirely the same or
245 it matches only for the minimum length of both strings. */
246 if (newp->from_constpfx_len == root->from_constpfx_len)
247 {
248 /* Both prefixes are identical. Insert the string at the
249 end of the `same' list if it is not already there. */
250 const char *from_pattern = (newp->from_pattern
251 ?: newp->from_constpfx);
252
253 while (strcmp (from_pattern,
254 root->from_pattern ?: root->from_constpfx) != 0
255 || strcmp (newp->to_string, root->to_string) != 0)
256 {
257 rootp = &root->same;
258 root = *rootp;
259 if (root == NULL)
260 break;
261 }
262
263 if (root != NULL)
264 /* This is a no new conversion. */
265 return;
266
267 break;
268 }
269
270 /* The new element either has a prefix which is itself a
271 prefix for the prefix of the current node or vice verse.
272 In the first case we insert the node right here. Otherwise
273 we have to descent further. */
274 if (newp->from_constpfx_len < root->from_constpfx_len)
275 {
276 newp->matching = root;
277 break;
278 }
279
280 rootp = &root->matching;
281 }
282 else if (cmpres < 0)
283 rootp = &root->left;
284 else
285 rootp = &root->right;
286 }
287
288 /* Plug in the new node here. */
289 *rootp = newp;
290}
291
292
6973fc01
UD
293/* Add new module. */
294static inline void
2bd60880 295internal_function
6973fc01 296add_module (char *rp, const char *directory, size_t dir_len, void **modules,
fab6d621 297 size_t *nmodules, int modcounter)
6973fc01
UD
298{
299 /* We expect now
300 1. `from' name
301 2. `to' name
302 3. filename of the module
303 4. an optional cost value
304 */
305 struct gconv_module *new_module;
306 char *from, *to, *module, *wp;
307 size_t const_len;
308 int from_is_regex;
e34b0f29 309 int need_ext;
fab6d621 310 int cost_hi;
6973fc01
UD
311
312 while (isspace (*rp))
313 ++rp;
314 from = rp;
315 from_is_regex = 0;
316 while (*rp != '\0' && !isspace (*rp))
317 {
318 if (!isalnum (*rp) && *rp != '-' && *rp != '/' && *rp != '.'
2bd60880 319 && *rp != '_' && *rp != '(' && *rp != ')')
6973fc01 320 from_is_regex = 1;
bd4848fb 321 *rp = toupper (*rp);
6973fc01
UD
322 ++rp;
323 }
324 if (*rp == '\0')
325 return;
326 *rp++ = '\0';
327 to = wp = rp;
328 while (isspace (*rp))
329 ++rp;
330 while (*rp != '\0' && !isspace (*rp))
bd4848fb 331 *wp++ = toupper (*rp++);
6973fc01
UD
332 if (*rp == '\0')
333 return;
334 *wp++ = '\0';
335 do
336 ++rp;
337 while (isspace (*rp));
338 module = wp;
339 while (*rp != '\0' && !isspace (*rp))
340 *wp++ = *rp++;
341 if (*rp == '\0')
342 {
343 /* There is no cost, use one by default. */
344 *wp++ = '\0';
fab6d621 345 cost_hi = 1;
6973fc01
UD
346 }
347 else
348 {
349 /* There might be a cost value. */
350 char *endp;
351
352 *wp++ = '\0';
fab6d621 353 cost_hi = strtol (rp, &endp, 10);
2bd60880 354 if (rp == endp || cost_hi < 1)
6973fc01 355 /* No useful information. */
fab6d621 356 cost_hi = 1;
6973fc01
UD
357 }
358
359 if (module[0] == '\0')
360 /* No module name given. */
361 return;
362 if (module[0] == '/')
363 dir_len = 0;
364 else
365 /* Increment by one for the slash. */
366 ++dir_len;
367
e34b0f29
UD
368 /* See whether we must add the ending. */
369 need_ext = 0;
370 if (wp - module < sizeof (gconv_module_ext)
371 || memcmp (wp - sizeof (gconv_module_ext), gconv_module_ext,
372 sizeof (gconv_module_ext)) != 0)
373 /* We must add the module extension. */
374 need_ext = sizeof (gconv_module_ext) - 1;
375
6973fc01
UD
376 /* We've collected all the information, now create an entry. */
377
6973fc01 378 if (from_is_regex)
e34b0f29
UD
379 {
380 const_len = 0;
381 while (isalnum (from[const_len]) || from[const_len] == '-'
382 || from[const_len] == '/' || from[const_len] == '.'
383 || from[const_len] == '_')
384 ++const_len;
385 }
386 else
387 const_len = to - from - 1;
6973fc01 388
2bd60880
UD
389 new_module = (struct gconv_module *) calloc (1,
390 sizeof (struct gconv_module)
e34b0f29
UD
391 + (wp - from)
392 + dir_len + need_ext);
6973fc01
UD
393 if (new_module != NULL)
394 {
e34b0f29
UD
395 char *tmp;
396
397 new_module->from_constpfx = memcpy ((char *) new_module
398 + sizeof (struct gconv_module),
399 from, to - from);
6973fc01 400 if (from_is_regex)
e34b0f29 401 new_module->from_pattern = new_module->from_constpfx;
e34b0f29
UD
402
403 new_module->from_constpfx_len = const_len;
404
6973fc01 405 new_module->to_string = memcpy ((char *) new_module->from_constpfx
e34b0f29 406 + (to - from), to, module - to);
6973fc01 407
fab6d621
UD
408 new_module->cost_hi = cost_hi;
409 new_module->cost_lo = modcounter;
6973fc01 410
e34b0f29
UD
411 new_module->module_name = (char *) new_module->to_string + (module - to);
412
6973fc01 413 if (dir_len == 0)
e34b0f29 414 tmp = (char *) new_module->module_name;
6973fc01
UD
415 else
416 {
6973fc01
UD
417 tmp = __mempcpy ((char *) new_module->module_name,
418 directory, dir_len - 1);
419 *tmp++ = '/';
6973fc01
UD
420 }
421
e34b0f29
UD
422 tmp = __mempcpy (tmp, module, wp - module);
423
424 if (need_ext)
425 memcpy (tmp - 1, gconv_module_ext, sizeof (gconv_module_ext));
426
8c479619
UD
427 /* See whether we have already an alias with this name defined.
428 We do allow regular expressions matching this any alias since
429 this expression can also match other names and we test for aliases
430 before testing for modules. */
431 if (! from_is_regex)
432 {
433 struct gconv_alias fake_alias;
434
435 fake_alias.fromname = new_module->from_constpfx;
436
437 if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare)
438 != NULL)
439 {
440 /* This module duplicates an alias. */
441 free (new_module);
442 return;
443 }
444 }
445
2bd60880
UD
446 /* Now insert the new module data structure in our search tree. */
447 insert_module (new_module);
6973fc01
UD
448 }
449}
450
451
6973fc01
UD
452/* Read the next configuration file. */
453static void
454internal_function
455read_conf_file (const char *filename, const char *directory, size_t dir_len,
456 void **modules, size_t *nmodules)
457{
458 FILE *fp = fopen (filename, "r");
459 char *line = NULL;
460 size_t line_len = 0;
fab6d621 461 int modcounter = 0;
6973fc01
UD
462
463 /* Don't complain if a file is not present or readable, simply silently
464 ignore it. */
465 if (fp == NULL)
466 return;
467
468 /* Process the known entries of the file. Comments start with `#' and
469 end with the end of the line. Empty lines are ignored. */
77ccaba1 470 while (!feof_unlocked (fp))
6973fc01
UD
471 {
472 char *rp, *endp, *word;
473 ssize_t n = __getdelim (&line, &line_len, '\n', fp);
474 if (n < 0)
475 /* An error occurred. */
476 break;
477
478 rp = line;
6973fc01
UD
479 /* Terminate the line (excluding comments or newline) by an NUL byte
480 to simplify the following code. */
481 endp = strchr (rp, '#');
482 if (endp != NULL)
483 *endp = '\0';
484 else
04be94a8
UD
485 if (rp[n - 1] == '\n')
486 rp[n - 1] = '\0';
6973fc01 487
e34b0f29
UD
488 while (isspace (*rp))
489 ++rp;
490
6973fc01
UD
491 /* If this is an empty line go on with the next one. */
492 if (rp == endp)
493 continue;
494
495 word = rp;
496 while (*rp != '\0' && !isspace (*rp))
497 ++rp;
498
499 if (rp - word == sizeof ("alias") - 1
e34b0f29 500 && memcmp (word, "alias", sizeof ("alias") - 1) == 0)
8c479619 501 add_alias (rp, *modules);
6973fc01 502 else if (rp - word == sizeof ("module") - 1
e34b0f29 503 && memcmp (word, "module", sizeof ("module") - 1) == 0)
fab6d621 504 add_module (rp, directory, dir_len, modules, nmodules, modcounter++);
6973fc01
UD
505 /* else */
506 /* Otherwise ignore the line. */
507 }
508
d6204268
UD
509 free (line);
510
6973fc01
UD
511 fclose (fp);
512}
513
514
d6204268
UD
515/* Determine the directories we are looking for data in. */
516void
517__gconv_get_path (void)
518{
519 struct path_elem *result;
520 __libc_lock_define_initialized (static, lock);
521
522 __libc_lock_lock (lock);
523
524 /* Make sure there wasn't a second thread doing it already. */
525 result = (struct path_elem *) __gconv_path_elem;
526 if (result == NULL)
527 {
528 /* Determine the complete path first. */
529 const char *user_path;
530 char *gconv_path;
531 size_t gconv_path_len;
532 char *elem;
533 char *oldp;
534 char *cp;
535 int nelems;
536
537 user_path = __secure_getenv ("GCONV_PATH");
538 if (user_path == NULL)
539 {
540 /* No user-defined path. Make a modifiable copy of the
541 default path. */
542 gconv_path = strdupa (default_gconv_path);
543 gconv_path_len = sizeof (default_gconv_path);
544 }
545 else
546 {
547 /* Append the default path to the user-defined path. */
548 size_t user_len = strlen (user_path);
549
550 gconv_path_len = user_len + 1 + sizeof (default_gconv_path);
551 gconv_path = alloca (gconv_path_len);
552 __mempcpy (__mempcpy (__mempcpy (gconv_path, user_path, user_len),
553 ":", 1),
554 default_gconv_path, sizeof (default_gconv_path));
555 }
556
557 /* In a first pass we calculate the number of elements. */
558 oldp = NULL;
559 cp = strchr (gconv_path, ':');
560 nelems = 1;
561 while (cp != NULL)
562 {
563 if (cp != oldp + 1)
564 ++nelems;
565 oldp = cp;
566 cp = strchr (cp + 1, ':');
567 }
568
569 /* Allocate the memory for the result. */
570 result = (struct path_elem *) malloc ((nelems + 1)
571 * sizeof (struct path_elem)
572 + gconv_path_len + nelems);
573 if (result != NULL)
574 {
575 char *strspace = (char *) &result[nelems + 1];
576 int n = 0;
577
578 /* Separate the individual parts. */
579 __gconv_max_path_elem_len = 0;
580 elem = __strtok_r (gconv_path, ":", &gconv_path);
581 assert (elem != NULL);
582 do
583 {
584 result[n].name = strspace;
585 strspace = __stpcpy (strspace, elem);
586 if (strspace[-1] != '/')
587 *strspace++ = '/';
588
589 result[n].len = strspace - result[n].name;
590 if (result[n].len > __gconv_max_path_elem_len)
591 __gconv_max_path_elem_len = result[n].len;
592
593 *strspace++ = '\0';
594 ++n;
595 }
596 while ((elem = __strtok_r (NULL, ":", &gconv_path)) != NULL);
597
598 result[n].name = NULL;
599 result[n].len = 0;
600 }
601
602 __gconv_path_elem = result ?: &empty_path_elem;
603 }
604
605 __libc_lock_unlock (lock);
606}
607
608
6973fc01
UD
609/* Read all configuration files found in the user-specified and the default
610 path. */
611void
612__gconv_read_conf (void)
613{
6973fc01
UD
614 void *modules = NULL;
615 size_t nmodules = 0;
e34b0f29 616 int save_errno = errno;
5891046a 617 size_t cnt;
6973fc01 618
d6204268
UD
619 /* Find out where we have to look. */
620 if (__gconv_path_elem == NULL)
621 __gconv_get_path ();
6973fc01 622
d6204268 623 for (cnt = 0; __gconv_path_elem[cnt].name != NULL; ++cnt)
6973fc01 624 {
d6204268 625 char real_elem[__gconv_max_path_elem_len + sizeof (gconv_conf_filename)];
6973fc01 626
d6204268 627 if (__realpath (__gconv_path_elem[cnt].name, real_elem) != NULL)
6973fc01
UD
628 {
629 size_t elem_len = strlen (real_elem);
2bd60880 630 char *filename;
6973fc01
UD
631
632 filename = alloca (elem_len + 1 + sizeof (gconv_conf_filename));
2bd60880
UD
633 __mempcpy (__mempcpy (__mempcpy (filename, real_elem, elem_len),
634 "/", 1),
635 gconv_conf_filename, sizeof (gconv_conf_filename));
6973fc01
UD
636
637 /* Read the next configuration file. */
638 read_conf_file (filename, real_elem, elem_len, &modules, &nmodules);
639 }
6973fc01
UD
640 }
641
2bd60880
UD
642 /* Add the internal modules. */
643 for (cnt = 0; cnt < sizeof (builtin_modules) / sizeof (builtin_modules[0]);
644 ++cnt)
6973fc01 645 {
2bd60880 646 if (builtin_modules[cnt].from_pattern == NULL)
e34b0f29 647 {
2bd60880 648 struct gconv_alias fake_alias;
6973fc01 649
2bd60880 650 fake_alias.fromname = builtin_modules[cnt].from_constpfx;
6973fc01 651
2bd60880
UD
652 if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare)
653 != NULL)
654 /* It'll conflict so don't add it. */
655 continue;
e34b0f29 656 }
2bd60880
UD
657
658 insert_module (&builtin_modules[cnt]);
e34b0f29 659 }
6973fc01 660
5891046a
UD
661 /* Add aliases for builtin conversions. */
662 cnt = sizeof (builtin_aliases) / sizeof (builtin_aliases[0]);
663 while (cnt > 0)
664 {
665 char *copy = strdupa (builtin_aliases[--cnt]);
8c479619 666 add_alias (copy, modules);
5891046a
UD
667 }
668
e34b0f29
UD
669 /* Restore the error number. */
670 __set_errno (save_errno);
6973fc01 671}
d6204268
UD
672
673
674
675/* Free all resources if necessary. */
676static void __attribute__ ((unused))
677free_mem (void)
678{
679 if (__gconv_path_elem != NULL && __gconv_path_elem != &empty_path_elem)
680 free ((void *) __gconv_path_elem);
681}
682
683text_set_element (__libc_subfreeres, free_mem);