]> git.ipfire.org Git - thirdparty/glibc.git/blob - iconv/gconv_conf.c
Update.
[thirdparty/glibc.git] / iconv / gconv_conf.c
1 /* Handle configuration data.
2 Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
15
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21 #include <ctype.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <search.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <unistd.h>
29 #include <sys/param.h>
30
31 #include <gconv_int.h>
32
33
34 /* This is the default path where we look for module lists. */
35 static const char default_gconv_path[] = GCONV_PATH;
36
37 /* Name of the file containing the module information in the directories
38 along the path. */
39 static const char gconv_conf_filename[] = "gconv-modules";
40
41 /* Filename extension for the modules. */
42 #ifndef MODULE_EXT
43 # define MODULE_EXT ".so"
44 #endif
45 static const char gconv_module_ext[] = MODULE_EXT;
46
47 /* We have a few builtin transformations. */
48 static struct gconv_module builtin_modules[] =
49 {
50 #define BUILTIN_TRANSFORMATION(From, ConstPfx, ConstLen, To, Cost, Name, \
51 Fct, Init, End, MinF, MaxF, MinT, MaxT) \
52 { \
53 from_pattern: From, \
54 from_constpfx: ConstPfx, \
55 from_constpfx_len: ConstLen, \
56 from_regex: NULL, \
57 to_string: To, \
58 cost_hi: Cost, \
59 cost_lo: INT_MAX, \
60 module_name: Name \
61 },
62 #define BUILTIN_ALIAS(From, To)
63
64 #include "gconv_builtin.h"
65 };
66
67 #undef BUILTIN_TRANSFORMATION
68 #undef BUILTIN_ALIAS
69
70 static const char *
71 builtin_aliases[] =
72 {
73 #define BUILTIN_TRANSFORMATION(From, ConstPfx, ConstLen, To, Cost, Name, \
74 Fct, Init, End, MinF, MaxF, MinT, MaxT)
75 #define BUILTIN_ALIAS(From, To) From " " To,
76
77 #include "gconv_builtin.h"
78 };
79
80 #ifdef USE_IN_LIBIO
81 # include <libio/libioP.h>
82 # define __getdelim(line, len, c, fp) _IO_getdelim (line, len, c, fp)
83 #endif
84
85
86 /* Test whether there is already a matching module known. */
87 static int
88 internal_function
89 detect_conflict (const char *alias, size_t alias_len)
90 {
91 struct gconv_module *node = __gconv_modules_db;
92
93 while (node != NULL)
94 {
95 int cmpres = strncmp (alias, node->from_constpfx,
96 MIN (alias_len, node->from_constpfx_len));
97
98 if (cmpres == 0)
99 {
100 struct gconv_module *runp;
101
102 if (alias_len < node->from_constpfx_len)
103 /* Cannot possibly match. */
104 return 0;
105
106 /* This means the prefix and the alias are identical. If
107 there is now a simple extry or a regular expression
108 matching this name we have found a conflict. If there is
109 no conflict with the elements in the `same' list there
110 cannot be a conflict. */
111 runp = node;
112 do
113 {
114 if (runp->from_pattern == NULL)
115 {
116 /* This is a simple entry and therefore we have a
117 conflict if the strings are really the same. */
118 if (alias_len == node->from_constpfx_len)
119 return 1;
120 }
121 else
122 {
123 /* Compile the regular expression if necessary. */
124 if (runp->from_regex == NULL)
125 {
126 if (__regcomp (&runp->from_regex_mem,
127 runp->from_pattern,
128 REG_EXTENDED | REG_ICASE) != 0)
129 /* Something is wrong. Remember this. */
130 runp->from_regex = (regex_t *) -1L;
131 else
132 runp->from_regex = &runp->from_regex_mem;
133 }
134
135 if (runp->from_regex != (regex_t *) -1L)
136 {
137 regmatch_t match[1];
138
139 /* Try to match the regular expression. */
140 if (__regexec (runp->from_regex, alias, 1, match, 0) == 0
141 && match[0].rm_so == 0
142 && alias[match[0].rm_eo] == '\0')
143 /* They match, therefore it is a conflict. */
144 return 1;
145 }
146 }
147
148 runp = runp->same;
149 }
150 while (runp != NULL);
151
152 if (alias_len == node->from_constpfx_len)
153 return 0;
154
155 node = node->matching;
156 }
157 else if (cmpres < 0)
158 node = node->left;
159 else
160 node = node->right;
161 }
162
163 return node != NULL;
164 }
165
166
167 /* Add new alias. */
168 static inline void
169 add_alias (char *rp, void *modules)
170 {
171 /* We now expect two more string. The strings are normalized
172 (converted to UPPER case) and strored in the alias database. */
173 struct gconv_alias *new_alias;
174 char *from, *to, *wp;
175
176 while (isspace (*rp))
177 ++rp;
178 from = wp = rp;
179 while (*rp != '\0' && !isspace (*rp))
180 *wp++ = toupper (*rp++);
181 if (*rp == '\0')
182 /* There is no `to' string on the line. Ignore it. */
183 return;
184 *wp++ = '\0';
185 to = ++rp;
186 while (isspace (*rp))
187 ++rp;
188 while (*rp != '\0' && !isspace (*rp))
189 *wp++ = toupper (*rp++);
190 if (to == wp)
191 /* No `to' string, ignore the line. */
192 return;
193 *wp++ = '\0';
194
195 /* Test whether this alias conflicts with any available module. */
196 if (detect_conflict (from, to - from - 1))
197 /* It does conflict, don't add the alias. */
198 return;
199
200 new_alias = (struct gconv_alias *)
201 malloc (sizeof (struct gconv_alias) + (wp - from));
202 if (new_alias != NULL)
203 {
204 new_alias->fromname = memcpy ((char *) new_alias
205 + sizeof (struct gconv_alias),
206 from, wp - from);
207 new_alias->toname = new_alias->fromname + (to - from);
208
209 if (__tfind (new_alias, &__gconv_alias_db, __gconv_alias_compare) != NULL
210 || (__tsearch (new_alias, &__gconv_alias_db, __gconv_alias_compare)
211 == NULL))
212 /* Something went wrong, free this entry. */
213 free (new_alias);
214 }
215 }
216
217
218 /* Insert a data structure for a new module in the search tree. */
219 static inline void
220 internal_function
221 insert_module (struct gconv_module *newp)
222 {
223 struct gconv_module **rootp = &__gconv_modules_db;
224
225 while (*rootp != NULL)
226 {
227 struct gconv_module *root = *rootp;
228 size_t minlen = MIN (newp->from_constpfx_len, root->from_constpfx_len);
229 int cmpres;
230
231 cmpres = strncmp (newp->from_constpfx, root->from_constpfx, minlen);
232 if (cmpres == 0)
233 {
234 /* This can mean two things: the prefix is entirely the same or
235 it matches only for the minimum length of both strings. */
236 if (newp->from_constpfx_len == root->from_constpfx_len)
237 {
238 /* Both prefixes are identical. Insert the string at the
239 end of the `same' list if it is not already there. */
240 const char *from_pattern = (newp->from_pattern
241 ?: newp->from_constpfx);
242
243 while (strcmp (from_pattern,
244 root->from_pattern ?: root->from_constpfx) != 0
245 || strcmp (newp->to_string, root->to_string) != 0)
246 {
247 rootp = &root->same;
248 root = *rootp;
249 if (root == NULL)
250 break;
251 }
252
253 if (root != NULL)
254 /* This is a no new conversion. */
255 return;
256
257 break;
258 }
259
260 /* The new element either has a prefix which is itself a
261 prefix for the prefix of the current node or vice verse.
262 In the first case we insert the node right here. Otherwise
263 we have to descent further. */
264 if (newp->from_constpfx_len < root->from_constpfx_len)
265 {
266 newp->matching = root;
267 break;
268 }
269
270 rootp = &root->matching;
271 }
272 else if (cmpres < 0)
273 rootp = &root->left;
274 else
275 rootp = &root->right;
276 }
277
278 /* Plug in the new node here. */
279 *rootp = newp;
280 }
281
282
283 /* Add new module. */
284 static inline void
285 internal_function
286 add_module (char *rp, const char *directory, size_t dir_len, void **modules,
287 size_t *nmodules, int modcounter)
288 {
289 /* We expect now
290 1. `from' name
291 2. `to' name
292 3. filename of the module
293 4. an optional cost value
294 */
295 struct gconv_module *new_module;
296 char *from, *to, *module, *wp;
297 size_t const_len;
298 int from_is_regex;
299 int need_ext;
300 int cost_hi;
301
302 while (isspace (*rp))
303 ++rp;
304 from = rp;
305 from_is_regex = 0;
306 while (*rp != '\0' && !isspace (*rp))
307 {
308 if (!isalnum (*rp) && *rp != '-' && *rp != '/' && *rp != '.'
309 && *rp != '_' && *rp != '(' && *rp != ')')
310 from_is_regex = 1;
311 *rp = toupper (*rp);
312 ++rp;
313 }
314 if (*rp == '\0')
315 return;
316 *rp++ = '\0';
317 to = wp = rp;
318 while (isspace (*rp))
319 ++rp;
320 while (*rp != '\0' && !isspace (*rp))
321 *wp++ = toupper (*rp++);
322 if (*rp == '\0')
323 return;
324 *wp++ = '\0';
325 do
326 ++rp;
327 while (isspace (*rp));
328 module = wp;
329 while (*rp != '\0' && !isspace (*rp))
330 *wp++ = *rp++;
331 if (*rp == '\0')
332 {
333 /* There is no cost, use one by default. */
334 *wp++ = '\0';
335 cost_hi = 1;
336 }
337 else
338 {
339 /* There might be a cost value. */
340 char *endp;
341
342 *wp++ = '\0';
343 cost_hi = strtol (rp, &endp, 10);
344 if (rp == endp || cost_hi < 1)
345 /* No useful information. */
346 cost_hi = 1;
347 }
348
349 if (module[0] == '\0')
350 /* No module name given. */
351 return;
352 if (module[0] == '/')
353 dir_len = 0;
354 else
355 /* Increment by one for the slash. */
356 ++dir_len;
357
358 /* See whether we must add the ending. */
359 need_ext = 0;
360 if (wp - module < sizeof (gconv_module_ext)
361 || memcmp (wp - sizeof (gconv_module_ext), gconv_module_ext,
362 sizeof (gconv_module_ext)) != 0)
363 /* We must add the module extension. */
364 need_ext = sizeof (gconv_module_ext) - 1;
365
366 /* We've collected all the information, now create an entry. */
367
368 if (from_is_regex)
369 {
370 const_len = 0;
371 while (isalnum (from[const_len]) || from[const_len] == '-'
372 || from[const_len] == '/' || from[const_len] == '.'
373 || from[const_len] == '_')
374 ++const_len;
375 }
376 else
377 const_len = to - from - 1;
378
379 new_module = (struct gconv_module *) calloc (1,
380 sizeof (struct gconv_module)
381 + (wp - from)
382 + dir_len + need_ext);
383 if (new_module != NULL)
384 {
385 char *tmp;
386
387 new_module->from_constpfx = memcpy ((char *) new_module
388 + sizeof (struct gconv_module),
389 from, to - from);
390 if (from_is_regex)
391 new_module->from_pattern = new_module->from_constpfx;
392
393 new_module->from_constpfx_len = const_len;
394
395 new_module->to_string = memcpy ((char *) new_module->from_constpfx
396 + (to - from), to, module - to);
397
398 new_module->cost_hi = cost_hi;
399 new_module->cost_lo = modcounter;
400
401 new_module->module_name = (char *) new_module->to_string + (module - to);
402
403 if (dir_len == 0)
404 tmp = (char *) new_module->module_name;
405 else
406 {
407 tmp = __mempcpy ((char *) new_module->module_name,
408 directory, dir_len - 1);
409 *tmp++ = '/';
410 }
411
412 tmp = __mempcpy (tmp, module, wp - module);
413
414 if (need_ext)
415 memcpy (tmp - 1, gconv_module_ext, sizeof (gconv_module_ext));
416
417 /* See whether we have already an alias with this name defined.
418 We do allow regular expressions matching this any alias since
419 this expression can also match other names and we test for aliases
420 before testing for modules. */
421 if (! from_is_regex)
422 {
423 struct gconv_alias fake_alias;
424
425 fake_alias.fromname = new_module->from_constpfx;
426
427 if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare)
428 != NULL)
429 {
430 /* This module duplicates an alias. */
431 free (new_module);
432 return;
433 }
434 }
435
436 /* Now insert the new module data structure in our search tree. */
437 insert_module (new_module);
438 }
439 }
440
441
442 /* Read the next configuration file. */
443 static void
444 internal_function
445 read_conf_file (const char *filename, const char *directory, size_t dir_len,
446 void **modules, size_t *nmodules)
447 {
448 FILE *fp = fopen (filename, "r");
449 char *line = NULL;
450 size_t line_len = 0;
451 int modcounter = 0;
452
453 /* Don't complain if a file is not present or readable, simply silently
454 ignore it. */
455 if (fp == NULL)
456 return;
457
458 /* Process the known entries of the file. Comments start with `#' and
459 end with the end of the line. Empty lines are ignored. */
460 while (!feof_unlocked (fp))
461 {
462 char *rp, *endp, *word;
463 ssize_t n = __getdelim (&line, &line_len, '\n', fp);
464 if (n < 0)
465 /* An error occurred. */
466 break;
467
468 rp = line;
469 /* Terminate the line (excluding comments or newline) by an NUL byte
470 to simplify the following code. */
471 endp = strchr (rp, '#');
472 if (endp != NULL)
473 *endp = '\0';
474 else
475 if (rp[n - 1] == '\n')
476 rp[n - 1] = '\0';
477
478 while (isspace (*rp))
479 ++rp;
480
481 /* If this is an empty line go on with the next one. */
482 if (rp == endp)
483 continue;
484
485 word = rp;
486 while (*rp != '\0' && !isspace (*rp))
487 ++rp;
488
489 if (rp - word == sizeof ("alias") - 1
490 && memcmp (word, "alias", sizeof ("alias") - 1) == 0)
491 add_alias (rp, *modules);
492 else if (rp - word == sizeof ("module") - 1
493 && memcmp (word, "module", sizeof ("module") - 1) == 0)
494 add_module (rp, directory, dir_len, modules, nmodules, modcounter++);
495 /* else */
496 /* Otherwise ignore the line. */
497 }
498
499 if (line != NULL)
500 free (line);
501 fclose (fp);
502 }
503
504
505 /* Read all configuration files found in the user-specified and the default
506 path. */
507 void
508 __gconv_read_conf (void)
509 {
510 const char *user_path = __secure_getenv ("GCONV_PATH");
511 char *gconv_path, *elem;
512 void *modules = NULL;
513 size_t nmodules = 0;
514 int save_errno = errno;
515 size_t cnt;
516
517 if (user_path == NULL)
518 /* No user-defined path. Make a modifiable copy of the default path. */
519 gconv_path = strdupa (default_gconv_path);
520 else
521 {
522 /* Append the default path to the user-defined path. */
523 size_t user_len = strlen (user_path);
524
525 gconv_path = alloca (user_len + 1 + sizeof (default_gconv_path));
526 __mempcpy (__mempcpy (__mempcpy (gconv_path, user_path, user_len),
527 ":", 1),
528 default_gconv_path, sizeof (default_gconv_path));
529 }
530
531 elem = __strtok_r (gconv_path, ":", &gconv_path);
532 while (elem != NULL)
533 {
534 #ifndef MAXPATHLEN
535 /* We define a reasonable limit. */
536 # define MAXPATHLEN 4096
537 #endif
538 char real_elem[MAXPATHLEN];
539
540 if (__realpath (elem, real_elem) != NULL)
541 {
542 size_t elem_len = strlen (real_elem);
543 char *filename;
544
545 filename = alloca (elem_len + 1 + sizeof (gconv_conf_filename));
546 __mempcpy (__mempcpy (__mempcpy (filename, real_elem, elem_len),
547 "/", 1),
548 gconv_conf_filename, sizeof (gconv_conf_filename));
549
550 /* Read the next configuration file. */
551 read_conf_file (filename, real_elem, elem_len, &modules, &nmodules);
552 }
553
554 /* Get next element in the path. */
555 elem = __strtok_r (NULL, ":", &gconv_path);
556 }
557
558 /* Add the internal modules. */
559 for (cnt = 0; cnt < sizeof (builtin_modules) / sizeof (builtin_modules[0]);
560 ++cnt)
561 {
562 if (builtin_modules[cnt].from_pattern == NULL)
563 {
564 struct gconv_alias fake_alias;
565
566 fake_alias.fromname = builtin_modules[cnt].from_constpfx;
567
568 if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare)
569 != NULL)
570 /* It'll conflict so don't add it. */
571 continue;
572 }
573
574 insert_module (&builtin_modules[cnt]);
575 }
576
577 /* Add aliases for builtin conversions. */
578 cnt = sizeof (builtin_aliases) / sizeof (builtin_aliases[0]);
579 while (cnt > 0)
580 {
581 char *copy = strdupa (builtin_aliases[--cnt]);
582 add_alias (copy, modules);
583 }
584
585 /* Restore the error number. */
586 __set_errno (save_errno);
587 }