]> git.ipfire.org Git - thirdparty/glibc.git/blob - iconv/gconv_db.c
Update.
[thirdparty/glibc.git] / iconv / gconv_db.c
1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997,98,99,2000,2001,2002 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
20
21 #include <limits.h>
22 #include <search.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <sys/param.h>
26 #include <bits/libc-lock.h>
27
28 #include <dlfcn.h>
29 #include <gconv_int.h>
30 #include <gconv_charset.h>
31
32
33 /* Simple data structure for alias mapping. We have two names, `from'
34 and `to'. */
35 void *__gconv_alias_db;
36
37 /* Array with available modules. */
38 struct gconv_module *__gconv_modules_db;
39
40 /* We modify global data. */
41 __libc_lock_define_initialized (static, lock)
42
43
44 /* Provide access to module database. */
45 struct gconv_module *
46 __gconv_get_modules_db (void)
47 {
48 return __gconv_modules_db;
49 }
50
51 void *
52 __gconv_get_alias_db (void)
53 {
54 return __gconv_alias_db;
55 }
56
57
58 /* Function for searching alias. */
59 int
60 __gconv_alias_compare (const void *p1, const void *p2)
61 {
62 const struct gconv_alias *s1 = (const struct gconv_alias *) p1;
63 const struct gconv_alias *s2 = (const struct gconv_alias *) p2;
64 return strcmp (s1->fromname, s2->fromname);
65 }
66
67
68 /* To search for a derivation we create a list of intermediate steps.
69 Each element contains a pointer to the element which precedes it
70 in the derivation order. */
71 struct derivation_step
72 {
73 const char *result_set;
74 size_t result_set_len;
75 int cost_lo;
76 int cost_hi;
77 struct gconv_module *code;
78 struct derivation_step *last;
79 struct derivation_step *next;
80 };
81
82 #define NEW_STEP(result, hi, lo, module, last_mod) \
83 ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
84 newp->result_set = result; \
85 newp->result_set_len = strlen (result); \
86 newp->cost_hi = hi; \
87 newp->cost_lo = lo; \
88 newp->code = module; \
89 newp->last = last_mod; \
90 newp->next = NULL; \
91 newp; })
92
93
94 /* If a specific transformation is used more than once we should not need
95 to start looking for it again. Instead cache each successful result. */
96 struct known_derivation
97 {
98 const char *from;
99 const char *to;
100 struct __gconv_step *steps;
101 size_t nsteps;
102 };
103
104 /* Compare function for database of found derivations. */
105 static int
106 derivation_compare (const void *p1, const void *p2)
107 {
108 const struct known_derivation *s1 = (const struct known_derivation *) p1;
109 const struct known_derivation *s2 = (const struct known_derivation *) p2;
110 int result;
111
112 result = strcmp (s1->from, s2->from);
113 if (result == 0)
114 result = strcmp (s1->to, s2->to);
115 return result;
116 }
117
118 /* The search tree for known derivations. */
119 static void *known_derivations;
120
121 /* Look up whether given transformation was already requested before. */
122 static int
123 internal_function
124 derivation_lookup (const char *fromset, const char *toset,
125 struct __gconv_step **handle, size_t *nsteps)
126 {
127 struct known_derivation key = { fromset, toset, NULL, 0 };
128 struct known_derivation **result;
129
130 result = __tfind (&key, &known_derivations, derivation_compare);
131
132 if (result == NULL)
133 return __GCONV_NOCONV;
134
135 *handle = (*result)->steps;
136 *nsteps = (*result)->nsteps;
137
138 /* Please note that we return GCONV_OK even if the last search for
139 this transformation was unsuccessful. */
140 return __GCONV_OK;
141 }
142
143 /* Add new derivation to list of known ones. */
144 static void
145 internal_function
146 add_derivation (const char *fromset, const char *toset,
147 struct __gconv_step *handle, size_t nsteps)
148 {
149 struct known_derivation *new_deriv;
150 size_t fromset_len = strlen (fromset) + 1;
151 size_t toset_len = strlen (toset) + 1;
152
153 new_deriv = (struct known_derivation *)
154 malloc (sizeof (struct known_derivation) + fromset_len + toset_len);
155 if (new_deriv != NULL)
156 {
157 new_deriv->from = (char *) (new_deriv + 1);
158 new_deriv->to = memcpy (__mempcpy (new_deriv + 1, fromset, fromset_len),
159 toset, toset_len);
160
161 new_deriv->steps = handle;
162 new_deriv->nsteps = nsteps;
163
164 if (__tsearch (new_deriv, &known_derivations, derivation_compare)
165 == NULL)
166 /* There is some kind of memory allocation problem. */
167 free (new_deriv);
168 }
169 /* Please note that we don't complain if the allocation failed. This
170 is not tragically but in case we use the memory debugging facilities
171 not all memory will be freed. */
172 }
173
174 static void
175 free_derivation (void *p)
176 {
177 struct known_derivation *deriv = (struct known_derivation *) p;
178 size_t cnt;
179
180 for (cnt = 0; cnt < deriv->nsteps; ++cnt)
181 if (deriv->steps[cnt].__counter > 0
182 && deriv->steps[cnt].__end_fct != NULL)
183 DL_CALL_FCT (deriv->steps[cnt].__end_fct, (&deriv->steps[cnt]));
184
185 /* Free the name strings. */
186 free ((char *) deriv->steps[0].__from_name);
187 free ((char *) deriv->steps[deriv->nsteps - 1].__to_name);
188
189 free ((struct __gconv_step *) deriv->steps);
190 free (deriv);
191 }
192
193
194 /* Decrement the reference count for a single step in a steps array. */
195 void
196 internal_function
197 __gconv_release_step (struct __gconv_step *step)
198 {
199 if (--step->__counter == 0)
200 {
201 /* Call the destructor. */
202 if (step->__end_fct != NULL)
203 DL_CALL_FCT (step->__end_fct, (step));
204
205 #ifndef STATIC_GCONV
206 /* Skip builtin modules; they are not reference counted. */
207 if (step->__shlib_handle != NULL)
208 {
209 /* Release the loaded module. */
210 __gconv_release_shlib (step->__shlib_handle);
211 step->__shlib_handle = NULL;
212 }
213 #endif
214 }
215 }
216
217 static int
218 internal_function
219 gen_steps (struct derivation_step *best, const char *toset,
220 const char *fromset, struct __gconv_step **handle, size_t *nsteps)
221 {
222 size_t step_cnt = 0;
223 struct __gconv_step *result;
224 struct derivation_step *current;
225 int status = __GCONV_NOMEM;
226
227 /* First determine number of steps. */
228 for (current = best; current->last != NULL; current = current->last)
229 ++step_cnt;
230
231 result = (struct __gconv_step *) malloc (sizeof (struct __gconv_step)
232 * step_cnt);
233 if (result != NULL)
234 {
235 int failed = 0;
236
237 status = __GCONV_OK;
238 *nsteps = step_cnt;
239 current = best;
240 while (step_cnt-- > 0)
241 {
242 result[step_cnt].__from_name = (step_cnt == 0
243 ? __strdup (fromset)
244 : (char *)current->last->result_set);
245 result[step_cnt].__to_name = (step_cnt + 1 == *nsteps
246 ? __strdup (current->result_set)
247 : result[step_cnt + 1].__from_name);
248
249 result[step_cnt].__counter = 1;
250 result[step_cnt].__data = NULL;
251
252 #ifndef STATIC_GCONV
253 if (current->code->module_name[0] == '/')
254 {
255 /* Load the module, return handle for it. */
256 struct __gconv_loaded_object *shlib_handle =
257 __gconv_find_shlib (current->code->module_name);
258
259 if (shlib_handle == NULL)
260 {
261 failed = 1;
262 break;
263 }
264
265 result[step_cnt].__shlib_handle = shlib_handle;
266 result[step_cnt].__modname = shlib_handle->name;
267 result[step_cnt].__fct = shlib_handle->fct;
268 result[step_cnt].__init_fct = shlib_handle->init_fct;
269 result[step_cnt].__end_fct = shlib_handle->end_fct;
270
271 /* Call the init function. */
272 if (result[step_cnt].__init_fct != NULL)
273 {
274 status = DL_CALL_FCT (result[step_cnt].__init_fct,
275 (&result[step_cnt]));
276
277 if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK)
278 {
279 failed = 1;
280 /* Make sure we unload this modules. */
281 --step_cnt;
282 result[step_cnt].__end_fct = NULL;
283 break;
284 }
285 }
286 }
287 else
288 #endif
289 /* It's a builtin transformation. */
290 __gconv_get_builtin_trans (current->code->module_name,
291 &result[step_cnt]);
292
293 current = current->last;
294 }
295
296 if (__builtin_expect (failed, 0) != 0)
297 {
298 /* Something went wrong while initializing the modules. */
299 while (++step_cnt < *nsteps)
300 __gconv_release_step (&result[step_cnt]);
301 free (result);
302 *nsteps = 0;
303 *handle = NULL;
304 if (status == __GCONV_OK)
305 status = __GCONV_NOCONV;
306 }
307 else
308 *handle = result;
309 }
310 else
311 {
312 *nsteps = 0;
313 *handle = NULL;
314 }
315
316 return status;
317 }
318
319
320 #ifndef STATIC_GCONV
321 static int
322 internal_function
323 increment_counter (struct __gconv_step *steps, size_t nsteps)
324 {
325 /* Increment the user counter. */
326 size_t cnt = nsteps;
327 int result = __GCONV_OK;
328
329 while (cnt-- > 0)
330 {
331 struct __gconv_step *step = &steps[cnt];
332
333 if (step->__counter++ == 0)
334 {
335 /* Skip builtin modules. */
336 if (step->__modname != NULL)
337 {
338 /* Reopen a previously used module. */
339 step->__shlib_handle = __gconv_find_shlib (step->__modname);
340 if (step->__shlib_handle == NULL)
341 {
342 /* Oops, this is the second time we use this module
343 (after unloading) and this time loading failed!? */
344 --step->__counter;
345 while (++cnt < nsteps)
346 __gconv_release_step (&steps[cnt]);
347 result = __GCONV_NOCONV;
348 break;
349 }
350
351 /* The function addresses defined by the module may
352 have changed. */
353 step->__fct = step->__shlib_handle->fct;
354 step->__init_fct = step->__shlib_handle->init_fct;
355 step->__end_fct = step->__shlib_handle->end_fct;
356 }
357
358 if (step->__init_fct != NULL)
359 DL_CALL_FCT (step->__init_fct, (step));
360 }
361 }
362 return result;
363 }
364 #endif
365
366
367 /* The main function: find a possible derivation from the `fromset' (either
368 the given name or the alias) to the `toset' (again with alias). */
369 static int
370 internal_function
371 find_derivation (const char *toset, const char *toset_expand,
372 const char *fromset, const char *fromset_expand,
373 struct __gconv_step **handle, size_t *nsteps)
374 {
375 struct derivation_step *first, *current, **lastp, *solution = NULL;
376 int best_cost_hi = INT_MAX;
377 int best_cost_lo = INT_MAX;
378 int result;
379
380 /* Look whether an earlier call to `find_derivation' has already
381 computed a possible derivation. If so, return it immediately. */
382 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
383 handle, nsteps);
384 if (result == __GCONV_OK)
385 {
386 #ifndef STATIC_GCONV
387 result = increment_counter (*handle, *nsteps);
388 #endif
389 return result;
390 }
391
392 /* The task is to find a sequence of transformations, backed by the
393 existing modules - whether builtin or dynamically loadable -,
394 starting at `fromset' (or `fromset_expand') and ending at `toset'
395 (or `toset_expand'), and with minimal cost.
396
397 For computer scientists, this is a shortest path search in the
398 graph where the nodes are all possible charsets and the edges are
399 the transformations listed in __gconv_modules_db.
400
401 For now we use a simple algorithm with quadratic runtime behaviour.
402 A breadth-first search, starting at `fromset' and `fromset_expand'.
403 The list starting at `first' contains all nodes that have been
404 visited up to now, in the order in which they have been visited --
405 excluding the goal nodes `toset' and `toset_expand' which get
406 managed in the list starting at `solution'.
407 `current' walks through the list starting at `first' and looks
408 which nodes are reachable from the current node, adding them to
409 the end of the list [`first' or `solution' respectively] (if
410 they are visited the first time) or updating them in place (if
411 they have have already been visited).
412 In each node of either list, cost_lo and cost_hi contain the
413 minimum cost over any paths found up to now, starting at `fromset'
414 or `fromset_expand', ending at that node. best_cost_lo and
415 best_cost_hi represent the minimum over the elements of the
416 `solution' list. */
417
418 if (fromset_expand != NULL)
419 {
420 first = NEW_STEP (fromset_expand, 0, 0, NULL, NULL);
421 first->next = NEW_STEP (fromset, 0, 0, NULL, NULL);
422 lastp = &first->next->next;
423 }
424 else
425 {
426 first = NEW_STEP (fromset, 0, 0, NULL, NULL);
427 lastp = &first->next;
428 }
429
430 for (current = first; current != NULL; current = current->next)
431 {
432 /* Now match all the available module specifications against the
433 current charset name. If any of them matches check whether
434 we already have a derivation for this charset. If yes, use the
435 one with the lower costs. Otherwise add the new charset at the
436 end.
437
438 The module database is organized in a tree form which allows
439 searching for prefixes. So we search for the first entry with a
440 matching prefix and any other matching entry can be found from
441 this place. */
442 struct gconv_module *node;
443
444 /* Maybe it is not necessary anymore to look for a solution for
445 this entry since the cost is already as high (or higher) as
446 the cost for the best solution so far. */
447 if (current->cost_hi > best_cost_hi
448 || (current->cost_hi == best_cost_hi
449 && current->cost_lo >= best_cost_lo))
450 continue;
451
452 node = __gconv_modules_db;
453 while (node != NULL)
454 {
455 int cmpres = strcmp (current->result_set, node->from_string);
456 if (cmpres == 0)
457 {
458 /* Walk through the list of modules with this prefix and
459 try to match the name. */
460 struct gconv_module *runp;
461
462 /* Check all the modules with this prefix. */
463 runp = node;
464 do
465 {
466 const char *result_set = (strcmp (runp->to_string, "-") == 0
467 ? (toset_expand ?: toset)
468 : runp->to_string);
469 int cost_hi = runp->cost_hi + current->cost_hi;
470 int cost_lo = runp->cost_lo + current->cost_lo;
471 struct derivation_step *step;
472
473 /* We managed to find a derivation. First see whether
474 we have reached one of the goal nodes. */
475 if (strcmp (result_set, toset) == 0
476 || (toset_expand != NULL
477 && strcmp (result_set, toset_expand) == 0))
478 {
479 /* Append to the `solution' list if there
480 is no entry with this name. */
481 for (step = solution; step != NULL; step = step->next)
482 if (strcmp (result_set, step->result_set) == 0)
483 break;
484
485 if (step == NULL)
486 {
487 step = NEW_STEP (result_set,
488 cost_hi, cost_lo,
489 runp, current);
490 step->next = solution;
491 solution = step;
492 }
493 else if (step->cost_hi > cost_hi
494 || (step->cost_hi == cost_hi
495 && step->cost_lo > cost_lo))
496 {
497 /* A better path was found for the node,
498 on the `solution' list. */
499 step->code = runp;
500 step->last = current;
501 step->cost_hi = cost_hi;
502 step->cost_lo = cost_lo;
503 }
504
505 /* Update best_cost accordingly. */
506 if (cost_hi < best_cost_hi
507 || (cost_hi == best_cost_hi
508 && cost_lo < best_cost_lo))
509 {
510 best_cost_hi = cost_hi;
511 best_cost_lo = cost_lo;
512 }
513 }
514 else if (cost_hi < best_cost_hi
515 || (cost_hi == best_cost_hi
516 && cost_lo < best_cost_lo))
517 {
518 /* Append at the end of the `first' list if there
519 is no entry with this name. */
520 for (step = first; step != NULL; step = step->next)
521 if (strcmp (result_set, step->result_set) == 0)
522 break;
523
524 if (step == NULL)
525 {
526 *lastp = NEW_STEP (result_set,
527 cost_hi, cost_lo,
528 runp, current);
529 lastp = &(*lastp)->next;
530 }
531 else if (step->cost_hi > cost_hi
532 || (step->cost_hi == cost_hi
533 && step->cost_lo > cost_lo))
534 {
535 /* A better path was found for the node,
536 on the `first' list. */
537 step->code = runp;
538 step->last = current;
539
540 /* Update the cost for all steps. */
541 for (step = first; step != NULL;
542 step = step->next)
543 /* But don't update the start nodes. */
544 if (step->code != NULL)
545 {
546 struct derivation_step *back;
547 int hi, lo;
548
549 hi = step->code->cost_hi;
550 lo = step->code->cost_lo;
551
552 for (back = step->last; back->code != NULL;
553 back = back->last)
554 {
555 hi += back->code->cost_hi;
556 lo += back->code->cost_lo;
557 }
558
559 step->cost_hi = hi;
560 step->cost_lo = lo;
561 }
562
563 /* Likewise for the nodes on the solution list.
564 Also update best_cost accordingly. */
565 for (step = solution; step != NULL;
566 step = step->next)
567 {
568 step->cost_hi = (step->code->cost_hi
569 + step->last->cost_hi);
570 step->cost_lo = (step->code->cost_lo
571 + step->last->cost_lo);
572
573 if (step->cost_hi < best_cost_hi
574 || (step->cost_hi == best_cost_hi
575 && step->cost_lo < best_cost_lo))
576 {
577 best_cost_hi = step->cost_hi;
578 best_cost_lo = step->cost_lo;
579 }
580 }
581 }
582 }
583
584 runp = runp->same;
585 }
586 while (runp != NULL);
587
588 break;
589 }
590 else if (cmpres < 0)
591 node = node->left;
592 else
593 node = node->right;
594 }
595 }
596
597 if (solution != NULL)
598 {
599 /* We really found a way to do the transformation. */
600
601 /* Choose the best solution. This is easy because we know that
602 the solution list has at most length 2 (one for every possible
603 goal node). */
604 if (solution->next != NULL)
605 {
606 struct derivation_step *solution2 = solution->next;
607
608 if (solution2->cost_hi < solution->cost_hi
609 || (solution2->cost_hi == solution->cost_hi
610 && solution2->cost_lo < solution->cost_lo))
611 solution = solution2;
612 }
613
614 /* Now build a data structure describing the transformation steps. */
615 result = gen_steps (solution, toset_expand ?: toset,
616 fromset_expand ?: fromset, handle, nsteps);
617 }
618 else
619 {
620 /* We haven't found a transformation. Clear the result values. */
621 *handle = NULL;
622 *nsteps = 0;
623 }
624
625 /* Add result in any case to list of known derivations. */
626 add_derivation (fromset_expand ?: fromset, toset_expand ?: toset,
627 *handle, *nsteps);
628
629 return result;
630 }
631
632
633 /* Control of initialization. */
634 __libc_once_define (static, once);
635
636
637 static const char *
638 do_lookup_alias (const char *name)
639 {
640 struct gconv_alias key;
641 struct gconv_alias **found;
642
643 key.fromname = (char *) name;
644 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
645 return found != NULL ? (*found)->toname : NULL;
646 }
647
648
649 int
650 internal_function
651 __gconv_compare_alias (const char *name1, const char *name2)
652 {
653 int result;
654
655 /* Ensure that the configuration data is read. */
656 __libc_once (once, __gconv_read_conf);
657
658 if (__gconv_compare_alias_cache (name1, name2, &result) != 0)
659 result = strcmp (do_lookup_alias (name1) ?: name1,
660 do_lookup_alias (name2) ?: name2);
661
662 return result;
663 }
664
665
666 int
667 internal_function
668 __gconv_find_transform (const char *toset, const char *fromset,
669 struct __gconv_step **handle, size_t *nsteps,
670 int flags)
671 {
672 const char *fromset_expand;
673 const char *toset_expand;
674 int result;
675
676 /* Ensure that the configuration data is read. */
677 __libc_once (once, __gconv_read_conf);
678
679 /* Acquire the lock. */
680 __libc_lock_lock (lock);
681
682 result = __gconv_lookup_cache (toset, fromset, handle, nsteps, flags);
683 if (result != __GCONV_NODB)
684 {
685 /* We have a cache and could resolve the request, successful or not. */
686 __libc_lock_unlock (lock);
687 return result;
688 }
689
690 /* If we don't have a module database return with an error. */
691 if (__gconv_modules_db == NULL)
692 {
693 __libc_lock_unlock (lock);
694 return __GCONV_NOCONV;
695 }
696
697 /* See whether the names are aliases. */
698 fromset_expand = do_lookup_alias (fromset);
699 toset_expand = do_lookup_alias (toset);
700
701 if (__builtin_expect (flags & GCONV_AVOID_NOCONV, 0)
702 /* We are not supposed to create a pseudo transformation (means
703 copying) when the input and output character set are the same. */
704 && (strcmp (toset, fromset) == 0
705 || (toset_expand != NULL && strcmp (toset_expand, fromset) == 0)
706 || (fromset_expand != NULL
707 && (strcmp (toset, fromset_expand) == 0
708 || (toset_expand != NULL
709 && strcmp (toset_expand, fromset_expand) == 0)))))
710 {
711 /* Both character sets are the same. */
712 __libc_lock_unlock (lock);
713 return __GCONV_NOCONV;
714 }
715
716 result = find_derivation (toset, toset_expand, fromset, fromset_expand,
717 handle, nsteps);
718
719 /* Release the lock. */
720 __libc_lock_unlock (lock);
721
722 /* The following code is necessary since `find_derivation' will return
723 GCONV_OK even when no derivation was found but the same request
724 was processed before. I.e., negative results will also be cached. */
725 return (result == __GCONV_OK
726 ? (*handle == NULL ? __GCONV_NOCONV : __GCONV_OK)
727 : result);
728 }
729
730
731 /* Release the entries of the modules list. */
732 int
733 internal_function
734 __gconv_close_transform (struct __gconv_step *steps, size_t nsteps)
735 {
736 int result = __GCONV_OK;
737 size_t cnt;
738
739 /* Acquire the lock. */
740 __libc_lock_lock (lock);
741
742 #ifndef STATIC_GCONV
743 cnt = nsteps;
744 while (cnt-- > 0)
745 __gconv_release_step (&steps[cnt]);
746 #endif
747
748 /* If we use the cache we free a bit more since we don't keep any
749 transformation records around, they are cheap enough to
750 recreate. */
751 __gconv_release_cache (steps, nsteps);
752
753 /* Release the lock. */
754 __libc_lock_unlock (lock);
755
756 return result;
757 }
758
759
760 /* Free the modules mentioned. */
761 static void
762 internal_function
763 free_modules_db (struct gconv_module *node)
764 {
765 if (node->left != NULL)
766 free_modules_db (node->left);
767 if (node->right != NULL)
768 free_modules_db (node->right);
769 do
770 {
771 struct gconv_module *act = node;
772 node = node->same;
773 if (act->module_name[0] == '/')
774 free (act);
775 }
776 while (node != NULL);
777 }
778
779
780 /* Free all resources if necessary. */
781 static void __attribute__ ((unused))
782 free_mem (void)
783 {
784 if (__gconv_alias_db != NULL)
785 __tdestroy (__gconv_alias_db, free);
786
787 if (__gconv_modules_db != NULL)
788 free_modules_db (__gconv_modules_db);
789
790 if (known_derivations != NULL)
791 __tdestroy (known_derivations, free_derivation);
792 }
793
794 text_set_element (__libc_subfreeres, free_mem);