]> git.ipfire.org Git - thirdparty/glibc.git/blob - iconv/gconv_db.c
Update.
[thirdparty/glibc.git] / iconv / gconv_db.c
1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
15
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21 #include <limits.h>
22 #include <search.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <sys/param.h>
26 #include <bits/libc-lock.h>
27
28 #include <ldsodefs.h>
29 #include <gconv_int.h>
30
31
32 /* Simple data structure for alias mapping. We have two names, `from'
33 and `to'. */
34 void *__gconv_alias_db;
35
36 /* Array with available modules. */
37 struct gconv_module *__gconv_modules_db;
38
39 /* We modify global data. */
40 __libc_lock_define_initialized (static, lock)
41
42
43 /* Function for searching alias. */
44 int
45 __gconv_alias_compare (const void *p1, const void *p2)
46 {
47 struct gconv_alias *s1 = (struct gconv_alias *) p1;
48 struct gconv_alias *s2 = (struct gconv_alias *) p2;
49 return strcmp (s1->fromname, s2->fromname);
50 }
51
52
53 /* To search for a derivation we create a list of intermediate steps.
54 Each element contains a pointer to the element which precedes it
55 in the derivation order. */
56 struct derivation_step
57 {
58 const char *result_set;
59 size_t result_set_len;
60 int cost_lo;
61 int cost_hi;
62 struct gconv_module *code;
63 struct derivation_step *last;
64 struct derivation_step *next;
65 };
66
67 #define NEW_STEP(result, hi, lo, module, last_mod) \
68 ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
69 newp->result_set = result; \
70 newp->result_set_len = strlen (result); \
71 newp->cost_hi = hi; \
72 newp->cost_lo = lo; \
73 newp->code = module; \
74 newp->last = last_mod; \
75 newp->next = NULL; \
76 newp; })
77
78
79 /* If a specific transformation is used more than once we should not need
80 to start looking for it again. Instead cache each successful result. */
81 struct known_derivation
82 {
83 const char *from;
84 const char *to;
85 struct gconv_step *steps;
86 size_t nsteps;
87 };
88
89 /* Compare function for database of found derivations. */
90 static int
91 derivation_compare (const void *p1, const void *p2)
92 {
93 struct known_derivation *s1 = (struct known_derivation *) p1;
94 struct known_derivation *s2 = (struct known_derivation *) p2;
95 int result;
96
97 result = strcmp (s1->from, s2->from);
98 if (result == 0)
99 result = strcmp (s1->to, s2->to);
100 return result;
101 }
102
103 /* The search tree for known derivations. */
104 static void *known_derivations;
105
106 /* Look up whether given transformation was already requested before. */
107 static int
108 internal_function
109 derivation_lookup (const char *fromset, const char *toset,
110 struct gconv_step **handle, size_t *nsteps)
111 {
112 struct known_derivation key = { fromset, toset, NULL, 0 };
113 struct known_derivation **result;
114
115 result = __tfind (&key, &known_derivations, derivation_compare);
116
117 if (result == NULL)
118 return GCONV_NOCONV;
119
120 *handle = (*result)->steps;
121 *nsteps = (*result)->nsteps;
122
123 /* Please note that we return GCONV_OK even if the last search for
124 this transformation was unsuccessful. */
125 return GCONV_OK;
126 }
127
128 /* Add new derivation to list of known ones. */
129 static void
130 internal_function
131 add_derivation (const char *fromset, const char *toset,
132 struct gconv_step *handle, size_t nsteps)
133 {
134 struct known_derivation *new_deriv;
135 size_t fromset_len = strlen (fromset) + 1;
136 size_t toset_len = strlen (toset) + 1;
137
138 new_deriv = (struct known_derivation *)
139 malloc (sizeof (struct known_derivation) + fromset_len + toset_len);
140 if (new_deriv != NULL)
141 {
142 new_deriv->from = memcpy (new_deriv + 1, fromset, fromset_len);
143 new_deriv->to = memcpy ((char *) new_deriv->from + fromset_len,
144 toset, toset_len);
145
146 new_deriv->steps = handle;
147 new_deriv->nsteps = nsteps;
148
149 __tsearch (new_deriv, &known_derivations, derivation_compare);
150 }
151 /* Please note that we don't complain if the allocation failed. This
152 is not tragically but in case we use the memory debugging facilities
153 not all memory will be freed. */
154 }
155
156 static void
157 free_derivation (void *p)
158 {
159 struct known_derivation *deriv = (struct known_derivation *) p;
160 size_t cnt;
161
162 for (cnt = 0; cnt < deriv->nsteps; ++cnt)
163 if (deriv->steps[cnt].end_fct)
164 _CALL_DL_FCT (deriv->steps[cnt].end_fct, (&deriv->steps[cnt]));
165
166 /* Free the name strings. */
167 free ((char *) deriv->steps[0].from_name);
168 free ((char *) deriv->steps[deriv->nsteps - 1].to_name);
169
170 free ((struct gconv_step *) deriv->steps);
171 free (deriv);
172 }
173
174
175 static int
176 internal_function
177 gen_steps (struct derivation_step *best, const char *toset,
178 const char *fromset, struct gconv_step **handle, size_t *nsteps)
179 {
180 size_t step_cnt = 0;
181 struct gconv_step *result;
182 struct derivation_step *current;
183 int status = GCONV_NOMEM;
184
185 /* First determine number of steps. */
186 for (current = best; current->last != NULL; current = current->last)
187 ++step_cnt;
188
189 result = (struct gconv_step *) malloc (sizeof (struct gconv_step)
190 * step_cnt);
191 if (result != NULL)
192 {
193 int failed = 0;
194
195 status = GCONV_OK;
196 *nsteps = step_cnt;
197 current = best;
198 while (step_cnt-- > 0)
199 {
200 result[step_cnt].from_name = (step_cnt == 0
201 ? __strdup (fromset)
202 : current->last->result_set);
203 result[step_cnt].to_name = (step_cnt + 1 == *nsteps
204 ? __strdup (current->result_set)
205 : result[step_cnt + 1].from_name);
206
207 #ifndef STATIC_GCONV
208 if (current->code->module_name[0] == '/')
209 {
210 /* Load the module, return handle for it. */
211 struct gconv_loaded_object *shlib_handle =
212 __gconv_find_shlib (current->code->module_name);
213
214 if (shlib_handle == NULL)
215 {
216 failed = 1;
217 break;
218 }
219
220 result[step_cnt].shlib_handle = shlib_handle;
221 result[step_cnt].modname = shlib_handle->name;
222 result[step_cnt].counter = 0;
223 result[step_cnt].fct = shlib_handle->fct;
224 result[step_cnt].init_fct = shlib_handle->init_fct;
225 result[step_cnt].end_fct = shlib_handle->end_fct;
226 }
227 else
228 #endif
229 /* It's a builtin transformation. */
230 __gconv_get_builtin_trans (current->code->module_name,
231 &result[step_cnt]);
232
233 /* Call the init function. */
234 if (result[step_cnt].init_fct != NULL)
235 {
236 status = _CALL_DL_FCT (result[step_cnt].init_fct,
237 (&result[step_cnt]));
238
239 if (status != GCONV_OK)
240 {
241 failed = 1;
242 /* Make sure we unload this modules. */
243 --step_cnt;
244 break;
245 }
246 }
247
248 current = current->last;
249 }
250
251 if (failed != 0)
252 {
253 /* Something went wrong while initializing the modules. */
254 while (++step_cnt < *nsteps)
255 {
256 if (result[step_cnt].end_fct != NULL)
257 _CALL_DL_FCT (result[step_cnt].end_fct, (&result[step_cnt]));
258 #ifndef STATIC_GCONV
259 __gconv_release_shlib (result[step_cnt].shlib_handle);
260 #endif
261 }
262 free (result);
263 *nsteps = 0;
264 *handle = NULL;
265 if (status == GCONV_OK)
266 status = GCONV_NOCONV;
267 }
268 else
269 *handle = result;
270 }
271 else
272 {
273 *nsteps = 0;
274 *handle = NULL;
275 }
276
277 return status;
278 }
279
280
281 /* The main function: find a possible derivation from the `fromset' (either
282 the given name or the alias) to the `toset' (again with alias). */
283 static int
284 internal_function
285 find_derivation (const char *toset, const char *toset_expand,
286 const char *fromset, const char *fromset_expand,
287 struct gconv_step **handle, size_t *nsteps)
288 {
289 __libc_lock_define_initialized (static, lock)
290 struct derivation_step *first, *current, **lastp, *solution = NULL;
291 int best_cost_hi = INT_MAX;
292 int best_cost_lo = INT_MAX;
293 int result;
294
295 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
296 handle, nsteps);
297 if (result == GCONV_OK)
298 return result;
299
300 __libc_lock_lock (lock);
301
302 /* There is a small chance that this derivation is meanwhile found. This
303 can happen if in `find_derivation' we look for this derivation, didn't
304 find it but at the same time another thread looked for this derivation. */
305 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
306 handle, nsteps);
307 if (result == GCONV_OK)
308 {
309 __libc_lock_unlock (lock);
310 return result;
311 }
312
313 /* For now we use a simple algorithm with quadratic runtime behaviour.
314 The task is to match the `toset' with any of the available rules,
315 starting from FROMSET. */
316 if (fromset_expand != NULL)
317 {
318 first = NEW_STEP (fromset_expand, 0, 0, NULL, NULL);
319 first->next = NEW_STEP (fromset, 0, 0, NULL, NULL);
320 lastp = &first->next->next;
321 }
322 else
323 {
324 first = NEW_STEP (fromset, 0, 0, NULL, NULL);
325 lastp = &first->next;
326 }
327
328 for (current = first; current != NULL; current = current->next)
329 {
330 /* Now match all the available module specifications against the
331 current charset name. If any of them matches check whether
332 we already have a derivation for this charset. If yes, use the
333 one with the lower costs. Otherwise add the new charset at the
334 end.
335
336 The module database is organized in a tree form which allows to
337 search for prefixes. So we search for the first entry with a
338 matching prefix and any other matching entry can be found from
339 this place. */
340 struct gconv_module *node = __gconv_modules_db;
341
342 /* Maybe it is not necessary anymore to look for a solution for
343 this entry since the cost is already as high (or heigher) as
344 the cost for the best solution so far. */
345 if (current->cost_hi > best_cost_hi
346 || (current->cost_hi == best_cost_hi
347 && current->cost_lo >= best_cost_lo))
348 continue;
349
350 while (node != NULL)
351 {
352 int cmpres = strncmp (current->result_set, node->from_constpfx,
353 MIN (current->result_set_len,
354 node->from_constpfx_len));
355
356 if (cmpres == 0)
357 {
358 /* Walk through the list of modules with this prefix and
359 try to match the name. */
360 struct gconv_module *runp;
361
362 if (current->result_set_len < node->from_constpfx_len)
363 /* Cannot possibly match. */
364 break;
365
366 /* Check all the modules with this prefix. */
367 runp = node;
368 do
369 {
370 const char *result_set = NULL;
371
372 if (runp->from_pattern == NULL)
373 {
374 /* This is a simple entry and therefore we have a
375 found an matching entry if the strings are really
376 equal. */
377 if (current->result_set_len == runp->from_constpfx_len)
378 {
379 if (strcmp (runp->to_string, "-") == 0)
380 result_set = toset_expand ?: toset;
381 else
382 result_set = runp->to_string;
383 }
384 }
385 else
386 {
387 /* Compile the regular expression if necessary. */
388 if (runp->from_regex == NULL)
389 {
390 if (__regcomp (&runp->from_regex_mem,
391 runp->from_pattern,
392 REG_EXTENDED | REG_ICASE) != 0)
393 /* Something is wrong. Remember this. */
394 runp->from_regex = (regex_t *) -1L;
395 else
396 runp->from_regex = &runp->from_regex_mem;
397 }
398
399 if (runp->from_regex != (regex_t *) -1L)
400 {
401 regmatch_t match[4];
402
403 /* Try to match the regular expression. */
404 if (__regexec (runp->from_regex, current->result_set,
405 4, match, 0) == 0
406 && match[0].rm_so == 0
407 && current->result_set[match[0].rm_eo] == '\0')
408 {
409 /* At least the whole <from> string is matched.
410 We must now match sed-like possible
411 subexpressions from the match to the
412 toset expression. */
413 #define ENSURE_LEN(LEN) \
414 if (wp + (LEN) >= constr + len - 1) \
415 { \
416 char *newp = alloca (len += 128); \
417 wp = __mempcpy (newp, constr, wp - constr); \
418 constr = newp; \
419 }
420 size_t len = 128;
421 char *constr = alloca (len);
422 char *wp = constr;
423 const char *cp = runp->to_string;
424
425 while (*cp != '\0')
426 {
427 if (*cp != '\\')
428 {
429 ENSURE_LEN (1);
430 *wp++ = *cp++;
431 }
432 else if (cp[1] == '\0')
433 /* Backslash at end of string. */
434 break;
435 else
436 {
437 ++cp;
438 if (*cp == '\\')
439 {
440 *wp++ = *cp++;
441 ENSURE_LEN (1);
442 }
443 else if (*cp < '1' || *cp > '3')
444 break;
445 else
446 {
447 int idx = *cp - '0';
448 if (match[idx].rm_so == -1)
449 /* No match. */
450 break;
451
452 ENSURE_LEN (match[idx].rm_eo
453 - match[idx].rm_so);
454 wp = __mempcpy (wp,
455 &current->result_set[match[idx].rm_so],
456 match[idx].rm_eo
457 - match[idx].rm_so);
458 ++cp;
459 }
460 }
461 }
462 if (*cp == '\0' && wp != constr)
463 {
464 /* Terminate the constructed string. */
465 *wp = '\0';
466 result_set = constr;
467 }
468 }
469 }
470 }
471
472 if (result_set != NULL)
473 {
474 int cost_hi = runp->cost_hi + current->cost_hi;
475 int cost_lo = runp->cost_lo + current->cost_lo;
476 struct derivation_step *step;
477
478 /* We managed to find a derivation. First see whether
479 this is what we are looking for. */
480 if (strcmp (result_set, toset) == 0
481 || (toset_expand != NULL
482 && strcmp (result_set, toset_expand) == 0))
483 {
484 if (solution == NULL || cost_hi < best_cost_hi
485 || (cost_hi == best_cost_hi
486 && cost_lo < best_cost_lo))
487 {
488 best_cost_hi = cost_hi;
489 best_cost_lo = cost_lo;
490 }
491
492 /* Append this solution to list. */
493 if (solution == NULL)
494 solution = NEW_STEP (result_set, 0, 0, runp,
495 current);
496 else
497 {
498 while (solution->next != NULL)
499 solution = solution->next;
500
501 solution->next = NEW_STEP (result_set, 0, 0,
502 runp, current);
503 }
504 }
505 else if (cost_hi < best_cost_hi
506 || (cost_hi == best_cost_hi
507 && cost_lo < best_cost_lo))
508 {
509 /* Append at the end if there is no entry with
510 this name. */
511 for (step = first; step != NULL; step = step->next)
512 if (strcmp (result_set, step->result_set) == 0)
513 break;
514
515 if (step == NULL)
516 {
517 *lastp = NEW_STEP (result_set,
518 cost_hi, cost_lo,
519 runp, current);
520 lastp = &(*lastp)->next;
521 }
522 else if (step->cost_hi > cost_hi
523 || (step->cost_hi == cost_hi
524 && step->cost_lo > cost_lo))
525 {
526 step->code = runp;
527 step->last = current;
528
529 /* Update the cost for all steps. */
530 for (step = first; step != NULL;
531 step = step->next)
532 {
533 struct derivation_step *back;
534
535 if (step->code == NULL)
536 /* This is one of the entries we started
537 from. */
538 continue;
539
540 step->cost_hi = step->code->cost_hi;
541 step->cost_lo = step->code->cost_lo;
542
543 for (back = step->last; back->code != NULL;
544 back = back->last)
545 {
546 step->cost_hi += back->code->cost_hi;
547 step->cost_lo += back->code->cost_lo;
548 }
549 }
550
551 for (step = solution; step != NULL;
552 step = step->next)
553 {
554 step->cost_hi = (step->code->cost_hi
555 + step->last->cost_hi);
556 step->cost_lo = (step->code->cost_lo
557 + step->last->cost_lo);
558
559 if (step->cost_hi < best_cost_hi
560 || (step->cost_hi == best_cost_hi
561 && step->cost_lo < best_cost_lo))
562 {
563 solution = step;
564 best_cost_hi = step->cost_hi;
565 best_cost_lo = step->cost_lo;
566 }
567 }
568 }
569 }
570 }
571
572 runp = runp->same;
573 }
574 while (runp != NULL);
575
576 if (current->result_set_len == node->from_constpfx_len)
577 break;
578
579 node = node->matching;
580 }
581 else if (cmpres < 0)
582 node = node->left;
583 else
584 node = node->right;
585 }
586 }
587
588 if (solution != NULL)
589 /* We really found a way to do the transformation. Now build a data
590 structure describing the transformation steps.*/
591 result = gen_steps (solution, toset_expand ?: toset,
592 fromset_expand ?: fromset, handle, nsteps);
593 else
594 {
595 /* We haven't found a transformation. Clear the result values. */
596 *handle = NULL;
597 *nsteps = 0;
598 }
599
600 /* Add result in any case to list of known derivations. */
601 add_derivation (fromset_expand ?: fromset, toset_expand ?: toset,
602 *handle, *nsteps);
603
604 __libc_lock_unlock (lock);
605
606 return result;
607 }
608
609
610 int
611 internal_function
612 __gconv_find_transform (const char *toset, const char *fromset,
613 struct gconv_step **handle, size_t *nsteps)
614 {
615 __libc_once_define (static, once);
616 const char *fromset_expand = NULL;
617 const char *toset_expand = NULL;
618 int result;
619
620 /* Ensure that the configuration data is read. */
621 __libc_once (once, __gconv_read_conf);
622
623 /* Acquire the lock. */
624 __libc_lock_lock (lock);
625
626 /* If we don't have a module database return with an error. */
627 if (__gconv_modules_db == NULL)
628 {
629 __libc_lock_unlock (lock);
630 return GCONV_NOCONV;
631 }
632
633 /* See whether the names are aliases. */
634 if (__gconv_alias_db != NULL)
635 {
636 struct gconv_alias key;
637 struct gconv_alias **found;
638
639 key.fromname = fromset;
640 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
641 fromset_expand = found != NULL ? (*found)->toname : NULL;
642
643 key.fromname = toset;
644 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
645 toset_expand = found != NULL ? (*found)->toname : NULL;
646 }
647
648 result = find_derivation (toset, toset_expand, fromset, fromset_expand,
649 handle, nsteps);
650
651 #ifndef STATIC_GCONV
652 /* Increment the user counter. */
653 if (result == GCONV_OK)
654 {
655 size_t cnt = *nsteps;
656 struct gconv_step *steps = *handle;
657
658 while (cnt > 0)
659 if (steps[--cnt].counter++ == 0)
660 {
661 steps[cnt].shlib_handle =
662 __gconv_find_shlib (steps[cnt].modname);
663 if (steps[cnt].shlib_handle == NULL)
664 {
665 /* Oops, this is the second time we use this module (after
666 unloading) and this time loading failed!? */
667 while (++cnt < *nsteps)
668 __gconv_release_shlib (steps[cnt].shlib_handle);
669 result = GCONV_NOCONV;
670 break;
671 }
672 }
673 }
674 #endif
675
676 /* Release the lock. */
677 __libc_lock_unlock (lock);
678
679 /* The following code is necessary since `find_derivation' will return
680 GCONV_OK even when no derivation was found but the same request
681 was processed before. I.e., negative results will also be cached. */
682 return (result == GCONV_OK
683 ? (*handle == NULL ? GCONV_NOCONV : GCONV_OK)
684 : result);
685 }
686
687
688 /* Release the entries of the modules list. */
689 int
690 internal_function
691 __gconv_close_transform (struct gconv_step *steps, size_t nsteps)
692 {
693 int result = GCONV_OK;
694
695 #ifndef STATIC_GCONV
696 /* Acquire the lock. */
697 __libc_lock_lock (lock);
698
699 while (nsteps-- > 0)
700 if (steps[nsteps].shlib_handle != NULL
701 && --steps[nsteps].counter == 0)
702 {
703 result = __gconv_release_shlib (steps[nsteps].shlib_handle);
704 if (result != GCONV_OK)
705 break;
706 steps[nsteps].shlib_handle = NULL;
707 }
708
709 /* Release the lock. */
710 __libc_lock_unlock (lock);
711 #endif
712
713 return result;
714 }
715
716
717 /* Free the modules mentioned. */
718 static void
719 internal_function
720 free_modules_db (struct gconv_module *node)
721 {
722 if (node->left != NULL)
723 free_modules_db (node->left);
724 if (node->right != NULL)
725 free_modules_db (node->right);
726 if (node->same != NULL)
727 free_modules_db (node->same);
728 do
729 {
730 struct gconv_module *act = node;
731 node = node->matching;
732 if (act->module_name[0] == '/')
733 free (act);
734 }
735 while (node != NULL);
736 }
737
738
739 /* Free all resources if necessary. */
740 static void __attribute__ ((unused))
741 free_mem (void)
742 {
743 if (__gconv_alias_db != NULL)
744 __tdestroy (__gconv_alias_db, free);
745
746 if (__gconv_modules_db != NULL)
747 free_modules_db (__gconv_modules_db);
748
749 if (known_derivations != NULL)
750 __tdestroy (known_derivations, free_derivation);
751 }
752
753 text_set_element (__libc_subfreeres, free_mem);