]> git.ipfire.org Git - thirdparty/glibc.git/blob - iconv/gconv_db.c
Update.
[thirdparty/glibc.git] / iconv / gconv_db.c
1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
15
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21 #include <limits.h>
22 #include <search.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <sys/param.h>
26 #include <bits/libc-lock.h>
27
28 #include <ldsodefs.h>
29 #include <gconv_int.h>
30
31
32 /* Simple data structure for alias mapping. We have two names, `from'
33 and `to'. */
34 void *__gconv_alias_db;
35
36 /* Array with available modules. */
37 struct gconv_module *__gconv_modules_db;
38
39 /* We modify global data. */
40 __libc_lock_define_initialized (static, lock)
41
42
43 /* Function for searching alias. */
44 int
45 __gconv_alias_compare (const void *p1, const void *p2)
46 {
47 struct gconv_alias *s1 = (struct gconv_alias *) p1;
48 struct gconv_alias *s2 = (struct gconv_alias *) p2;
49 return strcmp (s1->fromname, s2->fromname);
50 }
51
52
53 /* To search for a derivation we create a list of intermediate steps.
54 Each element contains a pointer to the element which precedes it
55 in the derivation order. */
56 struct derivation_step
57 {
58 const char *result_set;
59 size_t result_set_len;
60 int cost_lo;
61 int cost_hi;
62 struct gconv_module *code;
63 struct derivation_step *last;
64 struct derivation_step *next;
65 };
66
67 #define NEW_STEP(result, hi, lo, module, last_mod) \
68 ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
69 newp->result_set = result; \
70 newp->result_set_len = strlen (result); \
71 newp->cost_hi = hi; \
72 newp->cost_lo = lo; \
73 newp->code = module; \
74 newp->last = last_mod; \
75 newp->next = NULL; \
76 newp; })
77
78
79 /* If a specific transformation is used more than once we should not need
80 to start looking for it again. Instead cache each successful result. */
81 struct known_derivation
82 {
83 const char *from;
84 const char *to;
85 struct gconv_step *steps;
86 size_t nsteps;
87 };
88
89 /* Compare function for database of found derivations. */
90 static int
91 derivation_compare (const void *p1, const void *p2)
92 {
93 struct known_derivation *s1 = (struct known_derivation *) p1;
94 struct known_derivation *s2 = (struct known_derivation *) p2;
95 int result;
96
97 result = strcmp (s1->from, s2->from);
98 if (result == 0)
99 result = strcmp (s1->to, s2->to);
100 return result;
101 }
102
103 /* The search tree for known derivations. */
104 static void *known_derivations;
105
106 /* Look up whether given transformation was already requested before. */
107 static int
108 internal_function
109 derivation_lookup (const char *fromset, const char *toset,
110 struct gconv_step **handle, size_t *nsteps)
111 {
112 struct known_derivation key = { fromset, toset, NULL, 0 };
113 struct known_derivation **result;
114
115 result = __tfind (&key, &known_derivations, derivation_compare);
116
117 if (result == NULL)
118 return GCONV_NOCONV;
119
120 *handle = (*result)->steps;
121 *nsteps = (*result)->nsteps;
122
123 /* Please note that we return GCONV_OK even if the last search for
124 this transformation was unsuccessful. */
125 return GCONV_OK;
126 }
127
128 /* Add new derivation to list of known ones. */
129 static void
130 internal_function
131 add_derivation (const char *fromset, const char *toset,
132 struct gconv_step *handle, size_t nsteps)
133 {
134 struct known_derivation *new_deriv;
135 size_t fromset_len = strlen (fromset) + 1;
136 size_t toset_len = strlen (toset) + 1;
137
138 new_deriv = (struct known_derivation *)
139 malloc (sizeof (struct known_derivation) + fromset_len + toset_len);
140 if (new_deriv != NULL)
141 {
142 new_deriv->from = memcpy (new_deriv + 1, fromset, fromset_len);
143 new_deriv->to = memcpy ((char *) new_deriv->from + fromset_len,
144 toset, toset_len);
145
146 new_deriv->steps = handle;
147 new_deriv->nsteps = nsteps;
148
149 __tsearch (new_deriv, &known_derivations, derivation_compare);
150 }
151 /* Please note that we don't complain if the allocation failed. This
152 is not tragically but in case we use the memory debugging facilities
153 not all memory will be freed. */
154 }
155
156 static void
157 free_derivation (void *p)
158 {
159 struct known_derivation *deriv = (struct known_derivation *) p;
160 size_t cnt;
161
162 for (cnt = 0; cnt < deriv->nsteps; ++cnt)
163 if (deriv->steps[cnt].end_fct)
164 _CALL_DL_FCT (deriv->steps[cnt].end_fct, (&deriv->steps[cnt]));
165
166 free ((struct gconv_step *) deriv->steps);
167 free (deriv);
168 }
169
170
171 static int
172 internal_function
173 gen_steps (struct derivation_step *best, const char *toset,
174 const char *fromset, struct gconv_step **handle, size_t *nsteps)
175 {
176 size_t step_cnt = 0;
177 struct gconv_step *result;
178 struct derivation_step *current;
179 int status = GCONV_NOMEM;
180
181 /* First determine number of steps. */
182 for (current = best; current->last != NULL; current = current->last)
183 ++step_cnt;
184
185 result = (struct gconv_step *) malloc (sizeof (struct gconv_step)
186 * step_cnt);
187 if (result != NULL)
188 {
189 int failed = 0;
190
191 status = GCONV_OK;
192 *nsteps = step_cnt;
193 current = best;
194 while (step_cnt-- > 0)
195 {
196 result[step_cnt].from_name = (step_cnt == 0
197 ? __strdup (fromset)
198 : current->last->result_set);
199 result[step_cnt].to_name = (step_cnt + 1 == *nsteps
200 ? __strdup (current->result_set)
201 : result[step_cnt + 1].from_name);
202
203 #ifndef STATIC_GCONV
204 if (current->code->module_name[0] == '/')
205 {
206 /* Load the module, return handle for it. */
207 struct gconv_loaded_object *shlib_handle =
208 __gconv_find_shlib (current->code->module_name);
209
210 if (shlib_handle == NULL)
211 {
212 failed = 1;
213 break;
214 }
215
216 result[step_cnt].shlib_handle = shlib_handle;
217 result[step_cnt].modname = shlib_handle->name;
218 result[step_cnt].counter = 0;
219 result[step_cnt].fct = shlib_handle->fct;
220 result[step_cnt].init_fct = shlib_handle->init_fct;
221 result[step_cnt].end_fct = shlib_handle->end_fct;
222 }
223 else
224 #endif
225 /* It's a builtin transformation. */
226 __gconv_get_builtin_trans (current->code->module_name,
227 &result[step_cnt]);
228
229 /* Call the init function. */
230 if (result[step_cnt].init_fct != NULL)
231 {
232 status = _CALL_DL_FCT (result[step_cnt].init_fct,
233 (&result[step_cnt]));
234
235 if (status != GCONV_OK)
236 {
237 failed = 1;
238 /* Make sure we unload this modules. */
239 --step_cnt;
240 break;
241 }
242 }
243
244 current = current->last;
245 }
246
247 if (failed != 0)
248 {
249 /* Something went wrong while initializing the modules. */
250 while (++step_cnt < *nsteps)
251 {
252 if (result[step_cnt].end_fct != NULL)
253 _CALL_DL_FCT (result[step_cnt].end_fct, (&result[step_cnt]));
254 #ifndef STATIC_GCONV
255 __gconv_release_shlib (result[step_cnt].shlib_handle);
256 #endif
257 }
258 free (result);
259 *nsteps = 0;
260 *handle = NULL;
261 if (status == GCONV_OK)
262 status = GCONV_NOCONV;
263 }
264 else
265 *handle = result;
266 }
267 else
268 {
269 *nsteps = 0;
270 *handle = NULL;
271 }
272
273 return status;
274 }
275
276
277 /* The main function: find a possible derivation from the `fromset' (either
278 the given name or the alias) to the `toset' (again with alias). */
279 static int
280 internal_function
281 find_derivation (const char *toset, const char *toset_expand,
282 const char *fromset, const char *fromset_expand,
283 struct gconv_step **handle, size_t *nsteps)
284 {
285 __libc_lock_define_initialized (static, lock)
286 struct derivation_step *first, *current, **lastp, *solution = NULL;
287 int best_cost_hi = INT_MAX;
288 int best_cost_lo = INT_MAX;
289 int result;
290
291 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
292 handle, nsteps);
293 if (result == GCONV_OK)
294 return result;
295
296 __libc_lock_lock (lock);
297
298 /* There is a small chance that this derivation is meanwhile found. This
299 can happen if in `find_derivation' we look for this derivation, didn't
300 find it but at the same time another thread looked for this derivation. */
301 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
302 handle, nsteps);
303 if (result == GCONV_OK)
304 {
305 __libc_lock_unlock (lock);
306 return result;
307 }
308
309 /* For now we use a simple algorithm with quadratic runtime behaviour.
310 The task is to match the `toset' with any of the available rules,
311 starting from FROMSET. */
312 if (fromset_expand != NULL)
313 {
314 first = NEW_STEP (fromset_expand, 0, 0, NULL, NULL);
315 first->next = NEW_STEP (fromset, 0, 0, NULL, NULL);
316 lastp = &first->next->next;
317 }
318 else
319 {
320 first = NEW_STEP (fromset, 0, 0, NULL, NULL);
321 lastp = &first->next;
322 }
323
324 for (current = first; current != NULL; current = current->next)
325 {
326 /* Now match all the available module specifications against the
327 current charset name. If any of them matches check whether
328 we already have a derivation for this charset. If yes, use the
329 one with the lower costs. Otherwise add the new charset at the
330 end.
331
332 The module database is organized in a tree form which allows to
333 search for prefixes. So we search for the first entry with a
334 matching prefix and any other matching entry can be found from
335 this place. */
336 struct gconv_module *node = __gconv_modules_db;
337
338 /* Maybe it is not necessary anymore to look for a solution for
339 this entry since the cost is already as high (or heigher) as
340 the cost for the best solution so far. */
341 if (current->cost_hi > best_cost_hi
342 || (current->cost_hi == best_cost_hi
343 && current->cost_lo >= best_cost_lo))
344 continue;
345
346 while (node != NULL)
347 {
348 int cmpres = strncmp (current->result_set, node->from_constpfx,
349 MIN (current->result_set_len,
350 node->from_constpfx_len));
351
352 if (cmpres == 0)
353 {
354 /* Walk through the list of modules with this prefix and
355 try to match the name. */
356 struct gconv_module *runp;
357
358 if (current->result_set_len < node->from_constpfx_len)
359 /* Cannot possibly match. */
360 break;
361
362 /* Check all the modules with this prefix. */
363 runp = node;
364 do
365 {
366 const char *result_set = NULL;
367
368 if (runp->from_pattern == NULL)
369 {
370 /* This is a simple entry and therefore we have a
371 found an matching entry if the strings are really
372 equal. */
373 if (current->result_set_len == runp->from_constpfx_len)
374 {
375 if (strcmp (runp->to_string, "-") == 0)
376 result_set = toset_expand ?: toset;
377 else
378 result_set = runp->to_string;
379 }
380 }
381 else
382 {
383 /* Compile the regular expression if necessary. */
384 if (runp->from_regex == NULL)
385 {
386 if (__regcomp (&runp->from_regex_mem,
387 runp->from_pattern,
388 REG_EXTENDED | REG_ICASE) != 0)
389 /* Something is wrong. Remember this. */
390 runp->from_regex = (regex_t *) -1L;
391 else
392 runp->from_regex = &runp->from_regex_mem;
393 }
394
395 if (runp->from_regex != (regex_t *) -1L)
396 {
397 regmatch_t match[4];
398
399 /* Try to match the regular expression. */
400 if (__regexec (runp->from_regex, current->result_set,
401 4, match, 0) == 0
402 && match[0].rm_so == 0
403 && current->result_set[match[0].rm_eo] == '\0')
404 {
405 /* At least the whole <from> string is matched.
406 We must now match sed-like possible
407 subexpressions from the match to the
408 toset expression. */
409 #define ENSURE_LEN(LEN) \
410 if (wp + (LEN) >= constr + len - 1) \
411 { \
412 char *newp = alloca (len += 128); \
413 wp = __mempcpy (newp, constr, wp - constr); \
414 constr = newp; \
415 }
416 size_t len = 128;
417 char *constr = alloca (len);
418 char *wp = constr;
419 const char *cp = runp->to_string;
420
421 while (*cp != '\0')
422 {
423 if (*cp != '\\')
424 {
425 ENSURE_LEN (1);
426 *wp++ = *cp++;
427 }
428 else if (cp[1] == '\0')
429 /* Backslash at end of string. */
430 break;
431 else
432 {
433 ++cp;
434 if (*cp == '\\')
435 {
436 *wp++ = *cp++;
437 ENSURE_LEN (1);
438 }
439 else if (*cp < '1' || *cp > '3')
440 break;
441 else
442 {
443 int idx = *cp - '0';
444 if (match[idx].rm_so == -1)
445 /* No match. */
446 break;
447
448 ENSURE_LEN (match[idx].rm_eo
449 - match[idx].rm_so);
450 wp = __mempcpy (wp,
451 &current->result_set[match[idx].rm_so],
452 match[idx].rm_eo
453 - match[idx].rm_so);
454 ++cp;
455 }
456 }
457 }
458 if (*cp == '\0' && wp != constr)
459 {
460 /* Terminate the constructed string. */
461 *wp = '\0';
462 result_set = constr;
463 }
464 }
465 }
466 }
467
468 if (result_set != NULL)
469 {
470 int cost_hi = runp->cost_hi + current->cost_hi;
471 int cost_lo = runp->cost_lo + current->cost_lo;
472 struct derivation_step *step;
473
474 /* We managed to find a derivation. First see whether
475 this is what we are looking for. */
476 if (strcmp (result_set, toset) == 0
477 || (toset_expand != NULL
478 && strcmp (result_set, toset_expand) == 0))
479 {
480 if (solution == NULL || cost_hi < best_cost_hi
481 || (cost_hi == best_cost_hi
482 && cost_lo < best_cost_lo))
483 {
484 best_cost_hi = cost_hi;
485 best_cost_lo = cost_lo;
486 }
487
488 /* Append this solution to list. */
489 if (solution == NULL)
490 solution = NEW_STEP (result_set, 0, 0, runp,
491 current);
492 else
493 {
494 while (solution->next != NULL)
495 solution = solution->next;
496
497 solution->next = NEW_STEP (result_set, 0, 0,
498 runp, current);
499 }
500 }
501 else if (cost_hi < best_cost_hi
502 || (cost_hi == best_cost_hi
503 && cost_lo < best_cost_lo))
504 {
505 /* Append at the end if there is no entry with
506 this name. */
507 for (step = first; step != NULL; step = step->next)
508 if (strcmp (result_set, step->result_set) == 0)
509 break;
510
511 if (step == NULL)
512 {
513 *lastp = NEW_STEP (result_set,
514 cost_hi, cost_lo,
515 runp, current);
516 lastp = &(*lastp)->next;
517 }
518 else if (step->cost_hi > cost_hi
519 || (step->cost_hi == cost_hi
520 && step->cost_lo > cost_lo))
521 {
522 step->code = runp;
523 step->last = current;
524
525 /* Update the cost for all steps. */
526 for (step = first; step != NULL;
527 step = step->next)
528 {
529 struct derivation_step *back;
530
531 if (step->code == NULL)
532 /* This is one of the entries we started
533 from. */
534 continue;
535
536 step->cost_hi = step->code->cost_hi;
537 step->cost_lo = step->code->cost_lo;
538
539 for (back = step->last; back->code != NULL;
540 back = back->last)
541 {
542 step->cost_hi += back->code->cost_hi;
543 step->cost_lo += back->code->cost_lo;
544 }
545 }
546
547 for (step = solution; step != NULL;
548 step = step->next)
549 {
550 step->cost_hi = (step->code->cost_hi
551 + step->last->cost_hi);
552 step->cost_lo = (step->code->cost_lo
553 + step->last->cost_lo);
554
555 if (step->cost_hi < best_cost_hi
556 || (step->cost_hi == best_cost_hi
557 && step->cost_lo < best_cost_lo))
558 {
559 solution = step;
560 best_cost_hi = step->cost_hi;
561 best_cost_lo = step->cost_lo;
562 }
563 }
564 }
565 }
566 }
567
568 runp = runp->same;
569 }
570 while (runp != NULL);
571
572 if (current->result_set_len == node->from_constpfx_len)
573 break;
574
575 node = node->matching;
576 }
577 else if (cmpres < 0)
578 node = node->left;
579 else
580 node = node->right;
581 }
582 }
583
584 if (solution != NULL)
585 /* We really found a way to do the transformation. Now build a data
586 structure describing the transformation steps.*/
587 result = gen_steps (solution, toset_expand ?: toset,
588 fromset_expand ?: fromset, handle, nsteps);
589 else
590 {
591 /* We haven't found a transformation. Clear the result values. */
592 *handle = NULL;
593 *nsteps = 0;
594 }
595
596 /* Add result in any case to list of known derivations. */
597 add_derivation (fromset_expand ?: fromset, toset_expand ?: toset,
598 *handle, *nsteps);
599
600 __libc_lock_unlock (lock);
601
602 return result;
603 }
604
605
606 int
607 internal_function
608 __gconv_find_transform (const char *toset, const char *fromset,
609 struct gconv_step **handle, size_t *nsteps)
610 {
611 __libc_once_define (static, once);
612 const char *fromset_expand = NULL;
613 const char *toset_expand = NULL;
614 int result;
615
616 /* Ensure that the configuration data is read. */
617 __libc_once (once, __gconv_read_conf);
618
619 /* Acquire the lock. */
620 __libc_lock_lock (lock);
621
622 /* If we don't have a module database return with an error. */
623 if (__gconv_modules_db == NULL)
624 {
625 __libc_lock_unlock (lock);
626 return GCONV_NOCONV;
627 }
628
629 /* See whether the names are aliases. */
630 if (__gconv_alias_db != NULL)
631 {
632 struct gconv_alias key;
633 struct gconv_alias **found;
634
635 key.fromname = fromset;
636 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
637 fromset_expand = found != NULL ? (*found)->toname : NULL;
638
639 key.fromname = toset;
640 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
641 toset_expand = found != NULL ? (*found)->toname : NULL;
642 }
643
644 result = find_derivation (toset, toset_expand, fromset, fromset_expand,
645 handle, nsteps);
646
647 #ifndef STATIC_GCONV
648 /* Increment the user counter. */
649 if (result == GCONV_OK)
650 {
651 size_t cnt = *nsteps;
652 struct gconv_step *steps = *handle;
653
654 do
655 if (steps[--cnt].counter++ == 0)
656 {
657 steps[cnt].shlib_handle =
658 __gconv_find_shlib (steps[cnt].modname);
659 if (steps[cnt].shlib_handle == NULL)
660 {
661 /* Oops, this is the second time we use this module (after
662 unloading) and this time loading failed!? */
663 while (++cnt < *nsteps)
664 __gconv_release_shlib (steps[cnt].shlib_handle);
665 result = GCONV_NOCONV;
666 break;
667 }
668 }
669 while (cnt > 0);
670 }
671 #endif
672
673 /* Release the lock. */
674 __libc_lock_unlock (lock);
675
676 /* The following code is necessary since `find_derivation' will return
677 GCONV_OK even when no derivation was found but the same request
678 was processed before. I.e., negative results will also be cached. */
679 return (result == GCONV_OK
680 ? (*handle == NULL ? GCONV_NOCONV : GCONV_OK)
681 : result);
682 }
683
684
685 /* Release the entries of the modules list. */
686 int
687 internal_function
688 __gconv_close_transform (struct gconv_step *steps, size_t nsteps)
689 {
690 int result = GCONV_OK;
691
692 #ifndef STATIC_GCONV
693 /* Acquire the lock. */
694 __libc_lock_lock (lock);
695
696 while (nsteps-- > 0)
697 if (steps[nsteps].shlib_handle != NULL
698 && --steps[nsteps].counter == 0)
699 {
700 result = __gconv_release_shlib (steps[nsteps].shlib_handle);
701 if (result != GCONV_OK)
702 break;
703 steps[nsteps].shlib_handle = NULL;
704 }
705
706 /* Release the lock. */
707 __libc_lock_unlock (lock);
708 #endif
709
710 return result;
711 }
712
713
714 /* Free the modules mentioned. */
715 static void
716 internal_function
717 free_modules_db (struct gconv_module *node)
718 {
719 if (node->left != NULL)
720 free_modules_db (node->left);
721 if (node->right != NULL)
722 free_modules_db (node->right);
723 if (node->same != NULL)
724 free_modules_db (node->same);
725 do
726 {
727 struct gconv_module *act = node;
728 node = node->matching;
729 free (act);
730 }
731 while (node != NULL);
732 }
733
734
735 /* Free all resources if necessary. */
736 static void __attribute__ ((unused))
737 free_mem (void)
738 {
739 if (__gconv_alias_db != NULL)
740 __tdestroy (__gconv_alias_db, free);
741
742 free_modules_db (__gconv_modules_db);
743
744 if (known_derivations != NULL)
745 __tdestroy (known_derivations, free_derivation);
746 }
747
748 text_set_element (__libc_subfreeres, free_mem);