]> git.ipfire.org Git - thirdparty/glibc.git/blob - iconv/gconv_db.c
Update.
[thirdparty/glibc.git] / iconv / gconv_db.c
1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997, 1998 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
15
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21 #include <search.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <bits/libc-lock.h>
25
26 #ifndef STATIC_GCONV
27 # include <elf/ldsodefs.h>
28 #endif
29
30 #include <gconv_int.h>
31
32
33 /* Root of the search tree holding the charset alias records. Each record
34 carries two names, `from' (the search key) and `to'. */
35 void *__gconv_alias_db;
36
37 /* Array with available modules and the number of entries stored in it. */
38 size_t __gconv_nmodules;
39 struct gconv_module **__gconv_modules_db;
40
41 /* We modify global data; this lock is taken around those modifications. */
42 __libc_lock_define_initialized (static, lock)
43
44
45 /* Function for searching alias. */
46 int
47 __gconv_alias_compare (const void *p1, const void *p2)
48 {
49 struct gconv_alias *s1 = (struct gconv_alias *) p1;
50 struct gconv_alias *s2 = (struct gconv_alias *) p2;
51 return __strcasecmp (s1->fromname, s2->fromname);
52 }
53
54
/* While searching for a derivation we build a list of intermediate
   steps.  Every element remembers the element it was derived from, so
   a complete chain can be recovered by following the `last' links.  */
struct derivation_step
{
  /* Name of the charset reached by this step.  */
  const char *result_set;
  /* Module performing the step; NULL for the starting entries.  */
  struct gconv_module *code;
  /* Predecessor in the derivation order.  */
  struct derivation_step *last;
  /* Next element of the work list.  */
  struct derivation_step *next;
};

/* Create a new list element on the stack (via `alloca') and fill it in.
   The `next' link always starts out as NULL.  */
#define NEW_STEP(result, module, last_mod) \
  ({ struct derivation_step *step_ = alloca (sizeof *step_);		      \
     step_->result_set = (result);					      \
     step_->code = (module);						      \
     step_->last = (last_mod);						      \
     step_->next = NULL;						      \
     step_; })
73
74
/* A transformation which is requested more than once should not be
   searched for again.  Every result of a search is therefore cached in
   a record of this type.  */
struct known_derivation
{
  /* Name of the source charset (first part of the key).  */
  const char *from;
  /* Name of the target charset (second part of the key).  */
  const char *to;
  /* Steps of the transformation; NULL if none was found.  */
  struct gconv_step *steps;
  /* Number of elements in STEPS.  */
  size_t nsteps;
};
84
85 /* Compare function for database of found derivations. */
86 static int
87 derivation_compare (const void *p1, const void *p2)
88 {
89 struct known_derivation *s1 = (struct known_derivation *) p1;
90 struct known_derivation *s2 = (struct known_derivation *) p2;
91 int result;
92
93 result = strcmp (s1->from, s2->from);
94 if (result == 0)
95 result = strcmp (s1->to, s2->to);
96 return result;
97 }
98
99 /* The search tree for known derivations. */
100 static void *known_derivations;
101
102 /* Look up whether given transformation was already requested before. */
103 static int
104 internal_function
105 derivation_lookup (const char *fromset, const char *toset,
106 struct gconv_step **handle, size_t *nsteps)
107 {
108 struct known_derivation key = { fromset, toset, NULL, 0 };
109 struct known_derivation **result;
110
111 result = __tfind (&key, &known_derivations, derivation_compare);
112
113 if (result == NULL)
114 return GCONV_NOCONV;
115
116 *handle = (*result)->steps;
117 *nsteps = (*result)->nsteps;
118
119 /* Please note that we return GCONV_OK even if the last search for
120 this transformation was unsuccessful. */
121 return GCONV_OK;
122 }
123
124 /* Add new derivation to list of known ones. */
125 static void
126 internal_function
127 add_derivation (const char *fromset, const char *toset,
128 struct gconv_step *handle, size_t nsteps)
129 {
130 struct known_derivation *new_deriv;
131 size_t fromset_len = strlen (fromset) + 1;
132 size_t toset_len = strlen (toset) + 1;
133
134 new_deriv = (struct known_derivation *)
135 malloc (sizeof (struct known_derivation) + fromset_len + toset_len);
136 if (new_deriv != NULL)
137 {
138 new_deriv->from = memcpy (new_deriv + 1, fromset, fromset_len);
139 new_deriv->to = memcpy ((char *) new_deriv->from + fromset_len,
140 toset, toset_len);
141
142 new_deriv->steps = handle;
143 new_deriv->nsteps = nsteps;
144
145 __tsearch (new_deriv, &known_derivations, derivation_compare);
146 }
147 /* Please note that we don't complain if the allocation failed. This
148 is not tragically but in case we use the memory debugging facilities
149 not all memory will be freed. */
150 }
151
152 static void
153 internal_function
154 free_derivation (void *p)
155 {
156 struct known_derivation *deriv = (struct known_derivation *) p;
157 size_t cnt;
158
159 for (cnt = 0; cnt < deriv->nsteps; ++cnt)
160 if (deriv->steps[cnt].end_fct)
161 #ifdef _CALL_DL_FCT
162 _CALL_DL_FCT (deriv->steps[cnt].end_fct, (&deriv->steps[cnt]));
163 #else
164 deriv->steps[cnt].end_fct (&deriv->steps[cnt]);
165 #endif
166
167 free ((struct gconv_step *) deriv->steps);
168 free (deriv);
169 }
170
171
172 static int
173 internal_function
174 gen_steps (struct derivation_step *best, const char *toset,
175 const char *fromset, struct gconv_step **handle, size_t *nsteps)
176 {
177 size_t step_cnt = 0;
178 struct gconv_step *result;
179 struct derivation_step *current;
180 int status = GCONV_NOMEM;
181
182 /* First determine number of steps. */
183 for (current = best; current->last != NULL; current = current->last)
184 ++step_cnt;
185
186 result = (struct gconv_step *) malloc (sizeof (struct gconv_step)
187 * step_cnt);
188 if (result != NULL)
189 {
190 int failed = 0;
191
192 *nsteps = step_cnt;
193 current = best;
194 while (step_cnt-- > 0)
195 {
196 result[step_cnt].from_name = (step_cnt == 0
197 ? __strdup (fromset)
198 : current->last->result_set);
199 result[step_cnt].to_name = (step_cnt + 1 == *nsteps
200 ? __strdup (current->result_set)
201 : result[step_cnt + 1].from_name);
202
203 #ifndef STATIC_GCONV
204 if (current->code->module_name[0] == '/')
205 {
206 /* Load the module, return handle for it. */
207 struct gconv_loaded_object *shlib_handle =
208 __gconv_find_shlib (current->code->module_name);
209
210 if (shlib_handle == NULL)
211 {
212 failed = 1;
213 break;
214 }
215
216 result[step_cnt].shlib_handle = shlib_handle;
217 result[step_cnt].modname = shlib_handle->name;
218 result[step_cnt].counter = 0;
219 result[step_cnt].fct = shlib_handle->fct;
220 result[step_cnt].init_fct = shlib_handle->init_fct;
221 result[step_cnt].end_fct = shlib_handle->end_fct;
222 }
223 else
224 #endif
225 /* It's a builtin transformation. */
226 __gconv_get_builtin_trans (current->code->module_name,
227 &result[step_cnt]);
228
229 /* Call the init function. */
230 if (result[step_cnt].init_fct != NULL)
231 #ifdef _CALL_DL_FCT
232 _CALL_DL_FCT (result[step_cnt].init_fct, (&result[step_cnt]));
233 #else
234 result[step_cnt].init_fct (&result[step_cnt]);
235 #endif
236
237 current = current->last;
238 }
239
240 if (failed != 0)
241 {
242 /* Something went wrong while initializing the modules. */
243 while (++step_cnt < *nsteps)
244 {
245 if (result[step_cnt].end_fct != NULL)
246 #ifdef _CALL_DL_FCT
247 _CALL_DL_FCT (result[step_cnt].end_fct, (&result[step_cnt]));
248 #else
249 result[step_cnt].end_fct (&result[step_cnt]);
250 #endif
251 #ifndef STATIC_GCONV
252 __gconv_release_shlib (result[step_cnt].shlib_handle);
253 #endif
254 }
255 free (result);
256 *nsteps = 0;
257 *handle = NULL;
258 status = GCONV_NOCONV;
259 }
260 else
261 {
262 *handle = result;
263 status = GCONV_OK;
264 }
265 }
266 else
267 {
268 *nsteps = 0;
269 *handle = NULL;
270 }
271
272 return status;
273 }
274
275
276 /* The main function: find a possible derivation from the `fromset' (either
277 the given name or the alias) to the `toset' (again with alias). */
278 static int
279 internal_function
280 find_derivation (const char *toset, const char *toset_expand,
281 const char *fromset, const char *fromset_expand,
282 struct gconv_step **handle, size_t *nsteps)
283 {
284 __libc_lock_define_initialized (static, lock)
285 struct derivation_step *first, *current, **lastp, *best = NULL;
286 int best_cost_hi = 0;
287 int best_cost_lo = 0;
288 int result;
289
290 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
291 handle, nsteps);
292 if (result == GCONV_OK)
293 return result;
294
295 __libc_lock_lock (lock);
296
297 /* There is a small chance that this derivation is meanwhile found. This
298 can happen if in `find_derivation' we look for this derivation, didn't
299 find it but at the same time another thread looked for this derivation. */
300 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
301 handle, nsteps);
302 if (result == GCONV_OK)
303 return result;
304
305 /* ### TODO
306 For now we use a simple algorithm with quadratic runtime behaviour.
307 The task is to match the `toset' with any of the available rules,
308 starting from FROMSET. */
309 if (fromset_expand != NULL)
310 {
311 first = NEW_STEP (fromset_expand, NULL, NULL);
312 first->next = NEW_STEP (fromset, NULL, NULL);
313 lastp = &first->next->next;
314 }
315 else
316 {
317 first = NEW_STEP (fromset, NULL, NULL);
318 lastp = &first->next;
319 }
320
321 current = first;
322 while (current != NULL)
323 {
324 /* Now match all the available module specifications against the
325 current charset name. If any of them matches check whether
326 we already have a derivation for this charset. If yes, use the
327 one with the lower costs. Otherwise add the new charset at the
328 end. */
329 size_t cnt;
330
331 for (cnt = 0; cnt < __gconv_nmodules; ++cnt)
332 {
333 const char *result_set = NULL;
334
335 if (__gconv_modules_db[cnt]->from_pattern == NULL)
336 {
337 if (__strcasecmp (current->result_set,
338 __gconv_modules_db[cnt]->from_constpfx) == 0)
339 {
340 if (strcmp (__gconv_modules_db[cnt]->to_string, "-") == 0)
341 result_set = toset_expand ?: toset;
342 else
343 result_set = __gconv_modules_db[cnt]->to_string;
344 }
345 }
346 else
347 /* We have a regular expression. First see if the prefix
348 matches. */
349 if (__strncasecmp (current->result_set,
350 __gconv_modules_db[cnt]->from_constpfx,
351 __gconv_modules_db[cnt]->from_constpfx_len)
352 == 0)
353 {
354 /* First compile the regex if not already done. */
355 if (__gconv_modules_db[cnt]->from_regex == NULL)
356 {
357 regex_t *newp = (regex_t *) malloc (sizeof (regex_t));
358
359 if (__regcomp (newp, __gconv_modules_db[cnt]->from_pattern,
360 REG_EXTENDED | REG_ICASE) != 0)
361 {
362 /* Something is wrong. Remember this. */
363 free (newp);
364 __gconv_modules_db[cnt]->from_regex = (regex_t *) -1L;
365 }
366 else
367 __gconv_modules_db[cnt]->from_regex = newp;
368 }
369
370 if (__gconv_modules_db[cnt]->from_regex != (regex_t *) -1L)
371 {
372 /* Try to match the from name. */
373 regmatch_t match[4];
374
375 if (__regexec (__gconv_modules_db[cnt]->from_regex,
376 current->result_set, 4, match, 0) == 0
377 && match[0].rm_so == 0
378 && current->result_set[match[0].rm_eo] == '\0')
379 {
380 /* At least the whole <from> string is matched.
381 We must now match sed-like possible
382 subexpressions from the match to the
383 toset expression. */
384 #define ENSURE_LEN(LEN) \
385 if (wp + (LEN) >= constr + len - 1) \
386 { \
387 char *newp = alloca (len += 128); \
388 memcpy (newp, constr, wp - constr); \
389 wp = newp + (wp - constr); \
390 constr = newp; \
391 }
392 size_t len = 128;
393 char *constr = alloca (len);
394 char *wp = constr;
395 const char *cp = __gconv_modules_db[cnt]->to_string;
396
397 while (*cp != '\0')
398 {
399 if (*cp != '\\')
400 {
401 ENSURE_LEN (1);
402 *wp++ = *cp++;
403 }
404 else if (cp[1] == '\0')
405 /* Backslash at end of string. */
406 break;
407 else
408 {
409 ++cp;
410 if (*cp == '\\')
411 {
412 *wp++ = *cp++;
413 ENSURE_LEN (1);
414 }
415 else if (*cp < '1' || *cp > '3')
416 break;
417 else
418 {
419 int idx = *cp - '0';
420 if (match[idx].rm_so == -1)
421 /* No match. */
422 break;
423
424 ENSURE_LEN (match[idx].rm_eo
425 - match[idx].rm_so);
426 wp = __mempcpy (wp,
427 &current->result_set[match[idx].rm_so],
428 match[idx].rm_eo
429 - match[idx].rm_so);
430 ++cp;
431 }
432 }
433 }
434 if (*cp == '\0' && wp != constr)
435 {
436 /* Terminate the constructed string. */
437 *wp = '\0';
438 result_set = constr;
439 }
440 }
441 }
442 }
443
444 if (result_set != NULL)
445 {
446 /* We managed to find a derivation. First see whether
447 this is what we are looking for. */
448 if (__strcasecmp (result_set, toset) == 0
449 || (toset_expand != NULL
450 && __strcasecmp (result_set, toset_expand) == 0))
451 {
452 /* Determine the costs. If they are lower than the
453 previous solution (or this is the first solution)
454 remember this solution. */
455 int cost_hi = __gconv_modules_db[cnt]->cost_hi;
456 int cost_lo = __gconv_modules_db[cnt]->cost_lo;
457 struct derivation_step *runp = current;
458 while (runp->code != NULL)
459 {
460 cost_hi += runp->code->cost_hi;
461 cost_lo += runp->code->cost_lo;
462 runp = runp->last;
463 }
464 if (best == NULL || cost_hi < best_cost_hi
465 || (cost_hi == best_cost_hi && cost_lo < best_cost_lo))
466 {
467 best = NEW_STEP (result_set, __gconv_modules_db[cnt],
468 current);
469 best_cost_hi = cost_hi;
470 best_cost_lo = cost_lo;
471 }
472 }
473 else
474 {
475 /* Append at the end if there is no entry with this name. */
476 struct derivation_step *runp = first;
477
478 while (runp != NULL)
479 {
480 if (__strcasecmp (result_set, runp->result_set) == 0)
481 break;
482 runp = runp->next;
483 }
484
485 if (runp == NULL)
486 {
487 *lastp = NEW_STEP (result_set, __gconv_modules_db[cnt],
488 current);
489 lastp = &(*lastp)->next;
490 }
491 }
492 }
493 }
494
495 /* Go on with the next entry. */
496 current = current->next;
497 }
498
499 if (best != NULL)
500 /* We really found a way to do the transformation. Now build a data
501 structure describing the transformation steps.*/
502 result = gen_steps (best, toset_expand ?: toset, fromset_expand ?: fromset,
503 handle, nsteps);
504 else
505 {
506 /* We haven't found a transformation. Clear the result values. */
507 *handle = NULL;
508 *nsteps = 0;
509 }
510
511 /* Add result in any case to list of known derivations. */
512 add_derivation (fromset_expand ?: fromset, toset_expand ?: toset,
513 *handle, *nsteps);
514
515 __libc_lock_unlock (lock);
516
517 return result;
518 }
519
520
521 int
522 internal_function
523 __gconv_find_transform (const char *toset, const char *fromset,
524 struct gconv_step **handle, size_t *nsteps)
525 {
526 __libc_once_define (static, once);
527 const char *fromset_expand = NULL;
528 const char *toset_expand = NULL;
529 int result;
530
531 /* Ensure that the configuration data is read. */
532 __libc_once (once, __gconv_read_conf);
533
534 /* Acquire the lock. */
535 __libc_lock_lock (lock);
536
537 /* If we don't have a module database return with an error. */
538 if (__gconv_modules_db == NULL)
539 return GCONV_NOCONV;
540
541 /* See whether the names are aliases. */
542 if (__gconv_alias_db != NULL)
543 {
544 struct gconv_alias key;
545 struct gconv_alias **found;
546
547 key.fromname = fromset;
548 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
549 fromset_expand = found != NULL ? (*found)->toname : NULL;
550
551 key.fromname = toset;
552 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
553 toset_expand = found != NULL ? (*found)->toname : NULL;
554 }
555
556 result = find_derivation (toset, toset_expand, fromset, fromset_expand,
557 handle, nsteps);
558
559 #ifndef STATIC_GCONV
560 /* Increment the user counter. */
561 if (result == GCONV_OK)
562 {
563 size_t cnt = *nsteps;
564 struct gconv_step *steps = *handle;
565
566 do
567 if (steps[--cnt].counter++ == 0)
568 {
569 steps[cnt].shlib_handle =
570 __gconv_find_shlib (steps[cnt].modname);
571 if (steps[cnt].shlib_handle == NULL)
572 {
573 /* Oops, this is the second time we use this module (after
574 unloading) and this time loading failed!? */
575 while (++cnt < *nsteps)
576 __gconv_release_shlib (steps[cnt].shlib_handle);
577 result = GCONV_NOCONV;
578 break;
579 }
580 }
581 while (cnt > 0);
582 }
583 #endif
584
585 /* Release the lock. */
586 __libc_lock_unlock (lock);
587
588 /* The following code is necessary since `find_derivation' will return
589 GCONV_OK even when no derivation was found but the same request
590 was processed before. I.e., negative results will also be cached. */
591 return (result == GCONV_OK
592 ? (*handle == NULL ? GCONV_NOCONV : GCONV_OK)
593 : result);
594 }
595
596
597 /* Release the entries of the modules list. */
598 int
599 internal_function
600 __gconv_close_transform (struct gconv_step *steps, size_t nsteps)
601 {
602 int result = GCONV_OK;
603
604 #ifndef STATIC_GCONV
605 /* Acquire the lock. */
606 __libc_lock_lock (lock);
607
608 while (nsteps-- > 0)
609 if (steps[nsteps].shlib_handle != NULL
610 && --steps[nsteps].counter == 0)
611 {
612 result = __gconv_release_shlib (steps[nsteps].shlib_handle);
613 if (result != GCONV_OK)
614 break;
615 steps[nsteps].shlib_handle = NULL;
616 }
617
618 /* Release the lock. */
619 __libc_lock_unlock (lock);
620 #endif
621
622 return result;
623 }
624
625
626 /* Free all resources if necessary. */
627 static void __attribute__ ((unused))
628 free_mem (void)
629 {
630 size_t cnt;
631
632 if (__gconv_alias_db != NULL)
633 __tdestroy (__gconv_alias_db, free);
634
635 for (cnt = 0; cnt < __gconv_nmodules; ++cnt)
636 {
637 if (__gconv_modules_db[cnt]->from_regex != NULL)
638 __regfree ((regex_t *) __gconv_modules_db[cnt]->from_regex);
639
640 /* Modules which names do not start with a slash are builtin
641 transformations and the memory is not allocated dynamically. */
642 if (__gconv_modules_db[cnt]->module_name[0] == '/')
643 free (__gconv_modules_db[cnt]);
644 }
645
646 if (known_derivations != NULL)
647 __tdestroy (known_derivations, free_derivation);
648 }
649
650 text_set_element (__libc_subfreeres, free_mem);