]> git.ipfire.org Git - thirdparty/glibc.git/blob - iconv/gconv_db.c
Update.
[thirdparty/glibc.git] / iconv / gconv_db.c
1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997, 1998 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
15
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21 #include <search.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <bits/libc-lock.h>
25
26 #include <gconv_int.h>
27
28
/* Root of the search tree holding the known charset aliases.  The tree is
   searched with `__tfind' and `__gconv_alias_compare'; its entries are
   `struct gconv_alias' objects mapping a `fromname' to a `toname'.  */
void *__gconv_alias_db;
32
/* Table of the available transformation modules: `__gconv_nmodules' is
   the number of entries, `__gconv_modules_db' the array of pointers to
   the module descriptions.  */
size_t __gconv_nmodules;
struct gconv_module **__gconv_modules_db;
36
37 /* We modify global data. */
38 __libc_lock_define_initialized (static, lock)
39
40
41 /* Function for searching alias. */
42 int
43 __gconv_alias_compare (const void *p1, const void *p2)
44 {
45 struct gconv_alias *s1 = (struct gconv_alias *) p1;
46 struct gconv_alias *s2 = (struct gconv_alias *) p2;
47 return __strcasecmp (s1->fromname, s2->fromname);
48 }
49
50
/* While searching for a derivation we build a list of intermediate
   steps.  Every element remembers the element it was derived from, so
   that a complete derivation can be read off by following the `last'
   pointers backwards.  */
struct derivation_step
{
  /* Name of the charset reached by this step.  */
  const char *result_set;
  /* Module performing this step; NULL for the starting elements.  */
  struct gconv_module *code;
  /* Element preceding this one in the derivation; NULL at the start.  */
  struct derivation_step *last;
  /* Next element in the list of charsets still to be examined.  */
  struct derivation_step *next;
};
61
/* Create a `struct derivation_step' with `alloca' (so the object lives
   in the stack frame of the function using the macro), fill in the
   reached charset RESULT, the MODULE used and the predecessor LAST_MOD,
   clear the `next' link and yield a pointer to the new element.  */
#define NEW_STEP(result, module, last_mod) \
  ({ struct derivation_step *new_step_ = alloca (sizeof (*new_step_)); \
     new_step_->result_set = (result); \
     new_step_->code = (module); \
     new_step_->last = (last_mod); \
     new_step_->next = NULL; \
     new_step_; })
69
70
/* A transformation which was requested once need not be searched for
   again: the outcome of every search is remembered in a record of this
   type.  */
struct known_derivation
{
  /* Name of the source charset of the request.  */
  const char *from;
  /* Name of the destination charset of the request.  */
  const char *to;
  /* Steps performing the conversion, and how many there are.  */
  struct gconv_step *steps;
  size_t nsteps;
};
80
81 /* Compare function for database of found derivations. */
82 static int
83 derivation_compare (const void *p1, const void *p2)
84 {
85 struct known_derivation *s1 = (struct known_derivation *) p1;
86 struct known_derivation *s2 = (struct known_derivation *) p2;
87 int result;
88
89 result = strcmp (s1->from, s2->from);
90 if (result == 0)
91 result = strcmp (s1->to, s2->to);
92 return result;
93 }
94
/* Root of the search tree of `struct known_derivation' records, ordered
   by `derivation_compare'.  */
static void *known_derivations;
97
98 /* Look up whether given transformation was already requested before. */
99 static int
100 internal_function
101 derivation_lookup (const char *fromset, const char *toset,
102 struct gconv_step **handle, size_t *nsteps)
103 {
104 struct known_derivation key = { fromset, toset, NULL, 0 };
105 struct known_derivation *result;
106
107 result = __tfind (&key, &known_derivations, derivation_compare);
108
109 if (result == NULL)
110 return GCONV_NOCONV;
111
112 *handle = result->steps;
113 *nsteps = result->nsteps;
114
115 /* Please note that we return GCONV_OK even if the last search for
116 this transformation was unsuccessful. */
117 return GCONV_OK;
118 }
119
120 /* Add new derivation to list of known ones. */
121 static void
122 internal_function
123 add_derivation (const char *fromset, const char *toset,
124 struct gconv_step *handle, size_t nsteps)
125 {
126 struct known_derivation *new_deriv;
127 size_t fromset_len = strlen (fromset) + 1;
128 size_t toset_len = strlen (toset) + 1;
129
130 new_deriv = (struct known_derivation *)
131 malloc (sizeof (struct known_derivation) + fromset_len + toset_len);
132 if (new_deriv != NULL)
133 {
134 new_deriv->from = memcpy (new_deriv + 1, fromset, fromset_len);
135 new_deriv->to = memcpy ((char *) new_deriv->from + fromset_len,
136 toset, toset_len);
137
138 new_deriv->steps = handle;
139 new_deriv->nsteps = nsteps;
140
141 __tsearch (new_deriv, &known_derivations, derivation_compare);
142 }
143 /* Please note that we don't complain if the allocation failed. This
144 is not tragically but in case we use the memory debugging facilities
145 not all memory will be freed. */
146 }
147
148 static void
149 internal_function
150 free_derivation (void *p)
151 {
152 struct known_derivation *deriv = (struct known_derivation *) p;
153 size_t cnt;
154
155 for (cnt = 0; cnt < deriv->nsteps; ++cnt)
156 if (deriv->steps[cnt].end_fct)
157 (*deriv->steps[cnt].end_fct) (&deriv->steps[cnt]);
158
159 free ((struct gconv_step *) deriv->steps);
160 free (deriv);
161 }
162
163
164 static int
165 internal_function
166 gen_steps (struct derivation_step *best, const char *toset,
167 const char *fromset, struct gconv_step **handle, size_t *nsteps)
168 {
169 size_t step_cnt = 0;
170 struct gconv_step *result;
171 struct derivation_step *current;
172 int status = GCONV_NOMEM;
173
174 /* First determine number of steps. */
175 for (current = best; current->last != NULL; current = current->last)
176 ++step_cnt;
177
178 result = (struct gconv_step *) malloc (sizeof (struct gconv_step)
179 * step_cnt);
180 if (result != NULL)
181 {
182 int failed = 0;
183
184 *nsteps = step_cnt;
185 current = best;
186 while (step_cnt-- > 0)
187 {
188 result[step_cnt].from_name = (step_cnt == 0
189 ? __strdup (fromset)
190 : current->last->result_set);
191 result[step_cnt].to_name = (step_cnt + 1 == *nsteps
192 ? __strdup (current->result_set)
193 : result[step_cnt + 1].from_name);
194
195 if (current->code->module_name[0] == '/')
196 {
197 /* Load the module, return handle for it. */
198 struct gconv_loaded_object *shlib_handle =
199 __gconv_find_shlib (current->code->module_name);
200
201 if (shlib_handle == NULL)
202 {
203 failed = 1;
204 break;
205 }
206
207 result[step_cnt].shlib_handle = shlib_handle;
208 result[step_cnt].modname = shlib_handle->name;
209 result[step_cnt].counter = 0;
210 result[step_cnt].fct = shlib_handle->fct;
211 result[step_cnt].init_fct = shlib_handle->init_fct;
212 result[step_cnt].end_fct = shlib_handle->end_fct;
213 }
214 else
215 /* It's a builtin transformation. */
216 __gconv_get_builtin_trans (current->code->module_name,
217 &result[step_cnt]);
218
219 /* Call the init function. */
220 if (result[step_cnt].init_fct != NULL)
221 (*result[step_cnt].init_fct) (&result[step_cnt]);
222
223 current = current->last;
224 }
225
226 if (failed != 0)
227 {
228 /* Something went wrong while initializing the modules. */
229 while (++step_cnt < *nsteps)
230 {
231 if (result[step_cnt].end_fct != NULL)
232 (*result[step_cnt].end_fct) (&result[step_cnt]);
233 __gconv_release_shlib (result[step_cnt].shlib_handle);
234 }
235 free (result);
236 *nsteps = 0;
237 status = GCONV_NOCONV;
238 }
239 else
240 {
241 *handle = result;
242 status = GCONV_OK;
243 }
244 }
245
246 return status;
247 }
248
249
250 /* The main function: find a possible derivation from the `fromset' (either
251 the given name or the alias) to the `toset' (again with alias). */
252 static int
253 internal_function
254 find_derivation (const char *toset, const char *toset_expand,
255 const char *fromset, const char *fromset_expand,
256 struct gconv_step **handle, size_t *nsteps)
257 {
258 __libc_lock_define_initialized (static, lock)
259 struct derivation_step *first, *current, **lastp, *best = NULL;
260 int best_cost = 0;
261 int result;
262
263 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
264 handle, nsteps);
265 if (result == GCONV_OK)
266 return result;
267
268 __libc_lock_lock (lock);
269
270 /* There is a small chance that this derivation is meanwhile found. This
271 can happen if in `find_derivation' we look for this derivation, didn't
272 find it but at the same time another thread looked for this derivation. */
273 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
274 handle, nsteps);
275 if (result == GCONV_OK)
276 return result;
277
278 /* ### TODO
279 For now we use a simple algorithm with quadratic runtime behaviour.
280 The task is to match the `toset' with any of the available rules,
281 starting from FROMSET. */
282 if (fromset_expand != NULL)
283 {
284 first = NEW_STEP (fromset_expand, NULL, NULL);
285 first->next = NEW_STEP (fromset, NULL, NULL);
286 lastp = &first->next->next;
287 }
288 else
289 {
290 first = NEW_STEP (fromset, NULL, NULL);
291 lastp = &first->next;
292 }
293
294 current = first;
295 while (current != NULL)
296 {
297 /* Now match all the available module specifications against the
298 current charset name. If any of them matches check whether
299 we already have a derivation for this charset. If yes, use the
300 one with the lower costs. Otherwise add the new charset at the
301 end. */
302 size_t cnt;
303
304 for (cnt = 0; cnt < __gconv_nmodules; ++cnt)
305 {
306 const char *result_set = NULL;
307
308 if (__gconv_modules_db[cnt]->from_pattern == NULL)
309 {
310 if (__strcasecmp (current->result_set,
311 __gconv_modules_db[cnt]->from_constpfx) == 0)
312 {
313 if (strcmp (__gconv_modules_db[cnt]->to_string, "-") == 0)
314 result_set = toset_expand ?: toset;
315 else
316 result_set = __gconv_modules_db[cnt]->to_string;
317 }
318 }
319 else
320 /* We have a regular expression. First see if the prefix
321 matches. */
322 if (__strncasecmp (current->result_set,
323 __gconv_modules_db[cnt]->from_constpfx,
324 __gconv_modules_db[cnt]->from_constpfx_len)
325 == 0)
326 {
327 /* First compile the regex if not already done. */
328 if (__gconv_modules_db[cnt]->from_regex == NULL)
329 {
330 regex_t *newp = (regex_t *) malloc (sizeof (regex_t));
331
332 if (regcomp (newp, __gconv_modules_db[cnt]->from_pattern,
333 REG_EXTENDED | REG_ICASE) != 0)
334 {
335 /* Something is wrong. Remember this. */
336 free (newp);
337 __gconv_modules_db[cnt]->from_regex = (regex_t *) -1L;
338 }
339 else
340 __gconv_modules_db[cnt]->from_regex = newp;
341 }
342
343 if (__gconv_modules_db[cnt]->from_regex != (regex_t *) -1L)
344 {
345 /* Try to match the from name. */
346 regmatch_t match[4];
347
348 if (regexec (__gconv_modules_db[cnt]->from_regex,
349 current->result_set, 4, match, 0) == 0
350 && match[0].rm_so == 0
351 && current->result_set[match[0].rm_eo] == '\0')
352 {
353 /* At least the whole <from> string is matched.
354 We must now match sed-like possible
355 subexpressions from the match to the
356 toset expression. */
357 #define ENSURE_LEN(LEN) \
358 if (wp + (LEN) >= constr + len - 1) \
359 { \
360 char *newp = alloca (len += 128); \
361 memcpy (newp, constr, wp - constr); \
362 wp = newp + (wp - constr); \
363 constr = newp; \
364 }
365 size_t len = 128;
366 char *constr = alloca (len);
367 char *wp = constr;
368 const char *cp = __gconv_modules_db[cnt]->to_string;
369
370 while (*cp != '\0')
371 {
372 if (*cp != '\\')
373 {
374 ENSURE_LEN (1);
375 *wp++ = *cp++;
376 }
377 else if (cp[1] == '\0')
378 /* Backslash at end of string. */
379 break;
380 else
381 {
382 ++cp;
383 if (*cp == '\\')
384 {
385 *wp++ = *cp++;
386 ENSURE_LEN (1);
387 }
388 else if (*cp < '1' || *cp > '3')
389 break;
390 else
391 {
392 int idx = *cp - '0';
393 if (match[idx].rm_so == -1)
394 /* No match. */
395 break;
396
397 ENSURE_LEN (match[idx].rm_eo
398 - match[idx].rm_so);
399 wp = __mempcpy (wp,
400 &current->result_set[match[idx].rm_so],
401 match[idx].rm_eo
402 - match[idx].rm_so);
403 ++cp;
404 }
405 }
406 }
407 if (*cp == '\0' && wp != constr)
408 {
409 /* Terminate the constructed string. */
410 *wp = '\0';
411 result_set = constr;
412 }
413 }
414 }
415 }
416
417 if (result_set != NULL)
418 {
419 /* We managed to find a derivation. First see whether
420 this is what we are looking for. */
421 if (__strcasecmp (result_set, toset) == 0
422 || (toset_expand != NULL
423 && __strcasecmp (result_set, toset_expand) == 0))
424 {
425 /* Determine the costs. If they are lower than the
426 previous solution (or this is the first solution)
427 remember this solution. */
428 int cost = __gconv_modules_db[cnt]->cost;
429 struct derivation_step *runp = current;
430 while (runp->code != NULL)
431 {
432 cost += runp->code->cost;
433 runp = runp->last;
434 }
435 if (best == NULL || cost < best_cost)
436 {
437 best = NEW_STEP (result_set, __gconv_modules_db[cnt],
438 current);
439 best_cost = cost;
440 }
441 }
442 else
443 {
444 /* Append at the end if there is no entry with this name. */
445 struct derivation_step *runp = first;
446
447 while (runp != NULL)
448 {
449 if (__strcasecmp (result_set, runp->result_set) == 0)
450 break;
451 runp = runp->next;
452 }
453
454 if (runp == NULL)
455 {
456 *lastp = NEW_STEP (result_set, __gconv_modules_db[cnt],
457 current);
458 lastp = &(*lastp)->next;
459 }
460 }
461 }
462 }
463
464 /* Go on with the next entry. */
465 current = current->next;
466 }
467
468 if (best != NULL)
469 /* We really found a way to do the transformation. Now build a data
470 structure describing the transformation steps.*/
471 result = gen_steps (best, toset_expand ?: toset, fromset_expand ?: fromset,
472 handle, nsteps);
473 else
474 {
475 /* We haven't found a transformation. Clear the result values. */
476 *handle = NULL;
477 *nsteps = 0;
478 }
479
480 /* Add result in any case to list of known derivations. */
481 add_derivation (fromset_expand ?: fromset, toset_expand ?: toset,
482 *handle, *nsteps);
483
484 __libc_lock_unlock (lock);
485
486 return result;
487 }
488
489
490 int
491 internal_function
492 __gconv_find_transform (const char *toset, const char *fromset,
493 struct gconv_step **handle, size_t *nsteps)
494 {
495 __libc_once_define (static, once);
496 const char *fromset_expand = NULL;
497 const char *toset_expand = NULL;
498 int result;
499
500 /* Ensure that the configuration data is read. */
501 __libc_once (once, __gconv_read_conf);
502
503 /* Acquire the lock. */
504 __libc_lock_lock (lock);
505
506 /* If we don't have a module database return with an error. */
507 if (__gconv_modules_db == NULL)
508 return GCONV_NOCONV;
509
510 /* See whether the names are aliases. */
511 if (__gconv_alias_db != NULL)
512 {
513 struct gconv_alias key;
514 struct gconv_alias **found;
515
516 key.fromname = fromset;
517 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
518 fromset_expand = found != NULL ? (*found)->toname : NULL;
519
520 key.fromname = toset;
521 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
522 toset_expand = found != NULL ? (*found)->toname : NULL;
523 }
524
525 result = find_derivation (toset, toset_expand, fromset, fromset_expand,
526 handle, nsteps);
527
528 /* Increment the user counter. */
529 if (result == GCONV_OK)
530 {
531 size_t cnt = *nsteps;
532 struct gconv_step *steps = *handle;
533
534 do
535 if (steps[--cnt].counter++ == 0)
536 {
537 steps[--cnt].shlib_handle =
538 __gconv_find_shlib (steps[--cnt].modname);
539 if (steps[--cnt].shlib_handle == NULL)
540 {
541 /* Oops, this is the second time we use this module (after
542 unloading) and this time loading failed!? */
543 while (++cnt < *nsteps)
544 __gconv_release_shlib (steps[cnt].shlib_handle);
545 result = GCONV_NOCONV;
546 break;
547 }
548 }
549 while (cnt > 0);
550 }
551
552 /* Release the lock. */
553 __libc_lock_unlock (lock);
554
555 /* The following code is necessary since `find_derivation' will return
556 GCONV_OK even when no derivation was found but the same request
557 was processed before. I.e., negative results will also be cached. */
558 return (result == GCONV_OK
559 ? (*handle == NULL ? GCONV_NOCONV : GCONV_OK)
560 : result);
561 }
562
563
564 /* Release the entries of the modules list. */
565 int
566 internal_function
567 __gconv_close_transform (struct gconv_step *steps, size_t nsteps)
568 {
569 int result = GCONV_OK;
570
571 /* Acquire the lock. */
572 __libc_lock_lock (lock);
573
574 while (nsteps-- > 0)
575 if (steps[nsteps].shlib_handle != NULL
576 && --steps[nsteps].counter == 0)
577 {
578 result = __gconv_release_shlib (steps[nsteps].shlib_handle);
579 if (result != GCONV_OK)
580 break;
581 steps[nsteps].shlib_handle = NULL;
582 }
583
584 /* Release the lock. */
585 __libc_lock_unlock (lock);
586
587 return result;
588 }
589
590
591 /* Free all resources if necessary. */
592 static void __attribute__ ((unused))
593 free_mem (void)
594 {
595 size_t cnt;
596
597 if (__gconv_alias_db != NULL)
598 __tdestroy (__gconv_alias_db, free);
599
600 for (cnt = 0; cnt < __gconv_nmodules; ++cnt)
601 {
602 if (__gconv_modules_db[cnt]->from_regex != NULL)
603 regfree ((regex_t *) __gconv_modules_db[cnt]->from_regex);
604
605 /* Modules which names do not start with a slash are builtin
606 transformations and the memory is not allocated dynamically. */
607 if (__gconv_modules_db[cnt]->module_name[0] == '/')
608 free (__gconv_modules_db[cnt]);
609 }
610
611 if (known_derivations != NULL)
612 __tdestroy (known_derivations, free_derivation);
613 }
614
615 text_set_element (__libc_subfreeres, free_mem);