]>
git.ipfire.org Git - thirdparty/glibc.git/blob - iconv/gconv_db.c
1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997, 1998 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
24 #include <bits/libc-lock.h>
26 #include <gconv_int.h>
29 /* Simple data structure for alias mapping. We have two names, `from' and `to'. */
/* File-level conversion state.
   __gconv_alias_db is handed by address to __tfind and by value to
   __tdestroy elsewhere in this file, i.e. it is the handle of the alias
   search tree.  __gconv_nmodules bounds every loop over
   __gconv_modules_db, which is an array of pointers to module
   descriptions.  The lock defined last is taken by
   __gconv_find_transform and __gconv_close_transform.  */
31 void *__gconv_alias_db
;
33 /* Array with available modules. */
34 size_t __gconv_nmodules
;
35 struct gconv_module
**__gconv_modules_db
;
37 /* We modify global data. */
38 __libc_lock_define_initialized (static, lock
)
/* Comparison callback for the alias search tree.
   P1 and P2 are treated as pointers to struct gconv_alias and ordered
   by a __strcasecmp comparison of their fromname fields; the toname
   field takes no part in the ordering.  The result of __strcasecmp is
   returned unchanged.  */
41 /* Function for searching alias. */
43 __gconv_alias_compare (const void *p1
, const void *p2
)
45 struct gconv_alias
*s1
= (struct gconv_alias
*) p1
;
46 struct gconv_alias
*s2
= (struct gconv_alias
*) p2
;
47 return __strcasecmp (s1
->fromname
, s2
->fromname
);
/* One node of the search performed by find_derivation.
   result_set - name of the charset reached at this node.
   code       - module that produced result_set; NULL for the start
                nodes created from the source charset names.
   last       - predecessor in the derivation chain; gen_steps and the
                cost computation walk this link back to the start.
   next       - link of the work list that find_derivation iterates
                over (current = current->next).  */
51 /* To search for a derivation we create a list of intermediate steps.
52 Each element contains a pointer to the element which precedes it
53 in the derivation order. */
54 struct derivation_step
56 const char *result_set
;
57 struct gconv_module
*code
;
58 struct derivation_step
*last
;
59 struct derivation_step
*next
;
/* Create a struct derivation_step with alloca and fill in its
   result_set, code and last fields from the three arguments.  Because
   alloca is used, the node lives only as long as the stack frame of the
   function that expands the macro.  The tail of the macro is not part
   of this extract, so how next is initialised and what the statement
   expression evaluates to cannot be confirmed here.  */
62 #define NEW_STEP(result, module, last_mod) \
63 ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
64 newp->result_set = result; \
65 newp->code = module; \
66 newp->last = last_mod; \
/* Cache record for one (from, to) request.
   Only the steps member is visible in this extract; the rest of the
   file also reads and writes members named from, to and nsteps, and
   derivation_lookup initialises a key as { fromset, toset, NULL, 0 }.
   NOTE(review): member order is presumably from, to, steps, nsteps -
   confirm against the full declaration.  */
71 /* If a specific transformation is used more than once we should not need
72 to start looking for it again. Instead cache each successful result. */
73 struct known_derivation
77 struct gconv_step
*steps
;
/* Ordering callback for the known_derivations tree.
   P1 and P2 are treated as pointers to struct known_derivation.  The
   from names are compared with strcmp and the to names are compared
   with strcmp (case-sensitive, unlike the alias comparison).  The
   statements that combine the two results and return are not part of
   this extract.  */
81 /* Compare function for database of found derivations. */
83 derivation_compare (const void *p1
, const void *p2
)
85 struct known_derivation
*s1
= (struct known_derivation
*) p1
;
86 struct known_derivation
*s2
= (struct known_derivation
*) p2
;
89 result
= strcmp (s1
->from
, s2
->from
);
91 result
= strcmp (s1
->to
, s2
->to
);
/* Handle of the cache of already computed derivations.  It is passed
   by address to __tfind (derivation_lookup) and __tsearch
   (add_derivation), ordered by derivation_compare, and torn down with
   __tdestroy using free_derivation.  */
95 /* The search tree for known derivations. */
96 static void *known_derivations
;
/* Query the cache of known derivations.
   A key { fromset, toset, NULL, 0 } is looked up in known_derivations
   with __tfind and derivation_compare.  The visible code copies the
   steps and nsteps members of the lookup result into *HANDLE and
   *NSTEPS.  As the comment below states, GCONV_OK is returned for a
   cached entry even when that entry records a failed search, so callers
   must also inspect *HANDLE.
   NOTE(review): the check of the __tfind result, any dereference of the
   node pointer it returns, and the return statements are on lines not
   present in this extract.  */
98 /* Look up whether given transformation was already requested before. */
101 derivation_lookup (const char *fromset
, const char *toset
,
102 struct gconv_step
**handle
, size_t *nsteps
)
104 struct known_derivation key
= { fromset
, toset
, NULL
, 0 };
105 struct known_derivation
*result
;
107 result
= __tfind (&key
, &known_derivations
, derivation_compare
);
112 *handle
= result
->steps
;
113 *nsteps
= result
->nsteps
;
115 /* Please note that we return GCONV_OK even if the last search for
116 this transformation was unsuccessful. */
/* Record the outcome of a search in the known_derivations cache.
   One malloc block holds the struct known_derivation followed by
   copies of both names: the copy of FROMSET (including its NUL) starts
   directly after the struct, and the to member is set from a memcpy
   whose destination is FROMSET_LEN bytes past the from copy.  HANDLE
   and NSTEPS are stored as given (the array is not copied), and the
   record is inserted with __tsearch using derivation_compare.
   If malloc returns NULL the visible code skips the whole insertion and
   the result is simply not cached.  The return value of __tsearch is
   not examined in the visible code.  */
120 /* Add new derivation to list of known ones. */
123 add_derivation (const char *fromset
, const char *toset
,
124 struct gconv_step
*handle
, size_t nsteps
)
126 struct known_derivation
*new_deriv
;
127 size_t fromset_len
= strlen (fromset
) + 1;
128 size_t toset_len
= strlen (toset
) + 1;
130 new_deriv
= (struct known_derivation
*)
131 malloc (sizeof (struct known_derivation
) + fromset_len
+ toset_len
);
132 if (new_deriv
!= NULL
)
134 new_deriv
->from
= memcpy (new_deriv
+ 1, fromset
, fromset_len
);
135 new_deriv
->to
= memcpy ((char *) new_deriv
->from
+ fromset_len
,
138 new_deriv
->steps
= handle
;
139 new_deriv
->nsteps
= nsteps
;
141 __tsearch (new_deriv
, &known_derivations
, derivation_compare
);
143 /* Please note that we don't complain if the allocation failed. This
144 is not tragic, but in case we use the memory debugging facilities
145 not all memory will be freed. */
/* Destructor callback given to __tdestroy for known_derivations.
   P is treated as a struct known_derivation.  For each of its nsteps
   steps the end_fct hook is invoked on that step when the hook is
   non-null, and afterwards the steps array itself is freed.
   NOTE(review): freeing of the known_derivation record, if any, would
   be on lines not present in this extract.  */
150 free_derivation (void *p
)
152 struct known_derivation
*deriv
= (struct known_derivation
*) p
;
155 for (cnt
= 0; cnt
< deriv
->nsteps
; ++cnt
)
156 if (deriv
->steps
[cnt
].end_fct
)
157 (*deriv
->steps
[cnt
].end_fct
) (&deriv
->steps
[cnt
]);
159 free ((struct gconv_step
*) deriv
->steps
);
/* Turn the chain ending in BEST into an array of struct gconv_step.
   The chain is first walked through its last links to count the steps,
   then an array is obtained with malloc and filled from the highest
   index down (while step_cnt-- > 0), following current->last after
   each element.
   Name handling in the visible code: the to_name of the final step is
   a __strdup of current->result_set, while every other to_name aliases
   the from_name of the following step, so adjacent steps share one
   string.
   Module handling: a module_name starting with a slash is loaded with
   __gconv_find_shlib and the step takes its handle, name, fct,
   init_fct and end_fct from the loaded object with counter set to 0;
   any other name is resolved through __gconv_get_builtin_trans.  A
   non-null init_fct is then called on the step.
   Failure path: steps above step_cnt get their end_fct called (when
   non-null) and their shlib_handle released, and status becomes
   GCONV_NOCONV; status starts out as GCONV_NOMEM.
   NOTE(review): the allocation size, the null check on the array, the
   assignments to *HANDLE and *NSTEPS and the return are on lines not
   present in this extract.  */
166 gen_steps (struct derivation_step
*best
, const char *toset
,
167 const char *fromset
, struct gconv_step
**handle
, size_t *nsteps
)
170 struct gconv_step
*result
;
171 struct derivation_step
*current
;
172 int status
= GCONV_NOMEM
;
174 /* First determine number of steps. */
175 for (current
= best
; current
->last
!= NULL
; current
= current
->last
)
178 result
= (struct gconv_step
*) malloc (sizeof (struct gconv_step
)
186 while (step_cnt
-- > 0)
188 result
[step_cnt
].from_name
= (step_cnt
== 0
190 : current
->last
->result_set
);
191 result
[step_cnt
].to_name
= (step_cnt
+ 1 == *nsteps
192 ? __strdup (current
->result_set
)
193 : result
[step_cnt
+ 1].from_name
);
195 if (current
->code
->module_name
[0] == '/')
197 /* Load the module, return handle for it. */
198 struct gconv_loaded_object
*shlib_handle
=
199 __gconv_find_shlib (current
->code
->module_name
);
201 if (shlib_handle
== NULL
)
207 result
[step_cnt
].shlib_handle
= shlib_handle
;
208 result
[step_cnt
].modname
= shlib_handle
->name
;
209 result
[step_cnt
].counter
= 0;
210 result
[step_cnt
].fct
= shlib_handle
->fct
;
211 result
[step_cnt
].init_fct
= shlib_handle
->init_fct
;
212 result
[step_cnt
].end_fct
= shlib_handle
->end_fct
;
215 /* It's a builtin transformation. */
216 __gconv_get_builtin_trans (current
->code
->module_name
,
219 /* Call the init function. */
220 if (result
[step_cnt
].init_fct
!= NULL
)
221 (*result
[step_cnt
].init_fct
) (&result
[step_cnt
]);
223 current
= current
->last
;
228 /* Something went wrong while initializing the modules. */
229 while (++step_cnt
< *nsteps
)
231 if (result
[step_cnt
].end_fct
!= NULL
)
232 (*result
[step_cnt
].end_fct
) (&result
[step_cnt
]);
233 __gconv_release_shlib (result
[step_cnt
].shlib_handle
);
237 status
= GCONV_NOCONV
;
/* Search for a chain of modules converting FROMSET into TOSET.
   TOSET_EXPAND and FROMSET_EXPAND are the alias expansions of the two
   names, or NULL; wherever a single name is needed the code uses the
   expansion when present (X_expand ?: X).

   Outline of the visible code:
   - The cache is queried with derivation_lookup, then the lock is
     taken and the cache is queried a second time, for the reason
     given in the comment at that point.
   - The work list starts with FROMSET_EXPAND (when non-null) followed
     by FROMSET; lastp always addresses the next link of the tail.
   - For every work-list entry all __gconv_nmodules modules are tried.
     A module without from_pattern matches when its from_constpfx
     equals the entry name under __strcasecmp; a to_string of "-" then
     stands for the requested target name.  A module with from_pattern
     is first filtered by a __strncasecmp on from_constpfx, its regex
     is compiled on first use with REG_EXTENDED | REG_ICASE and kept in
     from_regex, and (regex_t *) -1L marks a pattern that failed to
     compile.  The regex must match the entire entry name; the target
     name is then assembled from to_string in an alloca buffer that
     ENSURE_LEN regrows in steps of 128 bytes, with digits 1 to 3
     selecting sub-matches.
   - A produced name equal to TOSET or TOSET_EXPAND (case-insensitive)
     is a candidate: its cost is the module cost plus the cost of
     every module on the chain behind it, and the cheapest candidate
     is kept in best.  Any other produced name is appended to the work
     list unless an entry with that name already exists.
   - Afterwards gen_steps builds the step array from best, or the
     result values are cleared, and add_derivation caches the outcome
     in either case before the lock is released.

   NOTE(review): the __libc_lock_define_initialized (static, lock) at
   the top appears to define a lock local to this function that reuses
   the name of the file-scope lock; the only visible caller already
   holds the file-scope lock - confirm that this second lock is
   intended.
   NOTE(review): the token directly before ->result_set [match [idx]
   .rm_so] in the substitution code looks like a mis-encoded &current
   (HTML entity damage from the extraction) - restore from upstream.
   NOTE(review): work-list nodes and the name buffer come from alloca
   inside loops, so stack use grows with the number of modules tried;
   presumably acceptable for the expected database size - confirm.  */
250 /* The main function: find a possible derivation from the `fromset' (either
251 the given name or the alias) to the `toset' (again with alias). */
254 find_derivation (const char *toset
, const char *toset_expand
,
255 const char *fromset
, const char *fromset_expand
,
256 struct gconv_step
**handle
, size_t *nsteps
)
258 __libc_lock_define_initialized (static, lock
)
259 struct derivation_step
*first
, *current
, **lastp
, *best
= NULL
;
263 result
= derivation_lookup (fromset_expand
?: fromset
, toset_expand
?: toset
,
265 if (result
== GCONV_OK
)
268 __libc_lock_lock (lock
);
270 /* There is a small chance that this derivation is meanwhile found. This
271 can happen if in `find_derivation' we look for this derivation, didn't
272 find it but at the same time another thread looked for this derivation. */
273 result
= derivation_lookup (fromset_expand
?: fromset
, toset_expand
?: toset
,
275 if (result
== GCONV_OK
)
279 For now we use a simple algorithm with quadratic runtime behaviour.
280 The task is to match the `toset' with any of the available rules,
281 starting from FROMSET. */
282 if (fromset_expand
!= NULL
)
284 first
= NEW_STEP (fromset_expand
, NULL
, NULL
);
285 first
->next
= NEW_STEP (fromset
, NULL
, NULL
);
286 lastp
= &first
->next
->next
;
290 first
= NEW_STEP (fromset
, NULL
, NULL
);
291 lastp
= &first
->next
;
295 while (current
!= NULL
)
297 /* Now match all the available module specifications against the
298 current charset name. If any of them matches check whether
299 we already have a derivation for this charset. If yes, use the
300 one with the lower costs. Otherwise add the new charset at the
304 for (cnt
= 0; cnt
< __gconv_nmodules
; ++cnt
)
306 const char *result_set
= NULL
;
308 if (__gconv_modules_db
[cnt
]->from_pattern
== NULL
)
310 if (__strcasecmp (current
->result_set
,
311 __gconv_modules_db
[cnt
]->from_constpfx
) == 0)
313 if (strcmp (__gconv_modules_db
[cnt
]->to_string
, "-") == 0)
314 result_set
= toset_expand
?: toset
;
316 result_set
= __gconv_modules_db
[cnt
]->to_string
;
320 /* We have a regular expression. First see if the prefix
322 if (__strncasecmp (current
->result_set
,
323 __gconv_modules_db
[cnt
]->from_constpfx
,
324 __gconv_modules_db
[cnt
]->from_constpfx_len
)
327 /* First compile the regex if not already done. */
328 if (__gconv_modules_db
[cnt
]->from_regex
== NULL
)
330 regex_t
*newp
= (regex_t
*) malloc (sizeof (regex_t
));
332 if (regcomp (newp
, __gconv_modules_db
[cnt
]->from_pattern
,
333 REG_EXTENDED
| REG_ICASE
) != 0)
335 /* Something is wrong. Remember this. */
337 __gconv_modules_db
[cnt
]->from_regex
= (regex_t
*) -1L;
340 __gconv_modules_db
[cnt
]->from_regex
= newp
;
343 if (__gconv_modules_db
[cnt
]->from_regex
!= (regex_t
*) -1L)
345 /* Try to match the from name. */
348 if (regexec (__gconv_modules_db
[cnt
]->from_regex
,
349 current
->result_set
, 4, match
, 0) == 0
350 && match
[0].rm_so
== 0
351 && current
->result_set
[match
[0].rm_eo
] == '\0')
353 /* At least the whole <from> string is matched.
354 We must now match sed-like possible
355 subexpressions from the match to the
357 #define ENSURE_LEN(LEN) \
358 if (wp + (LEN) >= constr + len - 1) \
360 char *newp = alloca (len += 128); \
361 memcpy (newp, constr, wp - constr); \
362 wp = newp + (wp - constr); \
366 char *constr
= alloca (len
);
368 const char *cp
= __gconv_modules_db
[cnt
]->to_string
;
377 else if (cp
[1] == '\0')
378 /* Backslash at end of string. */
388 else if (*cp
< '1' || *cp
> '3')
393 if (match
[idx
].rm_so
== -1)
397 ENSURE_LEN (match
[idx
].rm_eo
400 ¤t
->result_set
[match
[idx
].rm_so
],
407 if (*cp
== '\0' && wp
!= constr
)
409 /* Terminate the constructed string. */
417 if (result_set
!= NULL
)
419 /* We managed to find a derivation. First see whether
420 this is what we are looking for. */
421 if (__strcasecmp (result_set
, toset
) == 0
422 || (toset_expand
!= NULL
423 && __strcasecmp (result_set
, toset_expand
) == 0))
425 /* Determine the costs. If they are lower than the
426 previous solution (or this is the first solution)
427 remember this solution. */
428 int cost
= __gconv_modules_db
[cnt
]->cost
;
429 struct derivation_step
*runp
= current
;
430 while (runp
->code
!= NULL
)
432 cost
+= runp
->code
->cost
;
435 if (best
== NULL
|| cost
< best_cost
)
437 best
= NEW_STEP (result_set
, __gconv_modules_db
[cnt
],
444 /* Append at the end if there is no entry with this name. */
445 struct derivation_step
*runp
= first
;
449 if (__strcasecmp (result_set
, runp
->result_set
) == 0)
456 *lastp
= NEW_STEP (result_set
, __gconv_modules_db
[cnt
],
458 lastp
= &(*lastp
)->next
;
464 /* Go on with the next entry. */
465 current
= current
->next
;
469 /* We really found a way to do the transformation. Now build a data
470 structure describing the transformation steps.*/
471 result
= gen_steps (best
, toset_expand
?: toset
, fromset_expand
?: fromset
,
475 /* We haven't found a transformation. Clear the result values. */
480 /* Add result in any case to list of known derivations. */
481 add_derivation (fromset_expand
?: fromset
, toset_expand
?: toset
,
484 __libc_lock_unlock (lock
);
/* Entry point: obtain the conversion steps from FROMSET to TOSET.
   The configuration is read once through __libc_once with
   __gconv_read_conf, then the file-scope lock is taken.  A null
   __gconv_modules_db is treated as an error (see the comment at that
   check).  When an alias database exists, both names are looked up
   with __tfind and __gconv_alias_compare and their toname expansions,
   or NULL, are passed on to find_derivation.
   On GCONV_OK the visible code increments the counter of steps and,
   for a counter that was 0, reloads the module through
   __gconv_find_shlib using the step modname; if that reload yields
   NULL, the shlib_handle of the steps after cnt is released and the
   result becomes GCONV_NOCONV.
   Because negative results are cached, the final return maps
   GCONV_OK with a null *HANDLE to GCONV_NOCONV.
   NOTE(review): the visible reload code contains --cnt four times
   (counter test, shlib_handle store, modname read, null check).  If
   these execute within one loop iteration they address four different
   array elements, which looks unintended - confirm against the full
   loop, which is not part of this extract.  */
492 __gconv_find_transform (const char *toset
, const char *fromset
,
493 struct gconv_step
**handle
, size_t *nsteps
)
495 __libc_once_define (static, once
);
496 const char *fromset_expand
= NULL
;
497 const char *toset_expand
= NULL
;
500 /* Ensure that the configuration data is read. */
501 __libc_once (once
, __gconv_read_conf
);
503 /* Acquire the lock. */
504 __libc_lock_lock (lock
);
506 /* If we don't have a module database return with an error. */
507 if (__gconv_modules_db
== NULL
)
510 /* See whether the names are aliases. */
511 if (__gconv_alias_db
!= NULL
)
513 struct gconv_alias key
;
514 struct gconv_alias
**found
;
516 key
.fromname
= fromset
;
517 found
= __tfind (&key
, &__gconv_alias_db
, __gconv_alias_compare
);
518 fromset_expand
= found
!= NULL
? (*found
)->toname
: NULL
;
520 key
.fromname
= toset
;
521 found
= __tfind (&key
, &__gconv_alias_db
, __gconv_alias_compare
);
522 toset_expand
= found
!= NULL
? (*found
)->toname
: NULL
;
525 result
= find_derivation (toset
, toset_expand
, fromset
, fromset_expand
,
528 /* Increment the user counter. */
529 if (result
== GCONV_OK
)
531 size_t cnt
= *nsteps
;
532 struct gconv_step
*steps
= *handle
;
535 if (steps
[--cnt
].counter
++ == 0)
537 steps
[--cnt
].shlib_handle
=
538 __gconv_find_shlib (steps
[--cnt
].modname
);
539 if (steps
[--cnt
].shlib_handle
== NULL
)
541 /* Oops, this is the second time we use this module (after
542 unloading) and this time loading failed!? */
543 while (++cnt
< *nsteps
)
544 __gconv_release_shlib (steps
[cnt
].shlib_handle
);
545 result
= GCONV_NOCONV
;
552 /* Release the lock. */
553 __libc_lock_unlock (lock
);
555 /* The following code is necessary since `find_derivation' will return
556 GCONV_OK even when no derivation was found but the same request
557 was processed before. I.e., negative results will also be cached. */
558 return (result
== GCONV_OK
559 ? (*handle
== NULL
? GCONV_NOCONV
: GCONV_OK
)
/* Drop one use of the steps in STEPS (NSTEPS elements).
   Runs with the file-scope lock held.  For a step that has a non-null
   shlib_handle, its counter is decremented, and when it reaches 0 the
   shared object is released through __gconv_release_shlib, whose
   return value becomes the result.  The visible code also resets the
   shlib_handle of the step to NULL.  The result starts out as
   GCONV_OK.
   NOTE(review): the loop that moves nsteps across the array, the
   statement guarded by the result != GCONV_OK check and the return
   are on lines not present in this extract.  */
564 /* Release the entries of the modules list. */
567 __gconv_close_transform (struct gconv_step
*steps
, size_t nsteps
)
569 int result
= GCONV_OK
;
571 /* Acquire the lock. */
572 __libc_lock_lock (lock
);
575 if (steps
[nsteps
].shlib_handle
!= NULL
576 && --steps
[nsteps
].counter
== 0)
578 result
= __gconv_release_shlib (steps
[nsteps
].shlib_handle
);
579 if (result
!= GCONV_OK
)
581 steps
[nsteps
].shlib_handle
= NULL
;
584 /* Release the lock. */
585 __libc_lock_unlock (lock
);
/* Cleanup routine (registered below under the name free_mem).
   Destroys the alias tree with __tdestroy and free, then for every
   module calls regfree on a non-null from_regex and frees the module
   record when its module_name starts with a slash, and finally
   destroys known_derivations with free_derivation as the per-node
   destructor.
   NOTE(review): find_derivation stores (regex_t *) -1L in from_regex
   for a pattern that failed to compile; the check here only excludes
   NULL, so regfree would be reached with that sentinel - confirm and
   guard.
   NOTE(review): from_regex is obtained with malloc in find_derivation;
   regfree is the only release visible here, so the regex_t object
   itself appears not to be freed - confirm.  */
591 /* Free all resources if necessary. */
592 static void __attribute__ ((unused
))
597 if (__gconv_alias_db
!= NULL
)
598 __tdestroy (__gconv_alias_db
, free
);
600 for (cnt
= 0; cnt
< __gconv_nmodules
; ++cnt
)
602 if (__gconv_modules_db
[cnt
]->from_regex
!= NULL
)
603 regfree ((regex_t
*) __gconv_modules_db
[cnt
]->from_regex
);
605 /* Modules whose names do not start with a slash are builtin
606 transformations and the memory is not allocated dynamically. */
607 if (__gconv_modules_db
[cnt
]->module_name
[0] == '/')
608 free (__gconv_modules_db
[cnt
]);
611 if (known_derivations
!= NULL
)
612 __tdestroy (known_derivations
, free_derivation
);
/* Add free_mem to the __libc_subfreeres set.
   NOTE(review): presumably this is how the C library invokes the
   cleanup routine when asked to release its internal memory - confirm
   against the definition of text_set_element.  */
615 text_set_element (__libc_subfreeres
, free_mem
);