From 69785d361448c85cd2df2a79d6aeba30f0984229 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Sat, 14 Jun 2008 10:36:30 -0400 Subject: [PATCH] Use wctomb(), which is present in C90, instead of wcrtomb(), which wasn't introduced until C99. In particular, this allows the new locale support to work on FreeBSD 4, which doesn't implement C99 wide character functions. Include an autoconf check for wctomb(); if it's not present at all, use the built-in UTF-8 functions instead so that pax extended headers (which are stored in UTF-8) can still be minimally supported. SVN-Revision: 113 --- configure.ac | 2 +- libarchive/archive_string.c | 56 ++++++++++++++----------------------- libarchive/config_freebsd.h | 1 + 3 files changed, 23 insertions(+), 36 deletions(-) diff --git a/configure.ac b/configure.ac index b620e912a..9751bead7 100644 --- a/configure.ac +++ b/configure.ac @@ -246,7 +246,7 @@ AC_CHECK_FUNCS([lutimes memmove memset mkdir mkfifo mknod]) AC_CHECK_FUNCS([nl_langinfo pipe poll select setenv setlocale]) AC_CHECK_FUNCS([strchr strdup strerror strrchr timegm]) AC_CHECK_FUNCS([tzset unsetenv utime utimes vfork]) -AC_CHECK_FUNCS([wcscpy wcslen wmemcmp wmemcpy]) +AC_CHECK_FUNCS([wcscpy wcslen wctomb wmemcmp wmemcpy]) # FreeBSD's nl_langinfo supports an option to specify whether the # current locale uses month/day or day/month ordering. It makes the diff --git a/libarchive/archive_string.c b/libarchive/archive_string.c index 077e9ab0a..c7dc9eb35 100644 --- a/libarchive/archive_string.c +++ b/libarchive/archive_string.c @@ -41,14 +41,6 @@ __FBSDID("$FreeBSD: src/lib/libarchive/archive_string.c,v 1.12 2008/05/26 17:00: #include #endif -#ifdef __sgi -/* - * The following prototype is missing on IRXI, - * even though the function is implemented in libc. - */ -size_t wcrtomb(char *, wchar_t, mbstate_t *); -#endif - #include "archive_private.h" #include "archive_string.h" @@ -172,16 +164,13 @@ __archive_strappend_int(struct archive_string *as, int d, int base) } /* - * Home-grown wcrtomb for UTF-8. + * Home-grown wctomb for UTF-8. */ -static size_t -my_wcrtomb_utf8(char *p, wchar_t wc, mbstate_t *s) +static int +my_wctomb_utf8(char *p, wchar_t wc) { - (void)s; /* UNUSED */ - if (p == NULL) - /* Since this routine never uses shift state, we don't - * need to clear it here. */ + /* UTF-8 doesn't use shift states. */ return (0); if (wc <= 0x7f) { p[0] = (char)wc; @@ -215,15 +204,14 @@ my_wcrtomb_utf8(char *p, wchar_t wc, mbstate_t *s) static int my_wcstombs(struct archive_string *as, const wchar_t *w, - size_t (*func)(char *, wchar_t, mbstate_t *)) + int (*func)(char *, wchar_t)) { size_t n; char *p; - mbstate_t shift_state; char buff[256]; /* Clear the shift state before starting. */ - memset(&shift_state, 0, sizeof(shift_state)); + (*func)(NULL, L'\0'); /* * Convert one wide char at a time into 'buff', whenever that @@ -238,7 +226,7 @@ my_wcstombs(struct archive_string *as, const wchar_t *w, archive_strcat(as, buff); p = buff; } - n = (*func)(p, *w++, &shift_state); + n = (*func)(p, *w++); if (n == (size_t)-1) return (-1); p += n; @@ -255,7 +243,7 @@ my_wcstombs(struct archive_string *as, const wchar_t *w, struct archive_string * __archive_strappend_w_utf8(struct archive_string *as, const wchar_t *w) { - if (my_wcstombs(as, w, my_wcrtomb_utf8)) + if (my_wcstombs(as, w, my_wctomb_utf8)) return (NULL); return (as); } @@ -264,36 +252,34 @@ __archive_strappend_w_utf8(struct archive_string *as, const wchar_t *w) * Translates a wide character string into current locale character set * and appends to the archive_string. Note: returns NULL if conversion * fails. - * - * TODO: use my_wcrtomb_utf8 if !HAVE_WCRTOMB (add configure logic first!) */ struct archive_string * __archive_strappend_w_mbs(struct archive_string *as, const wchar_t *w) { - if (my_wcstombs(as, w, wcrtomb)) +#if HAVE_WCTOMB + if (my_wcstombs(as, w, wctomb)) + return (NULL); +#else + /* TODO: Can we do better than this? Are there platforms + * that have locale support but don't have wctomb()? */ + if (my_wcstombs(as, w, my_wctomb_utf8)) return (NULL); +#endif return (as); } /* - * Home-grown mbrtowc for UTF-8. Some systems lack UTF-8 - * (or even lack mbrtowc()) and we need UTF-8 support for pax + * Home-grown mbtowc for UTF-8. Some systems lack UTF-8 + * (or even lack mbtowc()) and we need UTF-8 support for pax * format. So please don't replace this with a call to the - * standard mbrtowc() function! + * standard mbtowc() function! */ static size_t -my_mbrtowc_utf8(wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) +my_mbtowc_utf8(wchar_t *pwc, const char *s, size_t n) { int ch; - /* - * This argument is here to make the prototype identical to the - * standard mbrtowc(), so I can build generic string processors - * that just accept a pointer to a suitable mbrtowc() function. - */ - (void)ps; /* UNUSED */ - /* Standard behavior: a NULL value for 's' just resets shift state. */ if (s == NULL) return (0); @@ -364,7 +350,7 @@ __archive_string_utf8_w(struct archive_string *as) dest = ws; src = as->s; while (*src != '\0') { - n = my_mbrtowc_utf8(dest, src, 8, NULL); + n = my_mbtowc_utf8(dest, src, 8); if (n == 0) break; if (n == (size_t)-1 || n == (size_t)-2) { diff --git a/libarchive/config_freebsd.h b/libarchive/config_freebsd.h index 3f8c71964..e146438a7 100644 --- a/libarchive/config_freebsd.h +++ b/libarchive/config_freebsd.h @@ -108,6 +108,7 @@ #define HAVE_WCHAR_H 1 #define HAVE_WCSCPY 1 #define HAVE_WCSLEN 1 +#define HAVE_WCTOMB 1 #define HAVE_WMEMCMP 1 #define HAVE_WMEMCPY 1 #define HAVE_ZLIB_H 1 -- 2.47.3