From: Ben Schmidt Date: Tue, 11 Feb 2014 22:19:03 +0000 (+1100) Subject: Use iconv to convert unknown character sets. X-Git-Tag: RELEASE_1_2_19b1~5 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=d4bfa0bd3fd2d0d22dd94ed726a9e1fccc218909;p=thirdparty%2Fmlmmj.git Use iconv to convert unknown character sets. --HG-- extra : rebase_source : e2be5470dac9823c5d475506c5b9f0c34323e0d8 --- diff --git a/ChangeLog b/ChangeLog index eaf8deaf..d8ca81a0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,4 @@ + o Use iconv to convert unknown character sets o Handle unfolded header lines better o Add a tunable for moderation request lifetime (Timo Boettcher) o Ensure mlmmj-send always honours tunables (e.g. relayhost) diff --git a/configure.ac b/configure.ac index fecbfb88..4c6e806c 100644 --- a/configure.ac +++ b/configure.ac @@ -14,11 +14,13 @@ AC_PROG_CC # Checks for libraries. AC_CHECK_LIB(socket,socket) AC_CHECK_LIB(nsl,gethostbyname) +AC_CHECK_LIB(iconv,iconv_open) # Checks for header files. AC_HEADER_STDC AC_CHECK_HEADERS([arpa/inet.h fcntl.h netinet/in.h stddef.h stdlib.h string.h]) AC_CHECK_HEADERS([sys/socket.h syslog.h unistd.h time.h]) +AC_CHECK_HEADER([iconv.h]) # Checks for typedefs, structures, and compiler characteristics. AC_C_CONST diff --git a/src/unistr.c b/src/unistr.c index 722b5c8e..a10c6529 100644 --- a/src/unistr.c +++ b/src/unistr.c @@ -30,12 +30,16 @@ #include #include #include +#include #include "mlmmj.h" #include "unistr.h" #include "log_error.h" #include "memory.h" +/* This is allocated on the stack, so it can't be too big. */ +#define ICONV_BUFFER_SIZE 160 + unistr *unistr_new(void) { @@ -180,6 +184,48 @@ void unistr_append_iso88591(unistr *str, const char *binary, size_t bin_len) } +void unistr_append_iconv(unistr *str, char *binary, size_t bin_len, + const char * charset) +{ + char bytes[ICONV_BUFFER_SIZE]; + char * buffer; + size_t bufferleft; + iconv_t cd; + + cd = iconv_open("UTF-8", charset); + if (cd == (iconv_t)-1) { + unistr_append_usascii(str, "???", 3); + return; + } + + while (bin_len > 0) { + buffer = bytes; + bufferleft = ICONV_BUFFER_SIZE; + if (iconv(cd, &binary, &bin_len, &buffer, &bufferleft) == (size_t)-1) { + if (errno == EILSEQ) { + /* illegal sequence; try to recover */ + unistr_append_utf8(str, bytes, ICONV_BUFFER_SIZE - bufferleft); + unistr_append_usascii(str, "?", 1); + bin_len--; + binary++; + continue; + } else if (errno == EINVAL) { + /* incomplete sequence; we're done */ + unistr_append_usascii(str, "?", 1); + break; + } else if (errno != E2BIG) { + /* some other error; abort */ + unistr_append_usascii(str, "???", 1); + break; + } + } + /* success or buffer full */ + unistr_append_utf8(str, bytes, ICONV_BUFFER_SIZE - bufferleft); + } + iconv_close(cd); +} + + void unistr_dump(const unistr *str) { unsigned int i; @@ -421,8 +467,7 @@ static void header_decode_word(char *word, unistr *ret) } else if (strcasecmp(charset, "iso-8859-1") == 0) { unistr_append_iso88591(ret, binary, bin_len); } else { - /* unknown charset */ - unistr_append_usascii(ret, "???", 3); + unistr_append_iconv(ret, binary, bin_len, charset); } myfree(my_word);