]> git.ipfire.org Git - thirdparty/mlmmj.git/commitdiff
Use iconv to convert unknown character sets.
author Ben Schmidt <none@none>
Tue, 11 Feb 2014 22:19:03 +0000 (09:19 +1100)
committer Ben Schmidt <none@none>
Tue, 11 Feb 2014 22:19:03 +0000 (09:19 +1100)
--HG--
extra : rebase_source : e2be5470dac9823c5d475506c5b9f0c34323e0d8

ChangeLog
configure.ac
src/unistr.c

index eaf8deafc485d90a175ee0aedf8b50f6eefd193e..d8ca81a029bc183c50725947eae0b37ef5234940 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,4 @@
+ o Use iconv to convert unknown character sets
  o Handle unfolded header lines better
  o Add a tunable for moderation request lifetime (Timo Boettcher)
  o Ensure mlmmj-send always honours tunables (e.g. relayhost)
index fecbfb8843d05b7f0a26185671ceacfd44bad68f..4c6e806cd7530fbff7763b7d59f70358aa7e1395 100644 (file)
@@ -14,11 +14,13 @@ AC_PROG_CC
 # Checks for libraries.
 AC_CHECK_LIB(socket,socket)
 AC_CHECK_LIB(nsl,gethostbyname)
+AC_CHECK_LIB(iconv,iconv_open)
 
 # Checks for header files.
 AC_HEADER_STDC
 AC_CHECK_HEADERS([arpa/inet.h fcntl.h netinet/in.h stddef.h stdlib.h string.h])
 AC_CHECK_HEADERS([sys/socket.h syslog.h unistd.h time.h])
+AC_CHECK_HEADER([iconv.h])
 
 # Checks for typedefs, structures, and compiler characteristics.
 AC_C_CONST
index 722b5c8ecfcc8982b93609d32e99230ff2a427ea..a10c65299a51f83aa3b8a04d45c4fd4ddc1991cd 100644 (file)
 #include <sys/stat.h>
 #include <sys/wait.h>
 #include <ctype.h>
+#include <iconv.h>
 
 #include "mlmmj.h"
 #include "unistr.h"
 #include "log_error.h"
 #include "memory.h"
 
+/* This is allocated on the stack, so it can't be too big. */
+#define ICONV_BUFFER_SIZE 160
+
 
 unistr *unistr_new(void)
 {
@@ -180,6 +184,48 @@ void unistr_append_iso88591(unistr *str, const char *binary, size_t bin_len)
 }
 
 
+/*
+ * Convert bin_len bytes at binary from the character set named by charset
+ * to UTF-8 using iconv, and append the result to str.
+ *
+ * The input is converted in chunks through a fixed-size stack buffer.
+ * Problems are reported in-band rather than to the caller:
+ *   - charset not supported by iconv_open: "???" is appended;
+ *   - illegal input sequence (EILSEQ): "?" is appended, one input byte is
+ *     skipped and conversion continues;
+ *   - incomplete sequence at end of input (EINVAL): "?" is appended and
+ *     conversion stops;
+ *   - any other failure: "???" is appended and conversion stops.
+ * Whatever iconv managed to convert before a failure is always kept.
+ */
+void unistr_append_iconv(unistr *str, char *binary, size_t bin_len,
+               const char * charset)
+{
+       char bytes[ICONV_BUFFER_SIZE];
+       char * buffer;
+       size_t bufferleft;
+       size_t converted;
+       size_t result;
+       int saved_errno;
+       iconv_t cd;
+
+       cd = iconv_open("UTF-8", charset);
+       if (cd == (iconv_t)-1) {
+               unistr_append_usascii(str, "???", 3);
+               return;
+       }
+
+       while (bin_len > 0) {
+               buffer = bytes;
+               bufferleft = ICONV_BUFFER_SIZE;
+               result = iconv(cd, &binary, &bin_len, &buffer, &bufferleft);
+               /* save errno before the append calls get a chance to change it */
+               saved_errno = errno;
+               converted = ICONV_BUFFER_SIZE - bufferleft;
+
+               /* iconv converts as much as it can before failing, so the
+                * buffer holds valid output on every path; never drop it */
+               unistr_append_utf8(str, bytes, converted);
+
+               if (result != (size_t)-1) {
+                       /* success */
+                       continue;
+               }
+               if (saved_errno == E2BIG && converted > 0) {
+                       /* buffer full; go around again for the rest */
+                       continue;
+               }
+               if (saved_errno == EILSEQ) {
+                       /* illegal sequence; try to recover */
+                       unistr_append_usascii(str, "?", 1);
+                       bin_len--;
+                       binary++;
+                       continue;
+               }
+               if (saved_errno == EINVAL) {
+                       /* incomplete sequence; we're done */
+                       unistr_append_usascii(str, "?", 1);
+                       break;
+               }
+               /* some other error (or E2BIG with no progress, which would
+                * otherwise loop forever); abort */
+               unistr_append_usascii(str, "???", 3);
+               break;
+       }
+       iconv_close(cd);
+}
+
+
 void unistr_dump(const unistr *str)
 {
        unsigned int i;
@@ -421,8 +467,7 @@ static void header_decode_word(char *word, unistr *ret)
        } else if (strcasecmp(charset, "iso-8859-1") == 0) {
                unistr_append_iso88591(ret, binary, bin_len);
        } else {
-               /* unknown charset */
-               unistr_append_usascii(ret, "???", 3);
+               unistr_append_iconv(ret, binary, bin_len, charset);
        }
 
        myfree(my_word);