git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
lib-charset: UTF-8 -> UTF-8 translation was never returning CHARSET_RET_INCOMPLETE_INPUT
author Timo Sirainen <tss@iki.fi>
Sat, 10 Jan 2015 02:25:21 +0000 (04:25 +0200)
committer Timo Sirainen <tss@iki.fi>
Sat, 10 Jan 2015 02:25:21 +0000 (04:25 +0200)
Instead the incomplete input was just being modified into broken output.

src/lib-charset/charset-iconv.c
src/lib-charset/charset-utf8.c
src/lib-charset/charset-utf8.h

index 2cbf9ec5aec3d7a423462d3751a7c68fa1d30d23..8564aa166a6f2cd6d383f5d4f011388c4dc37b48 100644 (file)
@@ -53,20 +53,6 @@ void charset_to_utf8_reset(struct charset_translation *t)
                (void)iconv(t->cd, NULL, NULL, NULL, NULL);
 }
 
-static int
-charset_append_utf8(struct charset_translation *t,
-                   const void *src, size_t src_size, buffer_t *dest)
-{
-       if (t->normalizer != NULL)
-               return t->normalizer(src, src_size, dest);
-       else if (!uni_utf8_get_valid_data(src, src_size, dest))
-               return -1;
-       else {
-               buffer_append(dest, src, src_size);
-               return 0;
-       }
-}
-
 static bool
 charset_to_utf8_try(struct charset_translation *t,
                    const unsigned char *src, size_t *src_size, buffer_t *dest,
@@ -74,15 +60,12 @@ charset_to_utf8_try(struct charset_translation *t,
 {
        ICONV_CONST char *ic_srcbuf;
        char tmpbuf[8192], *ic_destbuf;
-       size_t srcleft, destleft;
+       size_t srcleft, destleft, tmpbuf_used;
        bool ret = TRUE;
 
        if (t->cd == (iconv_t)-1) {
                /* input is already supposed to be UTF-8 */
-               if (charset_append_utf8(t, src, *src_size, dest) < 0)
-                       *result = CHARSET_RET_INVALID_INPUT;
-               else
-                       *result = CHARSET_RET_OK;
+               *result = charset_utf8_to_utf8(t->normalizer, src, src_size, dest);
                return TRUE;
        }
        destleft = sizeof(tmpbuf);
@@ -109,8 +92,9 @@ charset_to_utf8_try(struct charset_translation *t,
        /* we just converted data to UTF-8. it shouldn't be invalid, but
           Solaris iconv appears to pass invalid data through sometimes
           (e.g. 8 bit characters with UTF-7) */
-       if (charset_append_utf8(t, tmpbuf, sizeof(tmpbuf) - destleft,
-                               dest) < 0)
+       tmpbuf_used = sizeof(tmpbuf) - destleft;
+       if (charset_utf8_to_utf8(t->normalizer, (void *)tmpbuf,
+                                &tmpbuf_used, dest) != CHARSET_RET_OK)
                *result = CHARSET_RET_INVALID_INPUT;
        return ret;
 }
index b07c8f13a85fa3f15ae84153e88373fd97331780..e0092feb65fc2f60c0b829dde6d33337f17d7a38 100644 (file)
@@ -70,15 +70,31 @@ enum charset_result
 charset_to_utf8(struct charset_translation *t,
                const unsigned char *src, size_t *src_size, buffer_t *dest)
 {
-       if (t->normalizer != NULL) {
-               if (t->normalizer(src, *src_size, dest) < 0)
+       return charset_utf8_to_utf8(t->normalizer, src, src_size, dest);
+}
+
+#endif
+
+enum charset_result
+charset_utf8_to_utf8(normalizer_func_t *normalizer,
+                    const unsigned char *src, size_t *src_size, buffer_t *dest)
+{
+       enum charset_result res = CHARSET_RET_OK;
+       size_t pos;
+
+       uni_utf8_partial_strlen_n(src, *src_size, &pos);
+       if (pos < *src_size) {
+               *src_size = pos;
+               res = CHARSET_RET_INCOMPLETE_INPUT;
+       }
+
+       if (normalizer != NULL) {
+               if (normalizer(src, *src_size, dest) < 0)
                        return CHARSET_RET_INVALID_INPUT;
        } else if (!uni_utf8_get_valid_data(src, *src_size, dest)) {
                return CHARSET_RET_INVALID_INPUT;
        } else {
                buffer_append(dest, src, *src_size);
        }
-       return CHARSET_RET_OK;
+       return res;
 }
-
-#endif
index d5cf27879b6aeb3d1789549bbaae62eaa736f0b0..16e24a1a5fb826e7e9b249fc4465358909d97786 100644 (file)
@@ -32,4 +32,9 @@ int charset_to_utf8_str(const char *charset, normalizer_func_t *normalizer,
                        const char *input, string_t *output,
                        enum charset_result *result_r) ATTR_NULL(2);
 
+/* INTERNAL: */
+enum charset_result
+charset_utf8_to_utf8(normalizer_func_t *normalizer,
+                    const unsigned char *src, size_t *src_size, buffer_t *dest);
+
 #endif