git.ipfire.org Git - thirdparty/samba.git/commitdiff
util/iconv: reject improperly packed UTF-8
author Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
Thu, 8 Apr 2021 09:18:46 +0000 (21:18 +1200)
committer Jeremy Allison <jra@samba.org>
Fri, 18 Jun 2021 03:39:28 +0000 (03:39 +0000)
If we allow a string that encodes say '\0' as a multi-byte sequence,
we are open to confusion where we mix NUL terminated strings with
sized data blobs, which is to say EVERYWHERE.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14684

Signed-off-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
Reviewed-by: Jeremy Allison <jra@samba.org>
lib/util/charset/iconv.c
selftest/knownfail.d/str-utf8 [deleted file]

index 1f2d49c0e27d566521a3179f9eea1a0b6868b3dd..43b3306b0deb6f2f9328ca8687d836c727039065 100644 (file)
@@ -832,6 +832,11 @@ static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
                        }
                        uc[1] = (c[0]>>2) & 0x7;
                        uc[0] = (c[0]<<6) | (c[1]&0x3f);
+                       if (uc[1] == 0 && uc[0] < 0x80) {
+                               /* this should have been a single byte */
+                               errno = EILSEQ;
+                               goto error;
+                       }
                        c  += 2;
                        in_left  -= 2;
                        out_left -= 2;
@@ -840,14 +845,24 @@ static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
                }
 
                if ((c[0] & 0xf0) == 0xe0) {
+                       unsigned int codepoint;
                        if (in_left < 3 ||
                            (c[1] & 0xc0) != 0x80 ||
                            (c[2] & 0xc0) != 0x80) {
                                errno = EILSEQ;
                                goto error;
                        }
-                       uc[1] = ((c[0]&0xF)<<4) | ((c[1]>>2)&0xF);
-                       uc[0] = (c[1]<<6) | (c[2]&0x3f);
+                       codepoint = ((c[2] & 0x3f)        |
+                                    ((c[1] & 0x3f) << 6) |
+                                    ((c[0] & 0x0f) << 12));
+
+                       if (codepoint < 0x800) {
+                               /* this should be a 1 or 2 byte sequence */
+                               errno = EILSEQ;
+                               goto error;
+                       }
+                       uc[0] = codepoint & 0xff;
+                       uc[1] = codepoint >> 8;
                        c  += 3;
                        in_left  -= 3;
                        out_left -= 2;
@@ -870,15 +885,10 @@ static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
                                ((c[1]&0x3f)<<12) |
                                ((c[0]&0x7)<<18);
                        if (codepoint < 0x10000) {
-                               /* accept UTF-8 characters that are not
-                                  minimally packed, but pack the result */
-                               uc[0] = (codepoint & 0xFF);
-                               uc[1] = (codepoint >> 8);
-                               c += 4;
-                               in_left -= 4;
-                               out_left -= 2;
-                               uc += 2;
-                               continue;
+                               /* reject UTF-8 characters that are not
+                                  minimally packed */
+                               errno = EILSEQ;
+                               goto error;
                        }
 
                        codepoint -= 0x10000;
diff --git a/selftest/knownfail.d/str-utf8 b/selftest/knownfail.d/str-utf8
deleted file mode 100644 (file)
index b003ea8..0000000
+++ /dev/null
@@ -1 +0,0 @@
-^samba4.local.str.+utf8_[234]