git.ipfire.org Git - thirdparty/systemd.git/commitdiff
basic/utf8: do not read past end of string when looking for a multi-byte character
author Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Tue, 26 Feb 2019 11:37:40 +0000 (12:37 +0100)
committer Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Tue, 26 Feb 2019 11:37:40 +0000 (12:37 +0100)
Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=9341.

src/basic/device-nodes.c
src/basic/utf8.c
src/basic/utf8.h
src/libudev/libudev-util.c
src/shared/ask-password-api.c
src/shared/json.c
src/test/test-utf8.c
test/fuzz/fuzz-journal-remote/oss-fuzz-9341 [new file with mode: 0644]

index 5fcdf24bd2f607c981cf779448832cc042c100ee..5ebe5b24832c0d58d806ffed3a6db7ca819dc63c 100644 (file)
@@ -28,7 +28,7 @@ int encode_devnode_name(const char *str, char *str_enc, size_t len) {
         for (i = 0, j = 0; str[i] != '\0'; i++) {
                 int seqlen;
 
-                seqlen = utf8_encoded_valid_unichar(&str[i]);
+                seqlen = utf8_encoded_valid_unichar(str + i, (size_t) -1);
                 if (seqlen > 1) {
 
                         if (len-j < (size_t)seqlen)
index ffe0adb33c3f5f8113340de5d263aba52e230d0b..090c69d1400bad8bd43b33ddcd7df536e131a2d3 100644 (file)
@@ -128,14 +128,14 @@ bool utf8_is_printable_newline(const char* str, size_t length, bool newline) {
 
         assert(str);
 
-        for (p = str; length;) {
+        for (p = str; length > 0;) {
                 int encoded_len, r;
                 char32_t val;
 
-                encoded_len = utf8_encoded_valid_unichar(p);
-                if (encoded_len < 0 ||
-                    (size_t) encoded_len > length)
+                encoded_len = utf8_encoded_valid_unichar(p, length);
+                if (encoded_len < 0)
                         return false;
+                assert(encoded_len > 0 && (size_t) encoded_len <= length);
 
                 r = utf8_encoded_to_unichar(p, &val);
                 if (r < 0 ||
@@ -159,7 +159,7 @@ char *utf8_is_valid(const char *str) {
         while (*p) {
                 int len;
 
-                len = utf8_encoded_valid_unichar(p);
+                len = utf8_encoded_valid_unichar(p, (size_t) -1);
                 if (len < 0)
                         return NULL;
 
@@ -181,7 +181,7 @@ char *utf8_escape_invalid(const char *str) {
         while (*str) {
                 int len;
 
-                len = utf8_encoded_valid_unichar(str);
+                len = utf8_encoded_valid_unichar(str, (size_t) -1);
                 if (len > 0) {
                         s = mempcpy(s, str, len);
                         str += len;
@@ -208,7 +208,7 @@ char *utf8_escape_non_printable(const char *str) {
         while (*str) {
                 int len;
 
-                len = utf8_encoded_valid_unichar(str);
+                len = utf8_encoded_valid_unichar(str, (size_t) -1);
                 if (len > 0) {
                         if (utf8_is_printable(str, len)) {
                                 s = mempcpy(s, str, len);
@@ -452,17 +452,24 @@ static int utf8_unichar_to_encoded_len(char32_t unichar) {
 }
 
 /* validate one encoded unicode char and return its length */
-int utf8_encoded_valid_unichar(const char *str) {
+int utf8_encoded_valid_unichar(const char *str, size_t length /* bytes */) {
         char32_t unichar;
         size_t len, i;
         int r;
 
         assert(str);
+        assert(length > 0);
+
+        /* We read until NUL, at most length bytes. (size_t) -1 may be used to disable the length check. */
 
         len = utf8_encoded_expected_len(str[0]);
         if (len == 0)
                 return -EINVAL;
 
+        /* Do we have a truncated multi-byte character? */
+        if (len > length)
+                return -EINVAL;
+
         /* ascii is valid */
         if (len == 1)
                 return 1;
@@ -495,7 +502,7 @@ size_t utf8_n_codepoints(const char *str) {
         while (*str != 0) {
                 int k;
 
-                k = utf8_encoded_valid_unichar(str);
+                k = utf8_encoded_valid_unichar(str, (size_t) -1);
                 if (k < 0)
                         return (size_t) -1;
 
index 628456936eb31b3a9ba972d0e5473f85ec41a4b9..6df70921dbd9050c6292622d8f29ab0be57960ae 100644 (file)
@@ -32,7 +32,7 @@ char16_t *utf8_to_utf16(const char *s, size_t length);
 
 size_t char16_strlen(const char16_t *s); /* returns the number of 16bit words in the string (not bytes!) */
 
-int utf8_encoded_valid_unichar(const char *str);
+int utf8_encoded_valid_unichar(const char *str, size_t length);
 int utf8_encoded_to_unichar(const char *str, char32_t *ret_unichar);
 
 static inline bool utf16_is_surrogate(char16_t c) {
index 7e21719fbf2c1e495f89491ab6d2837e6f6ef71a..37660d0313f0bbbdacb6d1096e62a5bca88f7f2d 100644 (file)
@@ -175,7 +175,7 @@ size_t util_replace_chars(char *str, const char *white) {
                 }
 
                 /* accept valid utf8 */
-                len = utf8_encoded_valid_unichar(&str[i]);
+                len = utf8_encoded_valid_unichar(str + i, (size_t) -1);
                 if (len > 1) {
                         i += len;
                         continue;
index 072bf72c5697b31545925a1babf8fd22c896dd2b..bc5e1cf66929e4481538b4b0ff34e0d261a77bf0 100644 (file)
@@ -385,13 +385,13 @@ int ask_password_tty(
                                 if (!(flags & ASK_PASSWORD_SILENT))
                                         backspace_chars(ttyfd, 1);
 
-                                /* Remove a full UTF-8 codepoint from the end. For that, figure out where the last one
-                                 * begins */
+                                /* Remove a full UTF-8 codepoint from the end. For that, figure out where the
+                                 * last one begins */
                                 q = 0;
                                 for (;;) {
                                         size_t z;
 
-                                        z = utf8_encoded_valid_unichar(passphrase + q);
+                                        z = utf8_encoded_valid_unichar(passphrase + q, (size_t) -1);
                                         if (z == 0) {
                                                 q = (size_t) -1; /* Invalid UTF8! */
                                                 break;
@@ -410,8 +410,8 @@ int ask_password_tty(
 
                                 flags |= ASK_PASSWORD_SILENT;
 
-                                /* There are two ways to enter silent mode. Either by pressing backspace as first key
-                                 * (and only as first key), or ... */
+                                /* There are two ways to enter silent mode. Either by pressing backspace as
+                                 * first key (and only as first key), or ... */
 
                                 if (ttyfd >= 0)
                                         (void) loop_write(ttyfd, "(no echo) ", 10, false);
@@ -440,7 +440,7 @@ int ask_password_tty(
 
                         if (!(flags & ASK_PASSWORD_SILENT) && ttyfd >= 0) {
                                 /* Check if we got a complete UTF-8 character now. If so, let's output one '*'. */
-                                n = utf8_encoded_valid_unichar(passphrase + codepoint);
+                                n = utf8_encoded_valid_unichar(passphrase + codepoint, (size_t) -1);
                                 if (n >= 0) {
                                         codepoint = p;
                                         (void) loop_write(ttyfd, (flags & ASK_PASSWORD_ECHO) ? &c : "*", 1, false);
index 3786ff12b84648ada1cab7c7c158ba7156405c0e..7ae1ffb1b2df9af10efd63e67e498b14a474d861 100644 (file)
@@ -1756,7 +1756,6 @@ static void inc_lines_columns(unsigned *line, unsigned *column, const char *s, s
         assert(s || n == 0);
 
         while (n > 0) {
-
                 if (*s == '\n') {
                         (*line)++;
                         *column = 1;
@@ -1765,7 +1764,7 @@ static void inc_lines_columns(unsigned *line, unsigned *column, const char *s, s
                 else {
                         int w;
 
-                        w = utf8_encoded_valid_unichar(s);
+                        w = utf8_encoded_valid_unichar(s, n);
                         if (w < 0) /* count invalid unichars as normal characters */
                                 w = 1;
                         else if ((size_t) w > n) /* never read more than the specified number of characters */
@@ -1930,7 +1929,7 @@ static int json_parse_string(const char **p, char **ret) {
                         continue;
                 }
 
-                len = utf8_encoded_valid_unichar(c);
+                len = utf8_encoded_valid_unichar(c, (size_t) -1);
                 if (len < 0)
                         return len;
 
index 9849530ac88c33c0ac03d177da9114224a2d51fc..d1e48da2a665d0a0a229da3963abb61461cfaedb 100644 (file)
@@ -36,11 +36,21 @@ static void test_ascii_is_valid_n(void) {
 }
 
 static void test_utf8_encoded_valid_unichar(void) {
-        assert_se(utf8_encoded_valid_unichar("\342\204\242") == 3);
-        assert_se(utf8_encoded_valid_unichar("\302\256") == 2);
-        assert_se(utf8_encoded_valid_unichar("a") == 1);
-        assert_se(utf8_encoded_valid_unichar("\341\204") < 0);
-        assert_se(utf8_encoded_valid_unichar("\341\204\341\204") < 0);
+        assert_se(utf8_encoded_valid_unichar("\342\204\242", 1) == -EINVAL); /* truncated */
+        assert_se(utf8_encoded_valid_unichar("\342\204\242", 2) == -EINVAL); /* truncated */
+        assert_se(utf8_encoded_valid_unichar("\342\204\242", 3) == 3);
+        assert_se(utf8_encoded_valid_unichar("\342\204\242", 4) == 3);
+        assert_se(utf8_encoded_valid_unichar("\302\256", 1) == -EINVAL); /* truncated */
+        assert_se(utf8_encoded_valid_unichar("\302\256", 2) == 2);
+        assert_se(utf8_encoded_valid_unichar("\302\256", 3) == 2);
+        assert_se(utf8_encoded_valid_unichar("\302\256", (size_t) -1) == 2);
+        assert_se(utf8_encoded_valid_unichar("a", 1) == 1);
+        assert_se(utf8_encoded_valid_unichar("a", 2) == 1);
+        assert_se(utf8_encoded_valid_unichar("\341\204", 1) == -EINVAL); /* truncated, potentially valid */
+        assert_se(utf8_encoded_valid_unichar("\341\204", 2) == -EINVAL); /* truncated, potentially valid */
+        assert_se(utf8_encoded_valid_unichar("\341\204", 3) == -EINVAL);
+        assert_se(utf8_encoded_valid_unichar("\341\204\341\204", 4) == -EINVAL);
+        assert_se(utf8_encoded_valid_unichar("\341\204\341\204", 5) == -EINVAL);
 }
 
 static void test_utf8_escaping(void) {
diff --git a/test/fuzz/fuzz-journal-remote/oss-fuzz-9341 b/test/fuzz/fuzz-journal-remote/oss-fuzz-9341
new file mode 100644 (file)
index 0000000..3ddac6b
Binary files /dev/null and b/test/fuzz/fuzz-journal-remote/oss-fuzz-9341 differ