basic/utf8: do not read past end of string when looking for a multi-byte character

author Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>

Tue, 26 Feb 2019 11:37:40 +0000 (12:37 +0100)

committer Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>

Tue, 26 Feb 2019 11:37:40 +0000 (12:37 +0100)
author Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Tue, 26 Feb 2019 11:37:40 +0000 (12:37 +0100)
committer Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Tue, 26 Feb 2019 11:37:40 +0000 (12:37 +0100)
diff --git a/src/basic/device-nodes.c b/src/basic/device-nodes.c

index 5fcdf24bd2f607c981cf779448832cc042c100ee..5ebe5b24832c0d58d806ffed3a6db7ca819dc63c 100644 (file)
--- a/src/basic/device-nodes.c
+++ b/src/basic/device-nodes.c
@@ -28,7 +28,7 @@ int encode_devnode_name(const char *str, char *str_enc, size_t len) {
          for (i = 0, j = 0; str[i] != '\0'; i++) {
                  int seqlen;
  
-                seqlen = utf8_encoded_valid_unichar(&str[i]);
+                seqlen = utf8_encoded_valid_unichar(str + i, (size_t) -1);
                  if (seqlen > 1) {
  
                          if (len-j < (size_t)seqlen)
diff --git a/src/basic/utf8.c b/src/basic/utf8.c

index ffe0adb33c3f5f8113340de5d263aba52e230d0b..090c69d1400bad8bd43b33ddcd7df536e131a2d3 100644 (file)
--- a/src/basic/utf8.c
+++ b/src/basic/utf8.c
@@ -128,14 +128,14 @@ bool utf8_is_printable_newline(const char* str, size_t length, bool newline) {
  
          assert(str);
  
-        for (p = str; length;) {
+        for (p = str; length > 0;) {
                  int encoded_len, r;
                  char32_t val;
  
-                encoded_len = utf8_encoded_valid_unichar(p);
-                if (encoded_len < 0 ||
-                    (size_t) encoded_len > length)
+                encoded_len = utf8_encoded_valid_unichar(p, length);
+                if (encoded_len < 0)
                          return false;
+                assert(encoded_len > 0 && (size_t) encoded_len <= length);
  
                  r = utf8_encoded_to_unichar(p, &val);
                  if (r < 0 ||
@@ -159,7 +159,7 @@ char *utf8_is_valid(const char *str) {
          while (*p) {
                  int len;
  
-                len = utf8_encoded_valid_unichar(p);
+                len = utf8_encoded_valid_unichar(p, (size_t) -1);
                  if (len < 0)
                          return NULL;
  
@@ -181,7 +181,7 @@ char *utf8_escape_invalid(const char *str) {
          while (*str) {
                  int len;
  
-                len = utf8_encoded_valid_unichar(str);
+                len = utf8_encoded_valid_unichar(str, (size_t) -1);
                  if (len > 0) {
                          s = mempcpy(s, str, len);
                          str += len;
@@ -208,7 +208,7 @@ char *utf8_escape_non_printable(const char *str) {
          while (*str) {
                  int len;
  
-                len = utf8_encoded_valid_unichar(str);
+                len = utf8_encoded_valid_unichar(str, (size_t) -1);
                  if (len > 0) {
                          if (utf8_is_printable(str, len)) {
                                  s = mempcpy(s, str, len);
@@ -452,17 +452,24 @@ static int utf8_unichar_to_encoded_len(char32_t unichar) {
  }
  
  /* validate one encoded unicode char and return its length */
-int utf8_encoded_valid_unichar(const char *str) {
+int utf8_encoded_valid_unichar(const char *str, size_t length /* bytes */) {
          char32_t unichar;
          size_t len, i;
          int r;
  
          assert(str);
+        assert(length > 0);
+
+        /* We read until NUL, at most length bytes. (size_t) -1 may be used to disable the length check. */
  
          len = utf8_encoded_expected_len(str[0]);
          if (len == 0)
                  return -EINVAL;
  
+        /* Do we have a truncated multi-byte character? */
+        if (len > length)
+                return -EINVAL;
+
          /* ascii is valid */
          if (len == 1)
                  return 1;
@@ -495,7 +502,7 @@ size_t utf8_n_codepoints(const char *str) {
          while (*str != 0) {
                  int k;
  
-                k = utf8_encoded_valid_unichar(str);
+                k = utf8_encoded_valid_unichar(str, (size_t) -1);
                  if (k < 0)
                          return (size_t) -1;
  
diff --git a/src/basic/utf8.h b/src/basic/utf8.h

index 628456936eb31b3a9ba972d0e5473f85ec41a4b9..6df70921dbd9050c6292622d8f29ab0be57960ae 100644 (file)
--- a/src/basic/utf8.h
+++ b/src/basic/utf8.h
@@ -32,7 +32,7 @@ char16_t *utf8_to_utf16(const char *s, size_t length);
  
  size_t char16_strlen(const char16_t *s); /* returns the number of 16bit words in the string (not bytes!) */
  
-int utf8_encoded_valid_unichar(const char *str);
+int utf8_encoded_valid_unichar(const char *str, size_t length);
  int utf8_encoded_to_unichar(const char *str, char32_t *ret_unichar);
  
  static inline bool utf16_is_surrogate(char16_t c) {
diff --git a/src/libudev/libudev-util.c b/src/libudev/libudev-util.c

index 7e21719fbf2c1e495f89491ab6d2837e6f6ef71a..37660d0313f0bbbdacb6d1096e62a5bca88f7f2d 100644 (file)
--- a/src/libudev/libudev-util.c
+++ b/src/libudev/libudev-util.c
@@ -175,7 +175,7 @@ size_t util_replace_chars(char *str, const char *white) {
                  }
  
                  /* accept valid utf8 */
-                len = utf8_encoded_valid_unichar(&str[i]);
+                len = utf8_encoded_valid_unichar(str + i, (size_t) -1);
                  if (len > 1) {
                          i += len;
                          continue;
diff --git a/src/shared/ask-password-api.c b/src/shared/ask-password-api.c

index 072bf72c5697b31545925a1babf8fd22c896dd2b..bc5e1cf66929e4481538b4b0ff34e0d261a77bf0 100644 (file)
--- a/src/shared/ask-password-api.c
+++ b/src/shared/ask-password-api.c
@@ -385,13 +385,13 @@ int ask_password_tty(
                                  if (!(flags & ASK_PASSWORD_SILENT))
                                          backspace_chars(ttyfd, 1);
  
-                                /* Remove a full UTF-8 codepoint from the end. For that, figure out where the last one
-                                 * begins */
+                                /* Remove a full UTF-8 codepoint from the end. For that, figure out where the
+                                 * last one begins */
                                  q = 0;
                                  for (;;) {
                                          size_t z;
  
-                                        z = utf8_encoded_valid_unichar(passphrase + q);
+                                        z = utf8_encoded_valid_unichar(passphrase + q, (size_t) -1);
                                          if (z == 0) {
                                                  q = (size_t) -1; /* Invalid UTF8! */
                                                  break;
@@ -410,8 +410,8 @@ int ask_password_tty(
  
                                  flags |= ASK_PASSWORD_SILENT;
  
-                                /* There are two ways to enter silent mode. Either by pressing backspace as first key
-                                 * (and only as first key), or ... */
+                                /* There are two ways to enter silent mode. Either by pressing backspace as
+                                 * first key (and only as first key), or ... */
  
                                  if (ttyfd >= 0)
                                          (void) loop_write(ttyfd, "(no echo) ", 10, false);
@@ -440,7 +440,7 @@ int ask_password_tty(
  
                          if (!(flags & ASK_PASSWORD_SILENT) && ttyfd >= 0) {
                                  /* Check if we got a complete UTF-8 character now. If so, let's output one '*'. */
-                                n = utf8_encoded_valid_unichar(passphrase + codepoint);
+                                n = utf8_encoded_valid_unichar(passphrase + codepoint, (size_t) -1);
                                  if (n >= 0) {
                                          codepoint = p;
                                          (void) loop_write(ttyfd, (flags & ASK_PASSWORD_ECHO) ? &c : "*", 1, false);
diff --git a/src/shared/json.c b/src/shared/json.c

index 3786ff12b84648ada1cab7c7c158ba7156405c0e..7ae1ffb1b2df9af10efd63e67e498b14a474d861 100644 (file)
--- a/src/shared/json.c
+++ b/src/shared/json.c
@@ -1756,7 +1756,6 @@ static void inc_lines_columns(unsigned *line, unsigned *column, const char *s, s
          assert(s || n == 0);
  
          while (n > 0) {
-
                  if (*s == '\n') {
                          (*line)++;
                          *column = 1;
@@ -1765,7 +1764,7 @@ static void inc_lines_columns(unsigned *line, unsigned *column, const char *s, s
                  else {
                          int w;
  
-                        w = utf8_encoded_valid_unichar(s);
+                        w = utf8_encoded_valid_unichar(s, n);
                          if (w < 0) /* count invalid unichars as normal characters */
                                  w = 1;
                          else if ((size_t) w > n) /* never read more than the specified number of characters */
@@ -1930,7 +1929,7 @@ static int json_parse_string(const char **p, char **ret) {
                          continue;
                  }
  
-                len = utf8_encoded_valid_unichar(c);
+                len = utf8_encoded_valid_unichar(c, (size_t) -1);
                  if (len < 0)
                          return len;
  
diff --git a/src/test/test-utf8.c b/src/test/test-utf8.c

index 9849530ac88c33c0ac03d177da9114224a2d51fc..d1e48da2a665d0a0a229da3963abb61461cfaedb 100644 (file)
--- a/src/test/test-utf8.c
+++ b/src/test/test-utf8.c
@@ -36,11 +36,21 @@ static void test_ascii_is_valid_n(void) {
  }
  
  static void test_utf8_encoded_valid_unichar(void) {
-        assert_se(utf8_encoded_valid_unichar("\342\204\242") == 3);
-        assert_se(utf8_encoded_valid_unichar("\302\256") == 2);
-        assert_se(utf8_encoded_valid_unichar("a") == 1);
-        assert_se(utf8_encoded_valid_unichar("\341\204") < 0);
-        assert_se(utf8_encoded_valid_unichar("\341\204\341\204") < 0);
+        assert_se(utf8_encoded_valid_unichar("\342\204\242", 1) == -EINVAL); /* truncated */
+        assert_se(utf8_encoded_valid_unichar("\342\204\242", 2) == -EINVAL); /* truncated */
+        assert_se(utf8_encoded_valid_unichar("\342\204\242", 3) == 3);
+        assert_se(utf8_encoded_valid_unichar("\342\204\242", 4) == 3);
+        assert_se(utf8_encoded_valid_unichar("\302\256", 1) == -EINVAL); /* truncated */
+        assert_se(utf8_encoded_valid_unichar("\302\256", 2) == 2);
+        assert_se(utf8_encoded_valid_unichar("\302\256", 3) == 2);
+        assert_se(utf8_encoded_valid_unichar("\302\256", (size_t) -1) == 2);
+        assert_se(utf8_encoded_valid_unichar("a", 1) == 1);
+        assert_se(utf8_encoded_valid_unichar("a", 2) == 1);
+        assert_se(utf8_encoded_valid_unichar("\341\204", 1) == -EINVAL); /* truncated, potentially valid */
+        assert_se(utf8_encoded_valid_unichar("\341\204", 2) == -EINVAL); /* truncated, potentially valid */
+        assert_se(utf8_encoded_valid_unichar("\341\204", 3) == -EINVAL);
+        assert_se(utf8_encoded_valid_unichar("\341\204\341\204", 4) == -EINVAL);
+        assert_se(utf8_encoded_valid_unichar("\341\204\341\204", 5) == -EINVAL);
  }
  
  static void test_utf8_escaping(void) {
diff --git a/test/fuzz/fuzz-journal-remote/oss-fuzz-9341 b/test/fuzz/fuzz-journal-remote/oss-fuzz-9341

new file mode 100644 (file)

index 0000000..3ddac6b

Binary files /dev/null and b/test/fuzz/fuzz-journal-remote/oss-fuzz-9341 differ
author	Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
	Tue, 26 Feb 2019 11:37:40 +0000 (12:37 +0100)
committer	Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
	Tue, 26 Feb 2019 11:37:40 +0000 (12:37 +0100)
src/basic/device-nodes.c		patch | blob | blame | history
src/basic/utf8.c		patch | blob | blame | history
src/basic/utf8.h		patch | blob | blame | history
src/libudev/libudev-util.c		patch | blob | blame | history
src/shared/ask-password-api.c		patch | blob | blame | history
src/shared/json.c		patch | blob | blame | history
src/test/test-utf8.c		patch | blob | blame | history
test/fuzz/fuzz-journal-remote/oss-fuzz-9341	[new file with mode: 0644]	patch | blob