string: Check UTF-8 string pointer and length

author teor <teor@torproject.org>

Tue, 7 Jan 2020 07:09:25 +0000 (17:09 +1000)

committer Nick Mathewson <nickm@torproject.org>

Tue, 7 Jan 2020 15:16:08 +0000 (10:16 -0500)
author teor <teor@torproject.org>
Tue, 7 Jan 2020 07:09:25 +0000 (17:09 +1000)
committer Nick Mathewson <nickm@torproject.org>
Tue, 7 Jan 2020 15:16:08 +0000 (10:16 -0500)
diff --git a/src/lib/string/util_string.c b/src/lib/string/util_string.c

index f5061a11d26690082aff0c17560858c34a525690..93b3eb09f21c58f7b3824ad0a144fe8c714740bc 100644 (file)
--- a/src/lib/string/util_string.c
+++ b/src/lib/string/util_string.c
@@ -506,6 +506,23 @@ validate_char(const uint8_t *c, uint8_t len)
  int
  string_is_utf8(const char *str, size_t len)
  {
+  // If str is NULL, don't try to read it
+  if (!str) {
+    // We could test for this case, but the low-level logs would produce
+    // confusing test output.
+    // LCOV_EXCL_START
+    if (len) {
+      // Use the low-level logging function, so that the log module itself
+      // can call string_is_utf8() to validate UTF-8 (if needed in future code)
+      tor_log_err_sigsafe(
+        "BUG: string_is_utf8() called with NULL str but non-zero len.");
+      // Since it's a bug, we should probably reject this string
+      return false;
+    }
+    // LCOV_EXCL_STOP
+    return true;
+  }
+
    for (size_t i = 0; i < len;) {
      uint8_t num_bytes = bytes_in_char(str[i]);
      if (num_bytes == 0) // Invalid leading byte found.
@@ -530,8 +547,8 @@ string_is_utf8(const char *str, size_t len)
  int
  string_is_utf8_no_bom(const char *str, size_t len)
  {
-  if (len >= 3 && (!strcmpstart(str, "\uFEFF") ||
-                   !strcmpstart(str, "\uFFFE"))) {
+  if (str && len >= 3 && (!strcmpstart(str, "\uFEFF") ||
+                          !strcmpstart(str, "\uFFFE"))) {
      return false;
    }
    return string_is_utf8(str, len);
author	teor <teor@torproject.org>
	Tue, 7 Jan 2020 07:09:25 +0000 (17:09 +1000)
committer	Nick Mathewson <nickm@torproject.org>
	Tue, 7 Jan 2020 15:16:08 +0000 (10:16 -0500)