From: John Wolfe Date: Tue, 12 Jul 2022 16:56:01 +0000 (-0700) Subject: Escape all control characters in JSON. X-Git-Tag: stable-12.1.0~33 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e5e9a9ba31791d4f10e7dd75a8dbab179926511f;p=thirdparty%2Fopen-vm-tools.git Escape all control characters in JSON. Update CodeSet_JsonEscape to escape all control characters between U+0000 and U+001F as required by the JSON standard. Delete CodeSet_Utf8Escape as it is no longer used. Also update datasets in-guest API tests accordingly. --- diff --git a/open-vm-tools/lib/include/codeset.h b/open-vm-tools/lib/include/codeset.h index af9f8b752..9a93f1fec 100644 --- a/open-vm-tools/lib/include/codeset.h +++ b/open-vm-tools/lib/include/codeset.h @@ -394,14 +394,6 @@ Bool CodeSet_IsStringValidUTF8(const char *string); // IN: Bool CodeSet_IsValidUTF8String(const char *bufIn, // IN: size_t sizeIn); // IN: -typedef struct { - char c; - char *escape; -} CodeSetEscapeEntry; - -char *CodeSet_Utf8Escape(const char *utf8, // IN: - const CodeSetEscapeEntry *entries); // IN: - char *CodeSet_JsonEscape(const char *utf8); // IN: char *CodeSet_JsonUnescape(const char *utf8); // IN: diff --git a/open-vm-tools/lib/misc/jsonUTF8.c b/open-vm-tools/lib/misc/jsonUTF8.c index 816bca795..722897147 100644 --- a/open-vm-tools/lib/misc/jsonUTF8.c +++ b/open-vm-tools/lib/misc/jsonUTF8.c @@ -1,5 +1,5 @@ /********************************************************* - * Copyright (C) 2020-2021 VMware, Inc. All rights reserved. + * Copyright (C) 2020-2022 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published @@ -21,55 +21,42 @@ #include "codeset.h" #include "vm_ctype.h" #include "dynbuf.h" +#include "strutil.h" #include "unicodeBase.h" /* *----------------------------------------------------------------------------- * - * CodeSetFindEscape -- + * CodeSet_JsonEscape -- * - * Is there an escape for the specified character? + * Escape a unicode string following JSON rules. * - * The last entry in the characters to be escaped entry must have - * a 'c' on '\0' and an 'escape' of NULL. + * From https://www.rfc-editor.org/rfc/rfc8259.html#section-7: * - * Results: - * NULL No. - * !NULL Yes. Pointer to the escape entry for the specified character. - * - * Side effects: - * None + * ... All Unicode characters may be placed within the + * quotation marks, except for the characters that MUST be escaped: + * quotation mark, reverse solidus, and the control characters (U+0000 + * through U+001F). * - *----------------------------------------------------------------------------- - */ - -static const CodeSetEscapeEntry * -CodeSetFindEscape(char c, // IN: - const CodeSetEscapeEntry *entries) // IN: -{ - const CodeSetEscapeEntry *e; - - for (e = entries; e->escape != NULL; e++) { - if (c == e->c) { - return e; - } - } - - return NULL; -} - - -/* - *----------------------------------------------------------------------------- + * ... If the character is in the Basic + * Multilingual Plane (U+0000 through U+FFFF), then it may be + * represented as a six-character sequence: a reverse solidus, followed + * by the lowercase letter u, followed by four hexadecimal digits that + * encode the character's code point.... * - * CodeSet_Utf8Escape -- + * Alternatively, there are two-character sequence escape + * representations of some popular characters. So, for example, a + * string containing only a single reverse solidus character may be + * represented more compactly as "\\" * - * Escape the ASCII characters specified by the escape entries - * within a UTF8 string. + * ... * - * The last entry in the characters to be escaped entry must have - * a 'c' on '\0' and an 'escape' of NULL. + * %x62 / ; b backspace U+0008 + * %x66 / ; f form feed U+000C + * %x6E / ; n line feed U+000A + * %x72 / ; r carriage return U+000D + * %x74 / ; t tab U+0009 * * Results: * NULL Failure! @@ -83,8 +70,7 @@ CodeSetFindEscape(char c, // IN: */ char * -CodeSet_Utf8Escape(const char *utf8, // IN: - const CodeSetEscapeEntry *entries) // IN: +CodeSet_JsonEscape(const char *utf8) // IN: { DynBuf b; char *res; @@ -110,18 +96,35 @@ CodeSet_Utf8Escape(const char *utf8, // IN: break; } - if (len == 1) { // ASCII - const CodeSetEscapeEntry *e = CodeSetFindEscape(*p, entries); - - if (e == NULL) { - DynBuf_Append(&b, p, len); - } else { - DynBuf_Append(&b, e->escape, strlen(e->escape)); + if (len > 1 || (*utf8 > 0x001F && *utf8 != '"' && *utf8 != '\\')) { + DynBuf_SafeAppend(&b, p, len); + } else { + DynBuf_SafeAppend(&b, "\\", 1); + switch (*p) { + case '"': + case '\\': + DynBuf_SafeAppend(&b, p, 1); + break; + case '\b': + DynBuf_SafeAppend(&b, "b", 1); + break; + case '\f': + DynBuf_SafeAppend(&b, "f", 1); + break; + case '\n': + DynBuf_SafeAppend(&b, "n", 1); + break; + case '\r': + DynBuf_SafeAppend(&b, "r", 1); + break; + case '\t': + DynBuf_SafeAppend(&b, "t", 1); + break; + default: + StrUtil_SafeDynBufPrintf(&b, "u%04x", *p); + break; } - } else { // All others - DynBuf_Append(&b, p, len); } - p += len; } @@ -137,50 +140,6 @@ CodeSet_Utf8Escape(const char *utf8, // IN: } -/* - *----------------------------------------------------------------------------- - * - * CodeSet_JsonEscape -- - * - * Escape a unicode string following JSON rules. - * - * Backspace (\b) - * Form Feed (\f) - * Line Feed (\n) - * Carriage Return (\r) - * Tab (\t) - * Backslash (\) - * Double Quote (") - * - * Results: - * NULL Failure! - * !NULL Success! The escaped string. The caller is responsible to free - * this. - * - * Side effects: - * Memory is allocated - * - *----------------------------------------------------------------------------- - */ - -char * -CodeSet_JsonEscape(const char *utf8) // IN: -{ - static const CodeSetEscapeEntry JsonEscapes[] = { - { '\b', "\\b" }, - { '\f', "\\f" }, - { '\n', "\\n" }, - { '\r', "\\r" }, - { '\t', "\\t" }, - { '\\', "\\\\" }, - { '\"', "\\\"" }, - { '\0', NULL } // MUST BE LAST - }; - - return CodeSet_Utf8Escape(utf8, JsonEscapes); -} - - /* Constants used by json unescape routines. */ /* Number of hex digits in a "\u" escape sequence. */ @@ -417,32 +376,32 @@ CodeSet_JsonUnescapeOne(const char *p, // IN: * end up in the default case of the switch and fail. */ switch (*p) { - case '\"': - case '\\': - case '/': - outBuf[0] = *p; - break; - case 'b': - outBuf[0] = '\b'; - break; - case 'f': - outBuf[0] = '\f'; - break; - case 'r': - outBuf[0] = '\r'; - break; - case 'n': - outBuf[0] = '\n'; - break; - case 't': - outBuf[0] = '\t'; - break; - case 'u': - len = CodeSet_JsonUnescapeU(start, end, outBuf); - break; - default: - len = 0; - break; + case '\"': + case '\\': + case '/': + outBuf[0] = *p; + break; + case 'b': + outBuf[0] = '\b'; + break; + case 'f': + outBuf[0] = '\f'; + break; + case 'r': + outBuf[0] = '\r'; + break; + case 'n': + outBuf[0] = '\n'; + break; + case 't': + outBuf[0] = '\t'; + break; + case 'u': + len = CodeSet_JsonUnescapeU(start, end, outBuf); + break; + default: + len = 0; + break; } } return len; @@ -503,9 +462,9 @@ CodeSet_JsonUnescape(const char *utf8) // IN: if (len == 0) { success = FALSE; } else if (len > 1 || *p != '\\') { - DynBuf_Append(&b, p, len); + DynBuf_SafeAppend(&b, p, len); } else if ((len = CodeSet_JsonUnescapeOne(p, end, unescaped)) != 0) { - DynBuf_Append(&b, unescaped, strlen(unescaped)); + DynBuf_SafeAppend(&b, unescaped, strlen(unescaped)); } else { success = FALSE; }