From: John Wolfe Date: Tue, 24 Aug 2021 03:13:37 +0000 (-0700) Subject: Replace the use of appInfo's internal json escape function with X-Git-Tag: stable-12.0.0~117 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a6923583d4519019e25860abcd16c2205d8a3fde;p=thirdparty%2Fopen-vm-tools.git Replace the use of appInfo's internal json escape function with libmisc's CodeSet_JsonEscape(). --- diff --git a/open-vm-tools/lib/misc/Makefile.am b/open-vm-tools/lib/misc/Makefile.am index 8678f515b..d968f9235 100644 --- a/open-vm-tools/lib/misc/Makefile.am +++ b/open-vm-tools/lib/misc/Makefile.am @@ -1,5 +1,5 @@ ################################################################################ -### Copyright (C) 2007-2017 VMware, Inc. All rights reserved. +### Copyright (C) 2007-2017,2021 VMware, Inc. All rights reserved. ### ### This program is free software; you can redistribute it and/or modify ### it under the terms of version 2 of the GNU General Public License as @@ -34,6 +34,7 @@ libMisc_la_SOURCES += hostname.c libMisc_la_SOURCES += hostType.c libMisc_la_SOURCES += idLinux.c libMisc_la_SOURCES += iovector.c +libMisc_la_SOURCES += jsonUTF8.c libMisc_la_SOURCES += logFixed.c libMisc_la_SOURCES += machineID.c libMisc_la_SOURCES += miscSolaris.c diff --git a/open-vm-tools/lib/misc/jsonUTF8.c b/open-vm-tools/lib/misc/jsonUTF8.c new file mode 100644 index 000000000..816bca795 --- /dev/null +++ b/open-vm-tools/lib/misc/jsonUTF8.c @@ -0,0 +1,524 @@ +/********************************************************* + * Copyright (C) 2020-2021 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation version 2.1 and no later version. 
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the Lesser GNU General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ *********************************************************/
+
+#include <stdlib.h>   /* strtol, free */
+#include <string.h>   /* strlen, memcpy */
+#include "vmware.h"
+#include "codeset.h"
+#include "vm_ctype.h"
+#include "dynbuf.h"
+#include "unicodeBase.h"
+
+
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * CodeSetFindEscape --
+ *
+ *      Look up the escape entry, if any, for the specified character.
+ *
+ *      The table is scanned linearly from its first entry. The last entry
+ *      of the table must have a 'c' of '\0' and an 'escape' of NULL; the
+ *      NULL 'escape' is what ends the scan.
+ *
+ *      A NULL table is treated as an empty table (nothing is escaped).
+ *
+ * Results:
+ *      NULL   No escape exists for the character.
+ *     !NULL   Pointer to the escape entry for the specified character.
+ *
+ * Side effects:
+ *      None
+ *
+ *-----------------------------------------------------------------------------
+ */
+
+static const CodeSetEscapeEntry *
+CodeSetFindEscape(char c,                             // IN:
+                  const CodeSetEscapeEntry *entries)  // IN:
+{
+   const CodeSetEscapeEntry *e;
+
+   /* Without a table there is nothing to match against. */
+   if (entries == NULL) {
+      return NULL;
+   }
+
+   for (e = entries; e->escape != NULL; e++) {
+      if (c == e->c) {
+         return e;
+      }
+   }
+
+   return NULL;
+}
+
+
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * CodeSet_Utf8Escape --
+ *
+ *      Escape the ASCII characters specified by the escape entries
+ *      within a UTF8 string.
+ *
+ *      The last entry in the characters to be escaped entry must have
+ *      a 'c' of '\0' and an 'escape' of NULL.
+ *
+ * Results:
+ *      NULL   Failure!
+ *     !NULL   Success! The escaped string. The caller is responsible to free
+ *             this.
+ *
+ * Side effects:
+ *      Memory is allocated
+ *
+ *-----------------------------------------------------------------------------
+ */
+
+char *
+CodeSet_Utf8Escape(const char *utf8,                   // IN:
+                   const CodeSetEscapeEntry *entries)  // IN:
+{
+   DynBuf b;
+   char *res = NULL;
+   const char *p;
+   const char *end;
+   Bool success = TRUE;
+
+   ASSERT(utf8 != NULL);
+   if (utf8 == NULL) {
+      return NULL;
+   }
+
+   DynBuf_Init(&b);
+
+   p = utf8;
+   end = p + strlen(utf8);
+
+   while (success && p < end) {
+      const CodeSetEscapeEntry *e = NULL;
+      uint32 len = CodeSet_GetUtf8(p, end, NULL);
+
+      if (len == 0) {  // Not a valid UTF-8 code point; give up.
+         success = FALSE;
+         break;
+      }
+
+      /* Only single-byte (ASCII) code points are candidates for escaping. */
+      if (len == 1) {
+         e = CodeSetFindEscape(*p, entries);
+      }
+
+      /*
+       * Propagate append failures instead of ignoring them, so that an
+       * allocation failure yields NULL rather than a truncated string.
+       */
+      if (e == NULL) {
+         success = DynBuf_Append(&b, p, len);
+      } else {
+         success = DynBuf_Append(&b, e->escape, strlen(e->escape));
+      }
+
+      p += len;
+   }
+
+   if (success) {
+      res = DynBuf_DetachString(&b);
+   }
+
+   DynBuf_Destroy(&b);
+
+   return res;
+}
+
+
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * CodeSet_JsonEscape --
+ *
+ *      Escape a unicode string following JSON rules.
+ *
+ *      The two-character escapes are used where JSON defines one:
+ *
+ *         Backspace (\b)
+ *         Form Feed (\f)
+ *         Line Feed (\n)
+ *         Carriage Return (\r)
+ *         Tab (\t)
+ *         Backslash (\\)
+ *         Double Quote (\")
+ *
+ *      Every other control character in the range U+0001 through U+001F is
+ *      emitted as a six-character "\u00XX" sequence, since JSON does not
+ *      permit raw control characters inside a string. U+0000 cannot occur
+ *      because the input is NUL terminated.
+ *
+ * Results:
+ *      NULL   Failure!
+ *     !NULL   Success! The escaped string. The caller is responsible to free
+ *             this.
+ *
+ * Side effects:
+ *      Memory is allocated
+ *
+ *-----------------------------------------------------------------------------
+ */
+
+char *
+CodeSet_JsonEscape(const char *utf8)  // IN:
+{
+   static const CodeSetEscapeEntry JsonEscapes[] = {
+      { '\b',   "\\b"     },
+      { '\f',   "\\f"     },
+      { '\n',   "\\n"     },
+      { '\r',   "\\r"     },
+      { '\t',   "\\t"     },
+      { '\\',   "\\\\"    },
+      { '\"',   "\\\""    },
+      /* Remaining control characters have no short form in JSON. */
+      { '\x01', "\\u0001" },
+      { '\x02', "\\u0002" },
+      { '\x03', "\\u0003" },
+      { '\x04', "\\u0004" },
+      { '\x05', "\\u0005" },
+      { '\x06', "\\u0006" },
+      { '\x07', "\\u0007" },
+      { '\x0B', "\\u000B" },
+      { '\x0E', "\\u000E" },
+      { '\x0F', "\\u000F" },
+      { '\x10', "\\u0010" },
+      { '\x11', "\\u0011" },
+      { '\x12', "\\u0012" },
+      { '\x13', "\\u0013" },
+      { '\x14', "\\u0014" },
+      { '\x15', "\\u0015" },
+      { '\x16', "\\u0016" },
+      { '\x17', "\\u0017" },
+      { '\x18', "\\u0018" },
+      { '\x19', "\\u0019" },
+      { '\x1A', "\\u001A" },
+      { '\x1B', "\\u001B" },
+      { '\x1C', "\\u001C" },
+      { '\x1D', "\\u001D" },
+      { '\x1E', "\\u001E" },
+      { '\x1F', "\\u001F" },
+      { '\0',   NULL      }  // MUST BE LAST
+   };
+
+   return CodeSet_Utf8Escape(utf8, JsonEscapes);
+}
+
+
+/* Constants used by json unescape routines. */
+
+/* Number of hex digits in a "\u" escape sequence.
*/
+#define JSON_UESC_NDIGITS 4
+
+/*
+ * Maximum number of UTF-8 code units (bytes) per Unicode code point.
+ * From bora/lib/unicode/unicode/utf8.h.
+ */
+#ifndef U8_MAX_LENGTH
+#define U8_MAX_LENGTH 4
+#endif
+
+/*
+ *----------------------------------------------------------------------------
+ *
+ * CodeSet_JSonGetHex --
+ *
+ *      Retrieve and convert to an integer the four hex digits that are
+ *      part of the six character escape sequence that starts with "\u".
+ *
+ *      On entry, p points to the first code point following "\u".
+ *
+ *      The value is returned as an unsigned 32-bit integer, matching the
+ *      type of the variables the callers in this file pass in.
+ *
+ * Results:
+ *      TRUE on success, with *value set to the integer value.
+ *
+ *      FALSE on failure (fewer than four characters remain before end, or
+ *      one of them is not a hex digit); *value is left untouched.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------------
+ */
+
+static Bool
+CodeSet_JSonGetHex(const char *p,    // IN:
+                   const char *end,  // IN:
+                   uint32 *value)    // OUT:
+{
+   char hexBuf[JSON_UESC_NDIGITS + 1];  /* +1 for NUL */
+   int numHexDigits = 0;
+
+   ASSERT(p <= end);
+   ASSERT(value != NULL);
+
+   /*
+    * Assumes called with p set to first code point following "\u" and looks
+    * for four hex digits. No need to call CodeSet_GetUtf8 to verify that
+    * the code point length of these characters is one since it's always on
+    * a code point boundary and it's OK to check directly for specific
+    * ASCII characters in such a case, and if there's a match to an ASCII
+    * character then advancing the pointer by a single character will advance
+    * to the next code point.
+    */
+   while (numHexDigits < JSON_UESC_NDIGITS) {
+      if (p >= end || !CType_IsXDigit(*p)) {
+         return FALSE;
+      }
+      hexBuf[numHexDigits++] = *p++;
+   }
+
+   hexBuf[numHexDigits] = '\0';
+
+   /* Four hex digits are at most 0xFFFF, so the conversion cannot overflow. */
+   *value = (uint32)strtoul(hexBuf, NULL, 16);
+   return TRUE;
+}
+
+
+/*
+ *----------------------------------------------------------------------------
+ *
+ * CodeSet_JsonUnescapeU --
+ *
+ *      Handle a JSON escape sequence beginning with "\u", consisting either
+ *      of:
+ *         (1) "\u" followed by four hex digits; or
+ *         (2) two such consecutive sequences encoding a character
+ *             outside the Basic MultiLingual Plane as a UTF-16
+ *             surrogate pair.
+ *
+ *      Note "\u0000" is not allowed and is considered an error if
+ *      encountered. A lead surrogate that is not followed by a trail
+ *      surrogate, or a trail surrogate on its own, is also an error.
+ *
+ *      On entry to the routine, p should be pointing at the backslash
+ *      character that starts the (possible) escape sequence.
+ *
+ *      outBuf is the base of a char array of size >= U8_MAX_LENGTH + 1, i.e.,
+ *      large enough to hold a NUL-terminated UTF-8 encoding of any Unicode
+ *      code point.
+ *
+ * Results:
+ *      On success, the length of the escape sequence, with the unescaped
+ *      result plus a NUL terminator in outBuf.
+ *
+ *      0 on failure.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------------
+ */
+
+static int
+CodeSet_JsonUnescapeU(const char *p,    // IN:
+                      const char *end,  // IN:
+                      char *outBuf)     // OUT:
+{
+   uint32 w;
+   uint32 utf32Buf[2];  /* code point value plus 0 terminator */
+   char *utf8String;
+   size_t len;
+   const char *start = p;
+
+   /*
+    * Assumes called only if starts with "\u". No need to call
+    * CodeSet_GetUtf8 in this ASSERT since this is checking for specific ASCII
+    * characters - see comment preceding ASSERT in CodeSet_JsonUnescapeOne
+    * below.
+    */
+   ASSERT(p < end && *p == '\\');
+   ASSERT(&p[1] < end && p[1] == 'u');
+
+   /* Code point of 0 ("\u0000") not allowed. */
+   if (!CodeSet_JSonGetHex(&p[2], end, &w) || w == 0) {
+      return 0;
+   }
+
+   /* Advance p past "\u" and the hex digits that follow. */
+   p += 2 + JSON_UESC_NDIGITS;
+
+   /* If the value is a leading surrogate, then handle the trailing one. */
+   if (U16_IS_LEAD(w)) {
+      uint32 trail;
+
+      /*
+       * Check for '\', 'u', and four digits representing a trailer. As
+       * elsewhere, no need to call CodeSet_GetUtf8 since this is checking for
+       * specific ASCII characters, and bails out if any of the checks fail.
+       */
+      if (p < end && *p++ == '\\' && p < end && *p++ == 'u' &&
+          CodeSet_JSonGetHex(p, end, &trail) && U16_IS_TRAIL(trail)) {
+         w = U16_GET_SUPPLEMENTARY(w, trail);
+
+         /* Advance p past the digits that follow "\u". */
+         p += JSON_UESC_NDIGITS;
+      } else {
+         return 0;
+      }
+   } else if (U16_IS_TRAIL(w)) {
+      return 0;
+   }
+
+   /*
+    * To get the UTF-8 for this code point, create a UTF-32 string
+    * and convert to UTF-8.
+    */
+   utf32Buf[0] = w;
+   utf32Buf[1] = 0;  /* needs a 4-byte 0 terminator */
+
+   if (!CodeSet_UTF32ToUTF8((char *)utf32Buf, &utf8String)) {
+      return 0;
+   }
+
+   len = strlen(utf8String);
+   ASSERT(Unicode_IsBufferValid(utf8String, len, STRING_ENCODING_UTF8));
+   ASSERT(len <= U8_MAX_LENGTH);
+
+   /*
+    * The ASSERT above is not compiled into every build, so also refuse at
+    * run time to copy more than outBuf is documented to hold.
+    */
+   if (len > U8_MAX_LENGTH) {
+      free(utf8String);
+      return 0;
+   }
+
+   memcpy(outBuf, utf8String, len + 1);
+
+   free(utf8String);
+   return (int)(p - start);
+}
+
+
+/*
+ *----------------------------------------------------------------------------
+ *
+ * CodeSet_JsonUnescapeOne --
+ *
+ *      Handle a single JSON escape sequence.
+ *
+ *      On entry to the routine, p should be pointing at the backslash
+ *      character that starts the (possible) escape sequence.
+ *
+ *      outBuf is the base of a char array of size >= U8_MAX_LENGTH + 1, i.e.,
+ *      large enough to hold a NUL-terminated UTF-8 encoding of any Unicode
+ *      code point.
+ *
+ * Results:
+ *      On success, the length of the escape sequence, with the unescaped
+ *      result plus a NUL terminator in outBuf.
+ *
+ *      0 on failure.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------------
+ */
+
+static int
+CodeSet_JsonUnescapeOne(const char *p,    // IN:
+                        const char *end,  // IN:
+                        char *outBuf)     // OUT:
+{
+   const char *escStart = p;
+   char decoded;
+
+   /*
+    * The caller only hands over sequences that open with '\'. Because p is
+    * on a code point boundary, matching the byte against a specific ASCII
+    * character is enough to know the code point is one byte long; there is
+    * no need to consult CodeSet_GetUtf8 here.
+    */
+   ASSERT(p < end && *p == '\\');
+
+   /*
+    * The backslash is a single byte, so stepping one char lands on the next
+    * code point. If nothing follows, the escape is truncated.
+    */
+   p++;
+   if (p >= end) {
+      return 0;
+   }
+
+   /*
+    * Terminate outBuf for the one-character result that every
+    * two-character escape produces. The "\u" handler writes its own result
+    * and terminator over this.
+    */
+   outBuf[1] = '\0';
+
+   /*
+    * Matching against specific ASCII characters is again safe on a code
+    * point boundary. The first byte of a multi-byte UTF-8 code point
+    * matches none of the cases and is rejected by the default.
+    */
+   switch (*p) {
+   case 'u':
+      return CodeSet_JsonUnescapeU(escStart, end, outBuf);
+   case '\"':
+   case '\\':
+   case '/':
+      decoded = *p;
+      break;
+   case 'b':
+      decoded = '\b';
+      break;
+   case 'f':
+      decoded = '\f';
+      break;
+   case 'n':
+      decoded = '\n';
+      break;
+   case 'r':
+      decoded = '\r';
+      break;
+   case 't':
+      decoded = '\t';
+      break;
+   default:
+      return 0;
+   }
+
+   /* Two input characters consumed, one output character produced. */
+   outBuf[0] = decoded;
+   return 2;
+}
+
+
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * CodeSet_JsonUnescape --
+ *
+ *      Copy a UTF8 string, reverting any JSON escape sequences found within
+ *      the string according to the STD-90 spec at
+ *      https://tools.ietf.org/html/std90. This processes the same
+ *      escape sequences that are allowed by the jsmn parser, and generally
+ *      tries to follow the same logic as the jsmn escape parsing. Any
+ *      strings passed in to this routine have likely been through jsmn, and
+ *      any invalid escape sequences should have been rejected. However, this
+ *      routine and those it calls still check for the possibility of
+ *      invalid escape sequences and return failure when running into one, as
+ *      opposed to assuming and/or asserting they are valid.
+ *
+ *      A general unescape routine is difficult to do, so the logic here is
+ *      specific to JSON (as opposed to CodeSet_JsonEscape, which relies on
+ *      the more general CodeSet_Utf8Escape).
+ *
+ * Results:
+ *      NULL   Failure!
+ *     !NULL   Success! The un-escaped string. The caller is responsible to free
+ *             this.
+ *
+ * Side effects:
+ *      Returns a dynamically allocated string that must be freed by the
+ *      caller.
+ *
+ *-----------------------------------------------------------------------------
+ */
+
+char *
+CodeSet_JsonUnescape(const char *utf8)  // IN:
+{
+   DynBuf b;
+   char *res = NULL;
+   const char *p;
+   const char *end;
+   Bool success = TRUE;
+
+   /*
+    * Same contract as CodeSet_Utf8Escape: a NULL input trips the ASSERT
+    * where ASSERTs are enabled and is reported as a failure otherwise.
+    */
+   ASSERT(utf8 != NULL);
+   if (utf8 == NULL) {
+      return NULL;
+   }
+
+   DynBuf_Init(&b);
+   p = utf8;
+   end = p + strlen(p);
+
+   while (success && p < end) {
+      char unescaped[U8_MAX_LENGTH + 1];  /* +1 for NUL */
+      uint32 len = CodeSet_GetUtf8(p, end, NULL);
+
+      if (len == 0) {
+         /* Not a valid UTF-8 code point. */
+         success = FALSE;
+      } else if (len > 1 || *p != '\\') {
+         /* Anything other than a backslash is copied through unchanged. */
+         success = DynBuf_Append(&b, p, len);
+      } else {
+         /*
+          * A backslash starts an escape sequence; len becomes the number of
+          * input bytes the sequence occupies, or 0 if it is invalid.
+          */
+         len = CodeSet_JsonUnescapeOne(p, end, unescaped);
+         success = len != 0 &&
+                   DynBuf_Append(&b, unescaped, strlen(unescaped));
+      }
+
+      p += len;
+   }
+
+   if (success) {
+      res = DynBuf_DetachString(&b);
+   }
+
+   DynBuf_Destroy(&b);
+
+   return res;
+}
diff --git a/open-vm-tools/services/plugins/appInfo/appInfo.c b/open-vm-tools/services/plugins/appInfo/appInfo.c index cab928bc3..0f70f4c44 100644 --- a/open-vm-tools/services/plugins/appInfo/appInfo.c +++ b/open-vm-tools/services/plugins/appInfo/appInfo.c @@ -30,6 +30,7 @@ #include "appInfoInt.h" #include "vmware.h" +#include "codeset.h" #include "conf.h" #include "dynbuf.h" #include "escape.h" @@ -50,10 +51,6 @@ VM_EMBED_VERSION(VMTOOLSD_VERSION_STRING); #endif -#if defined(_WIN32) -#include "codeset.h" -#endif - /** * Maximum size of the packet size that appInfo plugin should send * to the VMX. Currently, this is set to 62 KB. @@ -102,51 +99,6 @@ static GSource *gAppInfoTimeoutSource = NULL; static void TweakGatherLoop(ToolsAppCtx *ctx, gboolean force); -/* - ***************************************************************************** - * EscapeJSONString -- - * - * Escapes a string to be included in JSON content. - * - * @param[in] str The string to be escaped. - * - * @retval Pointer to a heap-allocated memory. This holds the escaped content - * of the string passed by the caller. 
- * - ***************************************************************************** - */ - -static char * -EscapeJSONString(const char *str) // IN -{ - /* - * Escape '"' and '\' characters in the JSON string. - */ - - static const int bytesToEscape[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // " - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // '\' - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }; - - return Escape_DoString("\\u00", bytesToEscape, str, strlen(str), - NULL); -} - - /* ***************************************************************************** * SetGuestInfo -- @@ -333,7 +285,7 @@ AppInfoGatherTask(ToolsAppCtx *ctx, // IN goto next_entry; } - escapedCmd = EscapeJSONString(appInfo->appName); + escapedCmd = CodeSet_JsonEscape(appInfo->appName); if (NULL == escapedCmd) { g_warning("%s: Failed to escape the content of cmdName.\n", @@ -341,7 +293,7 @@ AppInfoGatherTask(ToolsAppCtx *ctx, // IN goto quit; } - escapedVersion = EscapeJSONString(appInfo->version); + escapedVersion = CodeSet_JsonEscape(appInfo->version); if (NULL == escapedVersion) { g_warning("%s: Failed to escape the content of version information.\n", __FUNCTION__);