From: VMware, Inc <> Date: Mon, 20 Dec 2010 21:44:25 +0000 (-0800) Subject: lib/unicode: Unicode_Substr is broken X-Git-Tag: 2010.12.19-339835~58 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=fd327a1c98b9dea1414888ee8a5de4cb944e95e9;p=thirdparty%2Fopen-vm-tools.git lib/unicode: Unicode_Substr is broken There is a confusion of code points and code units within lib/unicode that needs to be cleaned up. Here I fix Unicode_Substr using the technique I plan to use across most of the functions in unicodeSimpleOperations.c. More changes will ensue until the base PR is fixed. Signed-off-by: Marcelo Vanzin --- diff --git a/open-vm-tools/lib/unicode/unicodeSimpleOperations.c b/open-vm-tools/lib/unicode/unicodeSimpleOperations.c index 4b2ac68f4..89750fa0d 100644 --- a/open-vm-tools/lib/unicode/unicodeSimpleOperations.c +++ b/open-vm-tools/lib/unicode/unicodeSimpleOperations.c @@ -379,9 +379,12 @@ Unicode_FindLastSubstrInRange(ConstUnicode str, // IN * Indices and lengths that are out of bounds are pinned to the * edges of the string. * - * Pass -1 for any length parameter to indicate "from start until + * Pass -1 for the length parameter to indicate "from start until * end of string". * + * The start and length arguments are in code points - unicode + * "characters" - not bytes! + * * Results: * The newly-allocated substring of 'str' in the range [index, * index + length). Caller must free with Unicode_Free. 
@@ -393,13 +396,42 @@ Unicode_FindLastSubstrInRange(ConstUnicode str, // IN */ Unicode -Unicode_Substr(ConstUnicode str, // IN - UnicodeIndex start, // IN - UnicodeIndex length) // IN +Unicode_Substr(ConstUnicode str, // IN: + UnicodeIndex start, // IN: + UnicodeIndex length) // IN: { - UnicodePinIndices(str, &start, &length); + char *substr; + uint32 *utf32; + uint32 codePointLen; + + ASSERT(str); + ASSERT((start >= 0) || (start == -1)); + ASSERT((length >= 0) || (length == -1)); + + if (!CodeSet_UTF8ToUTF32(str, (char **) &utf32)) { + Panic("%s: invalid UTF8 string @ %p\n", __FUNCTION__, str); + } + + codePointLen = 0; + while (utf32[codePointLen] != 0) { + codePointLen++; + } + + if ((start < 0) || (start > codePointLen)) { + start = codePointLen; + } + + if ((length < 0) || ((start + length) > codePointLen)) { + length = codePointLen - start; + } + + utf32[start + length] = 0; + + CodeSet_UTF32ToUTF8((char *) &utf32[start], &substr); + + free(utf32); - return Util_SafeStrndup(((const char *)str) + start, length); + return substr; } @@ -453,13 +485,10 @@ Unicode_ReplaceRange(ConstUnicode destination, // IN result = Util_SafeMalloc(resultLength + 1); // Start with the destination bytes before the substring to be replaced. - memcpy(result, - destination, - destinationStart); + memcpy(result, destination, destinationStart); // Insert the substring of source in place of the destination substring. - memcpy(result + destinationStart, - (const char *)source + sourceStart, + memcpy(result + destinationStart, (const char *) source + sourceStart, sourceLength); // Append the remaining bytes of destination after the replaced substring.