From: VMware, Inc <> Date: Mon, 20 Dec 2010 21:45:14 +0000 (-0800) Subject: lib/unicode: fix Unicode_ReplaceRange X-Git-Tag: 2010.12.19-339835~57 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=decd9354f63e941cfe729617af2c9cee80af577d;p=thirdparty%2Fopen-vm-tools.git lib/unicode: fix Unicode_ReplaceRange Continuing the lib/unicode bug hunt, Unicode_ReplaceRange gets fixed. Make Unicode_Join not use Unicode_Append so there are no potential recursion problems between routines. Signed-off-by: Marcelo Vanzin --- diff --git a/open-vm-tools/lib/unicode/unicodeSimpleOperations.c b/open-vm-tools/lib/unicode/unicodeSimpleOperations.c index 89750fa0d..4bcbb0050 100644 --- a/open-vm-tools/lib/unicode/unicodeSimpleOperations.c +++ b/open-vm-tools/lib/unicode/unicodeSimpleOperations.c @@ -57,6 +57,9 @@ * Pass -1 for any length parameter to indicate "from start until * end of string". * + * The start and length arguments are in code points - unicode + * "characters" - not bytes! + * * Results: * -1 if str1 < str2, 0 if str1 == str2, 1 if str1 > str2. * @@ -91,10 +94,11 @@ Unicode_CompareRange(ConstUnicode str1, // IN UnicodePinIndices(str2, &str2Start, &str2Length); /* - * TODO: Allocating substrings is a performance hit. We should do - * this search in-place. (However, searching UTF-8 requires tender loving + * TODO: Allocating substrings is a performance hit. We should do this + * search in-place. (However, searching UTF-8 requires tender loving * care, and it's just easier to search UTF-16.) */ + substr1 = Unicode_Substr(str1, str1Start, str1Length); if (!substr1) { goto out; @@ -108,6 +112,7 @@ Unicode_CompareRange(ConstUnicode str1, // IN /* * XXX TODO: Need to normalize the incoming strings to NFC or NFD. */ + substr1UTF16 = Unicode_GetAllocUTF16(substr1); if (!substr1UTF16) { goto out; @@ -119,10 +124,10 @@ Unicode_CompareRange(ConstUnicode str1, // IN } /* - * TODO: This is the naive string search algorithm, which is - * O(n * m).
We can do better with KMP or Boyer-Moore if this - * proves to be a bottleneck. + * TODO: This is the naive string search algorithm, which is O(n * m). We + * can do better with KMP or Boyer-Moore if this proves to be a bottleneck. */ + while (TRUE) { codeUnit1 = *(substr1UTF16 + i); codeUnit2 = *(substr2UTF16 + i); @@ -156,9 +161,9 @@ Unicode_CompareRange(ConstUnicode str1, // IN } /* - * The two UTF-16 code units differ. If they're the first code unit - * of a surrogate pair (for Unicode values past U+FFFF), decode the - * surrogate pair into a full Unicode code point. + * The two UTF-16 code units differ. If they're the first code unit of a + * surrogate pair (for Unicode values past U+FFFF), decode the surrogate + * pair into a full Unicode code point. */ if (U16_IS_SURROGATE(codeUnit1)) { @@ -453,6 +458,9 @@ Unicode_Substr(ConstUnicode str, // IN: * Pass -1 for any length parameter to indicate "from start until * end of string". * + * The start and length arguments are in code points - unicode + * "characters" - not bytes! + * * Results: * A newly-allocated string containing the results of the replace * operation. Caller must free with Unicode_Free. 
@@ -464,41 +472,37 @@ Unicode_Substr(ConstUnicode str, // IN: */ Unicode -Unicode_ReplaceRange(ConstUnicode destination, // IN - UnicodeIndex destinationStart, // IN - UnicodeIndex destinationLength, // IN - ConstUnicode source, // IN - UnicodeIndex sourceStart, // IN - UnicodeIndex sourceLength) // IN +Unicode_ReplaceRange(ConstUnicode dest, // IN: + UnicodeIndex destStart, // IN: + UnicodeIndex destLength, // IN: + ConstUnicode src, // IN: + UnicodeIndex srcStart, // IN: + UnicodeIndex srcLength) // IN: { - UnicodeIndex destNumCodeUnits; - UnicodeIndex resultLength; - char *result; - - UnicodePinIndices(destination, &destinationStart, &destinationLength); - UnicodePinIndices(source, &sourceStart, &sourceLength); - - destNumCodeUnits = Unicode_LengthInCodeUnits(destination); - - resultLength = destNumCodeUnits - destinationLength + sourceLength; + Unicode result; + Unicode stringOne; + Unicode stringTwo; + Unicode stringThree; - result = Util_SafeMalloc(resultLength + 1); + ASSERT(dest); + ASSERT((destStart >= 0) || (destStart == -1)); + ASSERT((destLength >= 0) || (destLength == -1)); - // Start with the destination bytes before the substring to be replaced. - memcpy(result, destination, destinationStart); + ASSERT(src); + ASSERT((srcStart >= 0) || (srcStart == -1)); + ASSERT((srcLength >= 0) || (srcLength == -1)); - // Insert the substring of source in place of the destination substring. - memcpy(result + destinationStart, (const char *) source + sourceStart, - sourceLength); + stringOne = Unicode_Substr(dest, 0, destStart); + stringTwo = Unicode_Substr(src, srcStart, srcLength); + stringThree = Unicode_Substr(dest, destStart + destLength, -1); - // Append the remaining bytes of destination after the replaced substring. 
- memcpy(result + destinationStart + sourceLength, - (const char *)destination + destinationStart + destinationLength, - destNumCodeUnits - destinationStart - destinationLength); + result = Unicode_Join(stringOne, stringTwo, stringThree, NULL); - result[resultLength] = '\0'; + Unicode_Free(stringOne); + Unicode_Free(stringTwo); + Unicode_Free(stringThree); - return (Unicode)result; + return result; } @@ -523,29 +527,30 @@ Unicode_ReplaceRange(ConstUnicode destination, // IN Unicode Unicode_Join(ConstUnicode first, // IN: - ...) // IN + ...) // IN: { - va_list args; Unicode result; - ConstUnicode cur; if (first == NULL) { - return NULL; - } + result = NULL; + } else { + va_list args; + ConstUnicode cur; - result = Unicode_Duplicate(first); + result = Unicode_Duplicate(first); - va_start(args, first); + va_start(args, first); - while ((cur = va_arg(args, ConstUnicode)) != NULL) { - Unicode temp; + while ((cur = va_arg(args, ConstUnicode)) != NULL) { + Unicode temp; - temp = Unicode_Append(result, cur); - Unicode_Free(result); - result = temp; - } + temp = Unicode_Format("%s%s", result, cur); + Unicode_Free(result); + result = temp; + } - va_end(args); + va_end(args); + } return result; }