lib/unicode: optimize Unicode_FindLastSubstrInRange

author VMware, Inc <>

Mon, 20 Dec 2010 21:46:41 +0000 (13:46 -0800)

committer Marcelo Vanzin <mvanzin@vmware.com>

Mon, 20 Dec 2010 21:46:41 +0000 (13:46 -0800)
author VMware, Inc <>
Mon, 20 Dec 2010 21:46:41 +0000 (13:46 -0800)
committer Marcelo Vanzin <mvanzin@vmware.com>
Mon, 20 Dec 2010 21:46:41 +0000 (13:46 -0800)
diff --git a/open-vm-tools/lib/unicode/unicodeSimpleOperations.c b/open-vm-tools/lib/unicode/unicodeSimpleOperations.c

index 6d0951eb22dffd22ec4ea20d36129e420c745d5c..d7c75e6f202b8091ad1700807e9cf1258b5950cd 100644 (file)
--- a/open-vm-tools/lib/unicode/unicodeSimpleOperations.c
+++ b/open-vm-tools/lib/unicode/unicodeSimpleOperations.c
@@ -330,6 +330,9 @@ Unicode_FindLastSubstrInRange(ConstUnicode str,              // IN:
                                UnicodeIndex strToFindLength)  // IN:
  {
     UnicodeIndex index;
+   UnicodeIndex strToFindEnd;
+   uint32 *utf32Source = NULL;
+   uint32 *utf32Search = NULL;
  
     ASSERT(str);
     ASSERT(strStart >= 0);
@@ -339,6 +342,22 @@ Unicode_FindLastSubstrInRange(ConstUnicode str,              // IN:
     ASSERT(strToFindStart >= 0);
     ASSERT((strToFindLength >= 0) || (strToFindLength == -1));
  
+   /*
+    * Convert the string to be searched and the search string to UTF32.
+    */
+
+   if (!CodeSet_UTF8ToUTF32(str, (char **) &utf32Source)) {
+      Panic("%s: invalid UTF8 string @ %p\n", __FUNCTION__, str);
+   }
+
+   if (!CodeSet_UTF8ToUTF32(strToFind, (char **) &utf32Search)) {
+      Panic("%s: invalid UTF8 string @ %p\n", __FUNCTION__, strToFind);
+   }
+
+   /*
+    * Do any bounds cleanup and checking that is necessary...
+    */
+
     if (strLength < 0) {
        strLength = CodeSet_LengthInCodePoints(str) - strStart;
     }
@@ -348,30 +367,50 @@ Unicode_FindLastSubstrInRange(ConstUnicode str,              // IN:
     }
  
     if (strLength < strToFindLength) {
-      return UNICODE_INDEX_NOT_FOUND;
+      index = UNICODE_INDEX_NOT_FOUND;
+      goto bail;
     }
  
     if (strToFindLength == 0) {
-      return strStart;
+      index = strStart;
+      goto bail;
     }
+  
+   /*
+    * Attempt to find the last occurrence of the search string in the string
+    * to be searched.
+    */
  
-   index = UNICODE_INDEX_NOT_FOUND;
+   strToFindEnd = strToFindStart + strToFindLength - 1;
  
-   while (TRUE) {
-      UnicodeIndex searchIndex;
+   for (index = strStart + strLength - 1; index >= strStart; index--) {
+      if (utf32Source[index] == utf32Search[strToFindEnd]) {
+         UnicodeIndex strSubOffset = index;
+         UnicodeIndex strToFindSubOffset = strToFindEnd;
  
-      searchIndex = Unicode_FindSubstrInRange(str, strStart,
-                                              strLength, strToFind,
-                                              strToFindStart, strToFindLength);
+         while (TRUE) {
+            if (strToFindSubOffset == strToFindStart) {
+               index = strSubOffset;  // Found the substring.
+               goto bail;
+            }
  
-      if (searchIndex == UNICODE_INDEX_NOT_FOUND) {
-         break;
-      } else {
-         index = searchIndex;
-         strStart = searchIndex + 1;
+            strToFindSubOffset--;
+            strSubOffset--;
+
+            if (utf32Source[strSubOffset] != utf32Search[strToFindSubOffset]) {
+               break;
+            }
+         }
        }
     }
  
+   index = UNICODE_INDEX_NOT_FOUND;
+
+bail:
+
+   free(utf32Source);
+   free(utf32Search);
+
     return index;
  }
  
@@ -408,8 +447,8 @@ Unicode_Substr(ConstUnicode str,     // IN:
                 UnicodeIndex length)  // IN:
  {
     char *substr;
-   uint32 *utf32;
     uint32 codePointLen;
+   uint32 *utf32 = NULL;
  
     ASSERT(str);
     ASSERT((start >= 0) || (start == -1));
author	VMware, Inc <>
	Mon, 20 Dec 2010 21:46:41 +0000 (13:46 -0800)
committer	Marcelo Vanzin <mvanzin@vmware.com>
	Mon, 20 Dec 2010 21:46:41 +0000 (13:46 -0800)