Uninline CodeSet_Utf8FindCodePointBoundary

author Oliver Kurth <okurth@vmware.com>

Fri, 15 Sep 2017 18:23:15 +0000 (11:23 -0700)

committer Oliver Kurth <okurth@vmware.com>

Fri, 15 Sep 2017 18:23:15 +0000 (11:23 -0700)
author Oliver Kurth <okurth@vmware.com>
Fri, 15 Sep 2017 18:23:15 +0000 (11:23 -0700)
committer Oliver Kurth <okurth@vmware.com>
Fri, 15 Sep 2017 18:23:15 +0000 (11:23 -0700)
diff --git a/open-vm-tools/lib/include/codeset.h b/open-vm-tools/lib/include/codeset.h

index 66f42109951fa6d09668709b4f61c96513fe24dd..23a0b8d1fe5954e16c4e3789e8ba9170b90956a1 100644 (file)
--- a/open-vm-tools/lib/include/codeset.h
+++ b/open-vm-tools/lib/include/codeset.h
@@ -391,6 +391,9 @@ Bool CodeSet_IsValidUTF8(const char *bufIn,  // IN:
  
  Bool CodeSet_IsStringValidUTF8(const char *string);  // IN:
  
+size_t CodeSet_Utf8FindCodePointBoundary(const char *buf,   // IN:
+                                         size_t offset);    // IN:
+
  /*
   *-----------------------------------------------------------------------------
   *
@@ -456,91 +459,6 @@ CodeSet_Utf16ToUtf8(const utf16_t *strW)  // IN:
  }
  
  
-/*
- *-----------------------------------------------------------------------------
- *
- * CodeSet_Utf8FindCodePointBoundary
- *
- *      Determine if buf[offset] is a valid UTF-8 code point boundary
- *      and find the previous boundary if it is not. The contents of
- *      buf[offset] need not be defined, only data prior to this
- *      location is examined. Useful for finding a suitable place to
- *      put a NUL terminator.
- *
- * Results:
- *
- *      Returns the offset of the byte immediately following the last
- *      complete UTF-8 code point in buf that is entirely within the
- *      range [0, offset-1]. Note that if the final UTF-8 code point
- *      is complete, the input offset will be returned unchanged.
- *
- * Side effects:
- *      None
- *
- *-----------------------------------------------------------------------------
- */
-
-static INLINE size_t
-CodeSet_Utf8FindCodePointBoundary(const char *buf,   // IN
-                                  size_t offset)     // IN
-{
-   size_t origOffset = offset;
-   signed char c;
-
-   if (offset > 0) {
-
-      /*
-       * Back up 1 byte and then find the start of the UTF-8 code
-       * point occupying that location.
-       */
-
-      offset--;
-      while (offset > 0 && (buf[offset] & 0xc0) == 0x80) {
-         offset--;
-      }
-
-      /*
-       * Maximum UTF-8 code point length is 4
-       */
-
-      ASSERT(origOffset - offset <= 4);
-
-      c = buf[offset];
-
-      /*
-       * The first byte of a UTF-8 code point needs to be one of
-       * 0b0XXXXXXX, 0b110XXXXX, 0b1110XXXX, 0b11110XXX
-       */
-
-      ASSERT(c >= 0 || (c >> 5) == -2 || (c >> 4) == -2 || (c >> 3) == -2);
-
-      /*
-       * offset now points to the start of a UTF-8 code point. If it
-       * is a single byte or if the length, as encoded in the first
-       * byte, matches the number of bytes we have backed up, then the
-       * entire code point is present, so the original offset is a
-       * valid code point starting offset.
-       *
-       * Length is encoded as
-       * 2 bytes: 0b110XXXXX
-       * 3 bytes: 0b1110XXXX
-       * 4 bytes: 0b11110XXX
-       * Thus the first byte is -2 when shifted right (signed) by
-       * (7 - length).
-       */
-
-      if (c >= 0 || (c >> (7 - origOffset + offset)) == -2) {
-         return origOffset;
-      }
-
-      /*
-       * Else we truncated a code point. Return its starting point.
-       */
-   }
-   return offset;
-}
-
-
  /*
   *-----------------------------------------------------------------------------
   *
diff --git a/open-vm-tools/lib/misc/codesetBase.c b/open-vm-tools/lib/misc/codesetBase.c

index 27346b09386680c541b81f24414267bd92c45bb5..8f614e603f81baac994d59063237f1e8ff42e274 100644 (file)
--- a/open-vm-tools/lib/misc/codesetBase.c
+++ b/open-vm-tools/lib/misc/codesetBase.c
@@ -391,3 +391,87 @@ CodeSet_UTF32ToUTF8(const char *utf32,  // IN:
     return TRUE;
  }
  
+
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * CodeSet_Utf8FindCodePointBoundary
+ *
+ *      Determine if buf[offset] is a valid UTF-8 code point boundary
+ *      and find the previous boundary if it is not. The contents of
+ *      buf[offset] need not be defined, only data prior to this
+ *      location is examined. Useful for finding a suitable place to
+ *      put a NUL terminator.
+ *
+ *      The function walks backwards from buf[offset - 1] over
+ *      continuation bytes (0b10XXXXXX) until it reaches a byte that is
+ *      not a continuation byte, or index 0, whichever comes first. It
+ *      then compares the length announced by that byte with the number
+ *      of bytes it backed up.
+ *
+ *      The input is expected to be well-formed UTF-8: two ASSERTs check
+ *      that at most 4 bytes were backed up and that the byte the scan
+ *      stopped on has one of the valid lead-byte forms. What happens
+ *      when those checks fail depends on how ASSERT is defined for the
+ *      build, which is not visible from this function.
+ *
+ * Results:
+ *
+ *      Returns the offset of the byte immediately following the last
+ *      complete UTF-8 code point in buf that is entirely within the
+ *      range [0, offset-1]. Note that if the final UTF-8 code point
+ *      is complete, the input offset will be returned unchanged.
+ *
+ *      If offset is 0, buf is not read and 0 is returned.
+ *
+ * Side effects:
+ *      None
+ *
+ *-----------------------------------------------------------------------------
+ */
+
+size_t
+CodeSet_Utf8FindCodePointBoundary(const char *buf,   // IN: UTF-8 data; only bytes before offset are read
+                                  size_t offset)     // IN: candidate boundary, as a byte index into buf
+{
+   size_t origOffset = offset;
+   signed char c;
+
+   if (offset > 0) {
+
+      /*
+       * Back up 1 byte and then find the start of the UTF-8 code
+       * point occupying that location.
+       *
+       * A continuation byte has the form 0b10XXXXXX, which is what
+       * the 0xc0 mask / 0x80 compare tests for. The offset > 0 guard
+       * stops the scan at index 0 whatever byte is stored there.
+       */
+
+      offset--;
+      while (offset > 0 && (buf[offset] & 0xc0) == 0x80) {
+         offset--;
+      }
+
+      /*
+       * Maximum UTF-8 code point length is 4, so the scan above must
+       * not have backed up over more than 4 bytes in total.
+       */
+
+      ASSERT(origOffset - offset <= 4);
+
+      c = buf[offset];
+
+      /*
+       * The first byte of a UTF-8 code point needs to be one of
+       * 0b0XXXXXXX, 0b110XXXXX, 0b1110XXXX, 0b11110XXX
+       *
+       * c is a signed char, so a byte with the top bit set is
+       * negative. Shifting it right until only the run of leading 1
+       * bits and the 0 bit that ends it remain yields -2 (0b...1110).
+       * A continuation byte (0b10XXXXXX) matches none of the shifts
+       * tested here and is therefore rejected by this ASSERT.
+       *
+       * NOTE(review): this relies on >> of a negative value being an
+       * arithmetic (sign-extending) shift, which C leaves
+       * implementation-defined.
+       */
+
+      ASSERT(c >= 0 || (c >> 5) == -2 || (c >> 4) == -2 || (c >> 3) == -2);
+
+      /*
+       * offset now points to the start of a UTF-8 code point. If it
+       * is a single byte or if the length, as encoded in the first
+       * byte, matches the number of bytes we have backed up, then the
+       * entire code point is present, so the original offset is a
+       * valid code point starting offset.
+       *
+       * Length is encoded as
+       * 2 bytes: 0b110XXXXX
+       * 3 bytes: 0b1110XXXX
+       * 4 bytes: 0b11110XXX
+       * Thus the first byte is -2 when shifted right (signed) by
+       * (7 - length).
+       *
+       * Here "length" is the number of bytes backed up, that is
+       * origOffset - offset, so the shift count is written as
+       * 7 - origOffset + offset. The arithmetic is done in size_t;
+       * an intermediate wrap of 7 - origOffset is undone by adding
+       * offset, giving 3..6 whenever the length ASSERT above holds.
+       *
+       * A multi-byte lead byte with fewer bytes behind it than it
+       * announces does not compare equal to -2 at this shift count,
+       * so control falls out of this test.
+       */
+
+      if (c >= 0 || (c >> (7 - origOffset + offset)) == -2) {
+         return origOffset;
+      }
+
+      /*
+       * Else we truncated a code point. Return its starting point.
+       * This falls through to the return below, which also serves
+       * the offset == 0 case.
+       */
+   }
+   return offset;
+}
diff --git a/open-vm-tools/rpctool/Makefile.am b/open-vm-tools/rpctool/Makefile.am

index cb05cf8ae4c6a71c47485517105201cb04f37684..1067f827127243e99c9a13e43898e8824eb303ca 100644 (file)
--- a/open-vm-tools/rpctool/Makefile.am
+++ b/open-vm-tools/rpctool/Makefile.am
@@ -25,4 +25,5 @@ vmware_rpctool_LDADD += ../lib/rpcOut/libRpcOut.la
  vmware_rpctool_LDADD += ../lib/message/libMessage.la
  vmware_rpctool_LDADD += ../lib/backdoor/libBackdoor.la
  vmware_rpctool_LDADD += ../lib/string/libString.la
+vmware_rpctool_LDADD += ../lib/misc/libMisc.la
author	Oliver Kurth <okurth@vmware.com>
	Fri, 15 Sep 2017 18:23:15 +0000 (11:23 -0700)
committer	Oliver Kurth <okurth@vmware.com>
	Fri, 15 Sep 2017 18:23:15 +0000 (11:23 -0700)
open-vm-tools/lib/include/codeset.h		patch \| blob \| blame \| history
open-vm-tools/lib/misc/codesetBase.c		patch \| blob \| blame \| history
open-vm-tools/rpctool/Makefile.am		patch \| blob \| blame \| history