Bool CodeSet_IsValidUTF8String(const char *bufIn, // IN:
size_t sizeIn); // IN:
+typedef struct { // NOTE(review): looks like one entry of an escape table -- confirm against the implementation
+ char c; // a single char; presumably the character to be escaped -- TODO confirm
+ char *escape; // a string pointer; presumably the text emitted in place of c -- TODO confirm
+} CodeSetEscapeEntry; // passed by const pointer as 'entries' to CodeSet_Utf8Escape
+
+char *CodeSet_Utf8Escape(const char *utf8, // IN: input string; presumably UTF-8 encoded -- TODO confirm
+ const CodeSetEscapeEntry *entries); // IN: escape entries; length/termination convention not visible here
+
+char *CodeSet_JsonEscape(const char *utf8); // IN: input string; presumably UTF-8 encoded -- TODO confirm
+
/*
*-----------------------------------------------------------------------------
*
/*********************************************************
- * Copyright (C) 2010-2017 VMware, Inc. All rights reserved.
+ * Copyright (C) 2010-2020 VMware, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
uint8 *e;
uint32 c;
int len;
+
ASSERT(string < end);
c = *p;
goto out;
}
- if ((c < 0xc2) || (c > 0xf4)) {
- // 0x81 to 0xbf are not valid first bytes
- // 0xc0 and 0xc1 cannot appear in UTF-8, see below
- // leading char can not be > 0xf4, illegal as well
+ if ((c < 0xC2) || (c > 0xF4)) {
+ // 0x80 to 0xBF are not valid first bytes
+ // 0xC0 and 0xC1 cannot appear in UTF-8, see below
+ // leading char cannot be > 0xF4, illegal as well
return 0;
}
- if (c < 0xe0) {
+ if (c < 0xE0) {
// U+0080 - U+07FF: 2 bytes of UTF-8.
- c -= 0xc0;
+ c -= 0xC0;
len = 2;
- } else if (c < 0xf0) {
+ } else if (c < 0xF0) {
// U+0800 - U+FFFF: 3 bytes of UTF-8.
- c -= 0xe0;
+ c -= 0xE0;
len = 3;
} else {
// U+10000 - U+10FFFF: 4 bytes of UTF-8.
- c -= 0xf0;
+ c -= 0xF0;
len = 4;
}
}
while (++p < e) {
- if ((*p & 0xc0) != 0x80) {
+ if ((*p & 0xC0) != 0x80) {
// bad trailing byte
return 0;
}
* termination.
*
* This test does not work for len == 2, but that case is handled
- * by requiring the first byte to be 0xc2 or greater (see above).
+ * by requiring the first byte to be 0xC2 or greater (see above).
*/
if (c < 1U << (len * 5 - 4)) {
char *end;
uint32 codePoints = 0;
- ASSERT(utf8);
+ ASSERT(utf8 != NULL);
p = (char *) utf8;
end = p + strlen(utf8);
*/
int
-CodeSet_CodePointOffsetToByteOffset(const char *utf8, // IN
- int codePointOffset) // IN
+CodeSet_CodePointOffsetToByteOffset(const char *utf8, // IN:
+ int codePointOffset) // IN:
{
const char *p;
const char *end;
- ASSERT(utf8);
+ ASSERT(utf8 != NULL);
p = utf8;
end = p + strlen(utf8);
uint32 *ptr;
int codePoints;
- ASSERT(utf32);
+ ASSERT(utf32 != NULL);
if (utf8 == NULL) { // NULL is not an error
*utf32 = NULL;
uint8 bytes[4];
} value;
- ASSERT(utf8);
+ ASSERT(utf8 != NULL);
if (utf32 == NULL) { // NULL is not an error
*utf8 = NULL;
return TRUE;
}
-