static inline unsigned int
is_valid_utf8_seq(const unsigned char *input, unsigned int size)
{
- size_t i, len;
+ unsigned int i, len;
len = uni_utf8_char_bytes(input[0]);
if (unlikely(len > size || len == 1))
/* the rest of the chars should be in 0x80..0xbf range.
anything else is start of a sequence or invalid */
for (i = 1; i < len; i++) {
- if (unlikely(uni_utf8_char_bytes(input[i]) != len-i ||
- input[i] < 0x80 || input[i] >= 0xbf))
+ if (unlikely(input[i] < 0x80 || input[i] > 0xbf))
return 0;
}
return len;
/* Returns UTF-8 string length with maximum input size. */
unsigned int uni_utf8_strlen_n(const void *input, size_t size) ATTR_PURE;
-/* Returns the number of bytes belonging to this partial UTF-8 character.
- Invalid input is returned with length 1. */
+/* Returns the number of bytes belonging to this UTF-8 character. The given
+ parameter is the first byte of the UTF-8 sequence. Invalid input is
+ returned with length 1. */
static inline unsigned int ATTR_CONST
uni_utf8_char_bytes(char chr)
{