From a0044466cc46baf25a316ea63781c60aa52b58ca Mon Sep 17 00:00:00 2001
From: Timo Sirainen
Date: Thu, 19 Aug 2010 18:06:22 +0100
Subject: [PATCH] UTF-8 string validity was still checked incorrectly.

---
 src/lib/unichar.c | 5 ++---
 src/lib/unichar.h | 5 +++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/lib/unichar.c b/src/lib/unichar.c
index 9d5d406f6a..961b2fe96a 100644
--- a/src/lib/unichar.c
+++ b/src/lib/unichar.c
@@ -316,7 +316,7 @@ int uni_utf8_to_decomposed_titlecase(const void *_input, size_t max_len,
 static inline unsigned int
 is_valid_utf8_seq(const unsigned char *input, unsigned int size)
 {
-	size_t i, len;
+	unsigned int i, len;
 
 	len = uni_utf8_char_bytes(input[0]);
 	if (unlikely(len > size || len == 1))
@@ -325,8 +325,7 @@ is_valid_utf8_seq(const unsigned char *input, unsigned int size)
 	/* the rest of the chars should be in 0x80..0xbf range.
 	   anything else is start of a sequence or invalid */
 	for (i = 1; i < len; i++) {
-		if (unlikely(uni_utf8_char_bytes(input[i]) != len-i ||
-			     input[i] < 0x80 || input[i] >= 0xbf))
+		if (unlikely(input[i] < 0x80 || input[i] > 0xbf))
 			return 0;
 	}
 	return len;
diff --git a/src/lib/unichar.h b/src/lib/unichar.h
index 5dfaf8e70f..3132bd1bec 100644
--- a/src/lib/unichar.h
+++ b/src/lib/unichar.h
@@ -43,8 +43,9 @@ int uni_utf8_get_char_n(const void *input, size_t max_len, unichar_t *chr_r);
 /* Returns UTF-8 string length with maximum input size. */
 unsigned int uni_utf8_strlen_n(const void *input, size_t size) ATTR_PURE;
 
-/* Returns the number of bytes belonging to this partial UTF-8 character.
-   Invalid input is returned with length 1. */
+/* Returns the number of bytes belonging to this UTF-8 character. The given
+   parameter is the first byte of the UTF-8 sequence. Invalid input is
+   returned with length 1. */
 static inline unsigned int ATTR_CONST
 uni_utf8_char_bytes(char chr)
 {
-- 
2.47.3