/* Returns the size of the data when truncated to be less than or equal to
   max_new_size, making sure UTF-8 character boundaries are respected.

   data         - input bytes; at least old_size bytes are readable
   old_size     - current size of data in bytes
   max_new_size - upper limit for the returned size

   Returns old_size unchanged if it already fits, otherwise the largest
   size <= max_new_size that does not cut through the multi-byte character
   straddling the limit. Only the character at the new boundary is
   examined; the rest of the data is not validated. */
size_t uni_utf8_data_truncate(const unsigned char *data, size_t old_size,
			      size_t max_new_size)
{
	if (max_new_size >= old_size)
		return old_size;
	if (max_new_size == 0)
		return 0;

	/* data[max_new_size] is the first byte that would be cut off. Unless
	   it is a continuation byte (10xxxxxx), a new character starts there
	   (ASCII or a multi-byte lead byte), so the cut is already on a
	   character boundary and nothing more needs to be dropped. */
	if ((data[max_new_size] & 0xc0) != 0x80)
		return max_new_size;

	/* The cut lands inside a multi-byte character: drop the continuation
	   bytes that precede the cut ... */
	while (max_new_size > 0 && (data[max_new_size-1] & 0xc0) == 0x80)
		max_new_size--;
	/* ... and then the lead byte (11xxxxxx) that started the character. */
	if (max_new_size > 0 && (data[max_new_size-1] & 0xc0) == 0xc0)
		max_new_size--;
	return max_new_size;
}
*/ +size_t uni_utf8_data_truncate(const unsigned char *data, size_t old_size, + size_t max_new_size); /* surrogate handling */ static inline unichar_t uni_join_surrogate(unichar_t high, unichar_t low)