From f66c8939c39e6bcd9dd5482bfd9689bd177ce0d4 Mon Sep 17 00:00:00 2001 From: Timo Sirainen Date: Sat, 10 Jan 2015 04:30:40 +0200 Subject: [PATCH] lib: Added uni_utf8_partial_strlen_n() --- src/lib/test-unichar.c | 16 ++++++++++++++++ src/lib/unichar.c | 18 ++++++++++++++---- src/lib/unichar.h | 6 ++++++ 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/src/lib/test-unichar.c b/src/lib/test-unichar.c index 3221e19a09..6607809616 100644 --- a/src/lib/test-unichar.c +++ b/src/lib/test-unichar.c @@ -5,6 +5,20 @@ #include "buffer.h" #include "unichar.h" +static void test_unichar_uni_utf8_partial_strlen_n(void) +{ + static const char input[] = "\xC3\xA4\xC3\xA4"; + size_t pos; + + test_begin("uni_utf8_partial_strlen_n()"); + test_assert(uni_utf8_partial_strlen_n(input, 1, &pos) == 0 && pos == 0); + test_assert(uni_utf8_partial_strlen_n(input, 2, &pos) == 1 && pos == 2); + test_assert(uni_utf8_partial_strlen_n(input, 3, &pos) == 1 && pos == 2); + test_assert(uni_utf8_partial_strlen_n(input, 4, &pos) == 2 && pos == 4); + test_assert(uni_utf8_partial_strlen_n(input, (size_t)-1, &pos) == 2 && pos == 4); + test_end(); +} + void test_unichar(void) { static const char overlong_utf8[] = "\xf8\x80\x95\x81\xa1"; @@ -32,4 +46,6 @@ void test_unichar(void) test_assert(!uni_utf8_str_is_valid(overlong_utf8)); test_assert(uni_utf8_get_char(overlong_utf8, &chr2) < 0); test_end(); + + test_unichar_uni_utf8_partial_strlen_n(); } diff --git a/src/lib/unichar.c b/src/lib/unichar.c index ed1c28c9e2..e0f5f25a9f 100644 --- a/src/lib/unichar.c +++ b/src/lib/unichar.c @@ -192,18 +192,28 @@ unsigned int uni_utf8_strlen(const char *input) return uni_utf8_strlen_n(input, (size_t)-1); } -unsigned int uni_utf8_strlen_n(const void *_input, size_t size) +unsigned int uni_utf8_strlen_n(const void *input, size_t size) +{ + size_t partial_pos; + + return uni_utf8_partial_strlen_n(input, size, &partial_pos); +} + +unsigned int uni_utf8_partial_strlen_n(const void *_input, size_t size, + size_t 
*partial_pos_r) { const unsigned char *input = _input; - unsigned int len = 0; + unsigned int count, len = 0; size_t i; for (i = 0; i < size && input[i] != '\0'; ) { - i += uni_utf8_char_bytes(input[i]); - if (i > size) + count = uni_utf8_char_bytes(input[i]); + if (i + count > size) break; + i += count; len++; } + *partial_pos_r = i; return len; } diff --git a/src/lib/unichar.h b/src/lib/unichar.h index a5466b2dcb..dbc3bd8d45 100644 --- a/src/lib/unichar.h +++ b/src/lib/unichar.h @@ -55,6 +55,12 @@ int uni_utf8_get_char_n(const void *input, size_t max_len, unichar_t *chr_r); unsigned int uni_utf8_strlen(const char *input) ATTR_PURE; /* Returns UTF-8 string length with maximum input size. */ unsigned int uni_utf8_strlen_n(const void *input, size_t size) ATTR_PURE; +/* Same as uni_utf8_strlen_n(), but also report where counting stopped. If + the input ends with a partial UTF-8 character, it isn't counted and + partial_pos_r is set to the byte offset where that character begins. + Otherwise it's set to the end of the input (first NUL byte or size). */ +unsigned int uni_utf8_partial_strlen_n(const void *input, size_t size, + size_t *partial_pos_r); /* Returns the number of bytes belonging to this UTF-8 character. The given parameter is the first byte of the UTF-8 sequence. Invalid input is -- 2.47.3