From: Lennart Poettering Date: Wed, 5 Feb 2025 09:44:19 +0000 (+0100) Subject: utf8: add helper that determines length in bytes of last UTF-8 character in string X-Git-Tag: v258-rc1~1322^2~7 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=104a6b8c390730f66f5acbcaefbc27898bfef9fe;p=thirdparty%2Fsystemd.git utf8: add helper that determines length in bytes of last UTF-8 character in string --- diff --git a/src/basic/utf8.c b/src/basic/utf8.c index 2a9da598812..7b47df6f4f3 100644 --- a/src/basic/utf8.c +++ b/src/basic/utf8.c @@ -609,3 +609,26 @@ size_t utf8_console_width(const char *str) { return n; } + +size_t utf8_last_length(const char *s, size_t n) { + int r; + + if (n == SIZE_MAX) + n = strlen(s); + + /* Determines length in bytes of last UTF-8 codepoint in string. If the string is empty, returns + * zero. Treats invalid UTF-8 codepoints as 1 sized ones. */ + + for (size_t last = 0;;) { + if (n == 0) + return last; + + r = utf8_encoded_valid_unichar(s, n); + if (r <= 0) + r = 1; /* treat invalid UTF-8 as byte-wide */ + + s += r; + n -= r; + last = r; + } +} diff --git a/src/basic/utf8.h b/src/basic/utf8.h index 221bc46a2df..f6158b2ef10 100644 --- a/src/basic/utf8.h +++ b/src/basic/utf8.h @@ -62,3 +62,5 @@ static inline char32_t utf16_surrogate_pair_to_unichar(char16_t lead, char16_t t size_t utf8_n_codepoints(const char *str); int utf8_char_console_width(const char *str); size_t utf8_console_width(const char *str); + +size_t utf8_last_length(const char *s, size_t n); diff --git a/src/test/test-utf8.c b/src/test/test-utf8.c index d60cf00bf32..18974e76642 100644 --- a/src/test/test-utf8.c +++ b/src/test/test-utf8.c @@ -227,6 +227,18 @@ TEST(utf8_to_utf16) { } } +TEST(utf8_last_length) { + ASSERT_EQ(utf8_last_length("", 0), 0U); + ASSERT_EQ(utf8_last_length("", SIZE_MAX), 0U); + ASSERT_EQ(utf8_last_length("a", 1), 1U); + ASSERT_EQ(utf8_last_length("a", SIZE_MAX), 1U); + ASSERT_EQ(utf8_last_length("ä", SIZE_MAX), strlen("ä")); + ASSERT_EQ(utf8_last_length("👊", SIZE_MAX), strlen("👊")); + ASSERT_EQ(utf8_last_length("koffa", SIZE_MAX), 1U); + ASSERT_EQ(utf8_last_length("koffä", SIZE_MAX), strlen("ä")); + ASSERT_EQ(utf8_last_length("koff👊", SIZE_MAX), strlen("👊")); +} + static int intro(void) { log_show_color(true); return EXIT_SUCCESS;