git.ipfire.org Git - thirdparty/systemd.git/commitdiff
utf8: add helper that determines length in bytes of last UTF-8 character in string
author Lennart Poettering <lennart@poettering.net>
Wed, 5 Feb 2025 09:44:19 +0000 (10:44 +0100)
committer Lennart Poettering <lennart@poettering.net>
Mon, 17 Feb 2025 14:05:18 +0000 (15:05 +0100)
src/basic/utf8.c
src/basic/utf8.h
src/test/test-utf8.c

index 2a9da5988128fdf579e91d5d9cbbb1a672dd303e..7b47df6f4f3e35ca00cac3ff4f275bcea9a674b6 100644 (file)
@@ -609,3 +609,26 @@ size_t utf8_console_width(const char *str) {
 
         return n;
 }
+
+size_t utf8_last_length(const char *s, size_t n) {
+        int r;
+
+        if (n == SIZE_MAX)
+                n = strlen(s);
+
+        /* Returns the length in bytes of the last UTF-8 codepoint within the first n bytes of s; if n is
+         * SIZE_MAX, strlen(s) is used. Returns zero on empty input. Invalid bytes count as 1-byte units. */
+
+        for (size_t last = 0;;) {
+                if (n == 0)
+                        return last;
+
+                r = utf8_encoded_valid_unichar(s, n);
+                if (r <= 0)
+                        r = 1; /* invalid sequence: step over a single byte so the scan always advances */
+
+                s += r;
+                n -= r;
+                last = r;
+        }
+}
index 221bc46a2df018467d90a838d0f8311239f4acbc..f6158b2ef101b40931d4583ec213fee8f92a174b 100644 (file)
@@ -62,3 +62,5 @@ static inline char32_t utf16_surrogate_pair_to_unichar(char16_t lead, char16_t t
 size_t utf8_n_codepoints(const char *str);
 int utf8_char_console_width(const char *str);
 size_t utf8_console_width(const char *str);
+
+size_t utf8_last_length(const char *s, size_t n); /* n == SIZE_MAX: length is taken from strlen(s) */
index d60cf00bf325c7c8f444ab6e346ce8d7a6a08165..18974e76642b14357196024ab2d4b57c6c7b37a1 100644 (file)
@@ -227,6 +227,18 @@ TEST(utf8_to_utf16) {
         }
 }
 
+TEST(utf8_last_length) {
+        ASSERT_EQ(utf8_last_length("", 0), 0U); /* empty input, explicit length */
+        ASSERT_EQ(utf8_last_length("", SIZE_MAX), 0U); /* empty input, length via SIZE_MAX */
+        ASSERT_EQ(utf8_last_length("a", 1), 1U); /* single ASCII character, explicit length */
+        ASSERT_EQ(utf8_last_length("a", SIZE_MAX), 1U); /* single ASCII character, length via SIZE_MAX */
+        ASSERT_EQ(utf8_last_length("ä", SIZE_MAX), strlen("ä")); /* lone multi-byte character */
+        ASSERT_EQ(utf8_last_length("👊", SIZE_MAX), strlen("👊")); /* lone multi-byte character (emoji) */
+        ASSERT_EQ(utf8_last_length("koffa", SIZE_MAX), 1U); /* ASCII string ending in ASCII */
+        ASSERT_EQ(utf8_last_length("koffä", SIZE_MAX), strlen("ä")); /* ASCII prefix, multi-byte last character */
+        ASSERT_EQ(utf8_last_length("koff👊", SIZE_MAX), strlen("👊")); /* ASCII prefix, emoji last character */
+}
+
 static int intro(void) {
         log_show_color(true);
         return EXIT_SUCCESS;