From: Lennart Poettering <lennart@poettering.net>
Date: Mon, 14 Aug 2023 11:15:52 +0000 (+0200)
Subject: utf8: automatically determine length of string if SIZE_MAX is specified
X-Git-Tag: v255-rc1~655
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=ba091282e5ed2f77cbebc3328faa1da362fdab4d;p=thirdparty%2Fsystemd.git

utf8: automatically determine length of string if SIZE_MAX is specified

Let's make utf8_to_utf16() and utf16_to_utf8() a bit nicer to use by
adding shortcuts for common cases.

This is particularly relevant for utf16_to_utf8(), since its length
argument is specified in bytes and the multiplication by 2 is easy to
forget.
---

diff --git a/src/basic/efivars.c b/src/basic/efivars.c
index 07a2a54f3bc..cb239bdf18c 100644
--- a/src/basic/efivars.c
+++ b/src/basic/efivars.c
@@ -265,7 +265,7 @@ finish:
 int efi_set_variable_string(const char *variable, const char *value) {
         _cleanup_free_ char16_t *u16 = NULL;
 
-        u16 = utf8_to_utf16(value, strlen(value));
+        u16 = utf8_to_utf16(value, SIZE_MAX);
         if (!u16)
                 return -ENOMEM;
 
diff --git a/src/basic/utf8.c b/src/basic/utf8.c
index 9d9e76904ee..36e1e0f1558 100644
--- a/src/basic/utf8.c
+++ b/src/basic/utf8.c
@@ -389,11 +389,23 @@ char *utf16_to_utf8(const char16_t *s, size_t length /* bytes! */) {
         const uint8_t *f;
         char *r, *t;
 
+        if (length == 0)
+                return new0(char, 1);
+
         assert(s);
 
+        if (length == SIZE_MAX) {
+                length = char16_strlen(s);
+
+                if (length > SIZE_MAX/2)
+                        return NULL; /* overflow */
+
+                length *= 2;
+        }
+
         /* Input length is in bytes, i.e. the shortest possible character takes 2 bytes. Each unicode character may
          * take up to 4 bytes in UTF-8. Let's also account for a trailing NUL byte. */
-        if (length * 2 < length)
+        if (length > (SIZE_MAX - 1) / 2)
                 return NULL; /* overflow */
 
         r = new(char, length * 2 + 1);
@@ -463,8 +475,17 @@ char16_t *utf8_to_utf16(const char *s, size_t length) {
         char16_t *n, *p;
         int r;
 
+        if (length == 0)
+                return new0(char16_t, 1);
+
         assert(s);
 
+        if (length == SIZE_MAX)
+                length = strlen(s);
+
+        if (length > SIZE_MAX - 1)
+                return NULL; /* overflow */
+
         n = new(char16_t, length + 1);
         if (!n)
                 return NULL;
diff --git a/src/boot/bootctl-set-efivar.c b/src/boot/bootctl-set-efivar.c
index cbf92caaafd..9feb0e3d2ea 100644
--- a/src/boot/bootctl-set-efivar.c
+++ b/src/boot/bootctl-set-efivar.c
@@ -34,7 +34,7 @@ static int parse_timeout(const char *arg1, char16_t **ret_timeout, size_t *ret_t
 
         xsprintf(utf8, USEC_FMT, MIN(timeout / USEC_PER_SEC, UINT32_MAX));
 
-        encoded = utf8_to_utf16(utf8, strlen(utf8));
+        encoded = utf8_to_utf16(utf8, SIZE_MAX);
         if (!encoded)
                 return log_oom();
 
@@ -69,7 +69,7 @@ static int parse_loader_entry_target_arg(const char *arg1, char16_t **ret_target
         } else if (arg1[0] == '@' && !streq(arg1, "@saved"))
                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unsupported special entry identifier: %s", arg1);
         else {
-                encoded = utf8_to_utf16(arg1, strlen(arg1));
+                encoded = utf8_to_utf16(arg1, SIZE_MAX);
                 if (!encoded)
                         return log_oom();
 
diff --git a/src/boot/efi/test-bcd.c b/src/boot/efi/test-bcd.c
index 0ee29477040..3f93ca05c58 100644
--- a/src/boot/efi/test-bcd.c
+++ b/src/boot/efi/test-bcd.c
@@ -152,7 +152,7 @@ TEST(argv_bcds) {
 
                 char16_t *title = get_bcd_title(bcd, len);
                 if (title) {
-                        _cleanup_free_ char *title_utf8 = utf16_to_utf8(title, char16_strlen(title) * 2);
+                        _cleanup_free_ char *title_utf8 = utf16_to_utf8(title, SIZE_MAX);
                         log_info("%s: \"%s\"", saved_argv[i], title_utf8);
                 } else
                         log_info("%s: Bad BCD", saved_argv[i]);
diff --git a/src/shared/gpt.c b/src/shared/gpt.c
index dd96261888c..34180a32c5b 100644
--- a/src/shared/gpt.c
+++ b/src/shared/gpt.c
@@ -273,7 +273,7 @@ Architecture gpt_partition_type_uuid_to_arch(sd_id128_t id) {
 int gpt_partition_label_valid(const char *s) {
         _cleanup_free_ char16_t *recoded = NULL;
 
-        recoded = utf8_to_utf16(s, strlen(s));
+        recoded = utf8_to_utf16(s, SIZE_MAX);
         if (!recoded)
                 return -ENOMEM;
 
diff --git a/src/test/test-utf8.c b/src/test/test-utf8.c
index 1b7da9d511e..a0d7dc14ef1 100644
--- a/src/test/test-utf8.c
+++ b/src/test/test-utf8.c
@@ -183,7 +183,7 @@ TEST(utf16_to_utf8) {
         assert_se(b);
 
         free(a);
-        a = utf16_to_utf8(b, char16_strlen(b) * 2);
+        a = utf16_to_utf8(b, SIZE_MAX);
         assert_se(a);
         assert_se(strlen(a) == sizeof(utf8));
         assert_se(memcmp(a, utf8, sizeof(utf8)) == 0);
@@ -218,10 +218,10 @@ TEST(utf8_to_utf16) {
                 _cleanup_free_ char16_t *a = NULL;
                 _cleanup_free_ char *b = NULL;
 
-                a = utf8_to_utf16(p, strlen(p));
+                a = utf8_to_utf16(p, SIZE_MAX);
                 assert_se(a);
 
-                b = utf16_to_utf8(a, char16_strlen(a) * 2);
+                b = utf16_to_utf8(a, SIZE_MAX);
                 assert_se(b);
                 assert_se(streq(p, b));
         }