git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
fs/nls: Fix utf16 to utf8 conversion
author: Armin Wolf <W_Armin@gmx.de>
Tue, 11 Nov 2025 13:11:22 +0000 (14:11 +0100)
committer: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Tue, 18 Nov 2025 09:18:06 +0000 (11:18 +0200)
Currently the function responsible for converting utf16 strings to
utf8 strings will ignore any characters that cannot be converted.
However, this also includes multi-byte characters that do not fit into
the provided string buffer.

This can cause problems if such a multi-byte character is followed by
a single-byte character. In such a case the multi-byte character might
be ignored when the provided string buffer is too small, but the
single-byte character might fit and is thus still copied into the
resulting string.

Fix this by no longer filling the provided string buffer once a
character does not fit. In order to be able to do this, extend
utf32_to_utf8() to return useful errno codes (-EILSEQ for an invalid
code point, -EOVERFLOW when the character does not fit) instead of -1.

Fixes: 74675a58507e ("NLS: update handling of Unicode")
Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://patch.msgid.link/20251111131125.3379-2-W_Armin@gmx.de
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
fs/nls/nls_base.c

index 18d597e49a194e45d93db76f0af598df99a237b7..d434c4463a8f7255326ce2ece3632029c1a73be6 100644 (file)
@@ -94,7 +94,7 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxout)
 
        l = u;
        if (l > UNICODE_MAX || (l & SURROGATE_MASK) == SURROGATE_PAIR)
-               return -1;
+               return -EILSEQ;
 
        nc = 0;
        for (t = utf8_table; t->cmask && maxout; t++, maxout--) {
@@ -110,7 +110,7 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxout)
                        return nc;
                }
        }
-       return -1;
+       return -EOVERFLOW;
 }
 EXPORT_SYMBOL(utf32_to_utf8);
 
@@ -217,8 +217,16 @@ int utf16s_to_utf8s(const wchar_t *pwcs, int inlen, enum utf16_endian endian,
                                inlen--;
                        }
                        size = utf32_to_utf8(u, op, maxout);
-                       if (size == -1) {
-                               /* Ignore character and move on */
+                       if (size < 0) {
+                               if (size == -EILSEQ) {
+                                       /* Ignore character and move on */
+                                       continue;
+                               }
+                               /*
+                                * Stop filling the buffer with data once a character
+                                * does not fit anymore.
+                                */
+                               break;
                        } else {
                                op += size;
                                maxout -= size;