]> git.ipfire.org Git - thirdparty/util-linux.git/commitdiff
libblkid: Fix UTF-16 support in function blkid_encode_to_utf8()
authorPali Rohár <pali.rohar@gmail.com>
Sun, 9 Feb 2020 12:04:34 +0000 (13:04 +0100)
committerPali Rohár <pali.rohar@gmail.com>
Sun, 9 Feb 2020 12:04:34 +0000 (13:04 +0100)
Function blkid_encode_to_utf8() claims to support the BLKID_ENC_UTF16LE and
BLKID_ENC_UTF16BE encodings, but that is not true: it supports only UCS-2
(and not full UTF-16).

As all places where BLKID_ENC_UTF16LE and BLKID_ENC_UTF16BE are used expect
UTF-16 and not UCS-2, this patch changes the implementation of the
BLKID_ENC_UTF16LE and BLKID_ENC_UTF16BE encodings to support full UTF-16,
including surrogate pairs, and not only UCS-2.

libblkid/src/encode.c

index 33d349127e65fc582a39d007729acde08f718a93..36ad1c9569f4cebda516535e3c023e8b797bd61e 100644 (file)
@@ -237,7 +237,8 @@ size_t blkid_encode_to_utf8(int enc, unsigned char *dest, size_t len,
                        const unsigned char *src, size_t count)
 {
        size_t i, j;
-       uint16_t c;
+       uint32_t c;
+       uint16_t c2;
 
        for (j = i = 0; i < count; i++) {
                if (enc == BLKID_ENC_UTF16LE) {
@@ -255,6 +256,17 @@ size_t blkid_encode_to_utf8(int enc, unsigned char *dest, size_t len,
                } else {
                        return 0;
                }
+               if ((enc == BLKID_ENC_UTF16LE || enc == BLKID_ENC_UTF16BE) &&
+                   c >= 0xD800 && c <= 0xDBFF && i+2 < count) {
+                       if (enc == BLKID_ENC_UTF16LE)
+                               c2 = (src[i+2] << 8) | src[i+1];
+                       else
+                               c2 = (src[i+1] << 8) | src[i+2];
+                       if (c2 >= 0xDC00 && c2 <= 0xDFFF) {
+                               c = 0x10000 + ((c - 0xD800) << 10) + (c2 - 0xDC00);
+                               i += 2;
+                       }
+               }
                if (c == 0) {
                        dest[j] = '\0';
                        break;
@@ -267,12 +279,19 @@ size_t blkid_encode_to_utf8(int enc, unsigned char *dest, size_t len,
                                break;
                        dest[j++] = (uint8_t) (0xc0 | (c >> 6));
                        dest[j++] = (uint8_t) (0x80 | (c & 0x3f));
-               } else {
+               } else if (c < 0x10000) {
                        if (j+3 >= len)
                                break;
                        dest[j++] = (uint8_t) (0xe0 | (c >> 12));
                        dest[j++] = (uint8_t) (0x80 | ((c >> 6) & 0x3f));
                        dest[j++] = (uint8_t) (0x80 | (c & 0x3f));
+               } else {
+                       if (j+4 >= len)
+                               break;
+                       dest[j++] = (uint8_t) (0xf0 | (c >> 18));
+                       dest[j++] = (uint8_t) (0x80 | ((c >> 12) & 0x3f));
+                       dest[j++] = (uint8_t) (0x80 | ((c >> 6) & 0x3f));
+                       dest[j++] = (uint8_t) (0x80 | (c & 0x3f));
                }
        }
        dest[j] = '\0';