From: Karel Zak Date: Tue, 25 Feb 2020 11:00:27 +0000 (+0100) Subject: libblkid: move UTF encoding function to lib/ X-Git-Tag: v2.36-rc1~216 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=35c6ed615f6f3ef68d26a6c4b655eeeb9d1f5d9e;p=thirdparty%2Futil-linux.git libblkid: move UTF encoding function to lib/ Let's consolidate the code, we need to use it in libfdisk too. It seems better to keep it generic and libblkid independent. This patch also removes blkid_encode_alloc(), this function is overkill. Signed-off-by: Karel Zak --- diff --git a/include/Makemodule.am b/include/Makemodule.am index c55c262c9b..3f8b6233f4 100644 --- a/include/Makemodule.am +++ b/include/Makemodule.am @@ -16,6 +16,7 @@ dist_noinst_HEADERS += \ include/crc32c.h \ include/debug.h \ include/debugobj.h \ + include/encode.h \ include/env.h \ include/exec_shell.h \ include/exitcodes.h \ diff --git a/include/encode.h b/include/encode.h new file mode 100644 index 0000000000..b259ab533c --- /dev/null +++ b/include/encode.h @@ -0,0 +1,14 @@ +#ifndef UTIL_LINUX_ENCODE_H +#define UTIL_LINUX_ENCODE_H + +extern size_t ul_encode_to_utf8(int enc, unsigned char *dest, size_t len, + const unsigned char *src, size_t count) + __attribute__((nonnull)); + +enum { + UL_ENCODE_UTF16BE = 0, + UL_ENCODE_UTF16LE, + UL_ENCODE_LATIN1 +}; + +#endif diff --git a/lib/Makemodule.am b/lib/Makemodule.am index b4d0fba2bf..16a77e3b4a 100644 --- a/lib/Makemodule.am +++ b/lib/Makemodule.am @@ -8,6 +8,7 @@ libcommon_la_SOURCES = \ lib/crc32c.c \ lib/env.c \ lib/idcache.c \ + lib/encode.c \ lib/fileutils.c \ lib/ismounted.c \ lib/color-names.c \ diff --git a/lib/encode.c b/lib/encode.c new file mode 100644 index 0000000000..bee5bd53d5 --- /dev/null +++ b/lib/encode.c @@ -0,0 +1,77 @@ +/* + * Based on code from libblkid, + * + * Copyright (C) 2008 Kay Sievers + * Copyright (C) 2009 Karel Zak + * Copyright (C) 2020 Pali Rohár + * + * This file may be redistributed under the terms of the + * GNU Lesser General Public License. + */ +#include "c.h" +#include "encode.h" + +size_t ul_encode_to_utf8(int enc, unsigned char *dest, size_t len, + const unsigned char *src, size_t count) +{ + size_t i, j; + uint32_t c; + uint16_t c2; + + for (j = i = 0; i < count; i++) { + if (enc == UL_ENCODE_UTF16LE) { + if (i+2 > count) + break; + c = (src[i+1] << 8) | src[i]; + i++; + } else if (enc == UL_ENCODE_UTF16BE) { + if (i+2 > count) + break; + c = (src[i] << 8) | src[i+1]; + i++; + } else if (enc == UL_ENCODE_LATIN1) { + c = src[i]; + } else { + return 0; + } + if ((enc == UL_ENCODE_UTF16LE || enc == UL_ENCODE_UTF16BE) && + c >= 0xD800 && c <= 0xDBFF && i+2 < count) { + if (enc == UL_ENCODE_UTF16LE) + c2 = (src[i+2] << 8) | src[i+1]; + else + c2 = (src[i+1] << 8) | src[i+2]; + if (c2 >= 0xDC00 && c2 <= 0xDFFF) { + c = 0x10000 + ((c - 0xD800) << 10) + (c2 - 0xDC00); + i += 2; + } + } + if (c == 0) { + dest[j] = '\0'; + break; + } else if (c < 0x80) { + if (j+1 >= len) + break; + dest[j++] = (uint8_t) c; + } else if (c < 0x800) { + if (j+2 >= len) + break; + dest[j++] = (uint8_t) (0xc0 | (c >> 6)); + dest[j++] = (uint8_t) (0x80 | (c & 0x3f)); + } else if (c < 0x10000) { + if (j+3 >= len) + break; + dest[j++] = (uint8_t) (0xe0 | (c >> 12)); + dest[j++] = (uint8_t) (0x80 | ((c >> 6) & 0x3f)); + dest[j++] = (uint8_t) (0x80 | (c & 0x3f)); + } else { + if (j+4 >= len) + break; + dest[j++] = (uint8_t) (0xf0 | (c >> 18)); + dest[j++] = (uint8_t) (0x80 | ((c >> 12) & 0x3f)); + dest[j++] = (uint8_t) (0x80 | ((c >> 6) & 0x3f)); + dest[j++] = (uint8_t) (0x80 | (c & 0x3f)); + } + } + dest[j] = '\0'; + return j; +} diff --git a/libblkid/src/blkidP.h b/libblkid/src/blkidP.h index f9bbe00840..0660c997b5 100644 --- a/libblkid/src/blkidP.h +++ b/libblkid/src/blkidP.h @@ -34,6 +34,7 @@ #include "debug.h" #include "blkid.h" #include "list.h" +#include "encode.h" /* * This describes the attributes of a specific device. @@ -543,14 +544,4 @@ extern void blkid_probe_use_wiper(blkid_probe pr, uint64_t off, uint64_t size) #define blkid_bmp_nbytes(max_items) \ (blkid_bmp_nwords(max_items) * sizeof(unsigned long)) -/* encode.c */ -extern unsigned char *blkid_encode_alloc(size_t count, size_t *reslen); -extern size_t blkid_encode_to_utf8(int enc, unsigned char *dest, size_t len, - const unsigned char *src, size_t count) - __attribute__((nonnull)); - -#define BLKID_ENC_UTF16BE 0 -#define BLKID_ENC_UTF16LE 1 -#define BLKID_ENC_LATIN1 2 - #endif /* _BLKID_BLKIDP_H */ diff --git a/libblkid/src/encode.c b/libblkid/src/encode.c index 36ad1c9569..9c2220428b 100644 --- a/libblkid/src/encode.c +++ b/libblkid/src/encode.c @@ -233,77 +233,6 @@ static int replace_chars(char *str, const char *white) return replaced; } -size_t blkid_encode_to_utf8(int enc, unsigned char *dest, size_t len, - const unsigned char *src, size_t count) -{ - size_t i, j; - uint32_t c; - uint16_t c2; - - for (j = i = 0; i < count; i++) { - if (enc == BLKID_ENC_UTF16LE) { - if (i+2 > count) - break; - c = (src[i+1] << 8) | src[i]; - i++; - } else if (enc == BLKID_ENC_UTF16BE) { - if (i+2 > count) - break; - c = (src[i] << 8) | src[i+1]; - i++; - } else if (enc == BLKID_ENC_LATIN1) { - c = src[i]; - } else { - return 0; - } - if ((enc == BLKID_ENC_UTF16LE || enc == BLKID_ENC_UTF16BE) && - c >= 0xD800 && c <= 0xDBFF && i+2 < count) { - if (enc == BLKID_ENC_UTF16LE) - c2 = (src[i+2] << 8) | src[i+1]; - else - c2 = (src[i+1] << 8) | src[i+2]; - if (c2 >= 0xDC00 && c2 <= 0xDFFF) { - c = 0x10000 + ((c - 0xD800) << 10) + (c2 - 0xDC00); - i += 2; - } - } - if (c == 0) { - dest[j] = '\0'; - break; - } else if (c < 0x80) { - if (j+1 >= len) - break; - dest[j++] = (uint8_t) c; - } else if (c < 0x800) { - if (j+2 >= len) - break; - dest[j++] = (uint8_t) (0xc0 | (c >> 6)); - dest[j++] = (uint8_t) (0x80 | (c & 0x3f)); - } else if (c < 0x10000) { - if (j+3 >= len) - break; - dest[j++] = (uint8_t) (0xe0 | (c >> 12)); - dest[j++] = (uint8_t) (0x80 | ((c >> 6) & 0x3f)); - dest[j++] = (uint8_t) (0x80 | (c & 0x3f)); - } else { - if (j+4 >= len) - break; - dest[j++] = (uint8_t) (0xf0 | (c >> 18)); - dest[j++] = (uint8_t) (0x80 | ((c >> 12) & 0x3f)); - dest[j++] = (uint8_t) (0x80 | ((c >> 6) & 0x3f)); - dest[j++] = (uint8_t) (0x80 | (c & 0x3f)); - } - } - dest[j] = '\0'; - return j; -} - -unsigned char *blkid_encode_alloc(size_t count, size_t *reslen) -{ - *reslen = (count * 3) + 1; - return calloc(1, *reslen); -} - /** * blkid_encode_string: * @str: input string to be encoded diff --git a/libblkid/src/partitions/gpt.c b/libblkid/src/partitions/gpt.c index 52704ed63a..3ca64183c9 100644 --- a/libblkid/src/partitions/gpt.c +++ b/libblkid/src/partitions/gpt.c @@ -396,7 +396,7 @@ static int probe_gpt_pt(blkid_probe pr, blkid_partition_set_utf8name(par, (unsigned char *) e->partition_name, - sizeof(e->partition_name), BLKID_ENC_UTF16LE); + sizeof(e->partition_name), UL_ENCODE_UTF16LE); guid = e->unique_partition_guid; swap_efi_guid(&guid); diff --git a/libblkid/src/partitions/partitions.c b/libblkid/src/partitions/partitions.c index f12638e764..5bbf45cf1f 100644 --- a/libblkid/src/partitions/partitions.c +++ b/libblkid/src/partitions/partitions.c @@ -1357,7 +1357,7 @@ int blkid_partition_set_utf8name(blkid_partition par, const unsigned char *name, if (!par) return -1; - blkid_encode_to_utf8(enc, par->name, sizeof(par->name), name, len); + ul_encode_to_utf8(enc, par->name, sizeof(par->name), name, len); blkid_rtrim_whitespace(par->name); return 0; } diff --git a/libblkid/src/superblocks/exfat.c b/libblkid/src/superblocks/exfat.c index 62fb8ce0b8..ca7d6c813a 100644 --- a/libblkid/src/superblocks/exfat.c +++ b/libblkid/src/superblocks/exfat.c @@ -127,7 +127,7 @@ static int probe_exfat(blkid_probe pr, const struct blkid_idmag *mag) if (label) blkid_probe_set_utf8label(pr, label->name, min(label->length * 2, sizeof(label->name)), - BLKID_ENC_UTF16LE); + UL_ENCODE_UTF16LE); else if (errno) return -errno; diff --git a/libblkid/src/superblocks/f2fs.c b/libblkid/src/superblocks/f2fs.c index 255ef6384f..aed93e25b3 100644 --- a/libblkid/src/superblocks/f2fs.c +++ b/libblkid/src/superblocks/f2fs.c @@ -74,7 +74,7 @@ static int probe_f2fs(blkid_probe pr, const struct blkid_idmag *mag) if (*((unsigned char *) sb->volume_name)) blkid_probe_set_utf8label(pr, (unsigned char *) sb->volume_name, sizeof(sb->volume_name), - BLKID_ENC_UTF16LE); + UL_ENCODE_UTF16LE); blkid_probe_set_uuid(pr, sb->uuid); blkid_probe_sprintf_version(pr, "%u.%u", vermaj, vermin); diff --git a/libblkid/src/superblocks/hfs.c b/libblkid/src/superblocks/hfs.c index e537bbbd3c..ebf90e49e6 100644 --- a/libblkid/src/superblocks/hfs.c +++ b/libblkid/src/superblocks/hfs.c @@ -305,7 +305,7 @@ static int probe_hfsplus(blkid_probe pr, const struct blkid_idmag *mag) blkid_probe_set_utf8label(pr, key->unicode, be16_to_cpu(key->unicode_len) * 2, - BLKID_ENC_UTF16BE); + UL_ENCODE_UTF16BE); return 0; } diff --git a/libblkid/src/superblocks/iso9660.c b/libblkid/src/superblocks/iso9660.c index 8dc2e53948..1057da8614 100644 --- a/libblkid/src/superblocks/iso9660.c +++ b/libblkid/src/superblocks/iso9660.c @@ -246,7 +246,7 @@ static int probe_iso9660(blkid_probe pr, const struct blkid_idmag *mag) blkid_probe_set_utf8label(pr, iso->volume_id, sizeof(iso->volume_id), - BLKID_ENC_UTF16BE); + UL_ENCODE_UTF16BE); goto has_label; } off += ISO_SECTOR_SIZE; diff --git a/libblkid/src/superblocks/ntfs.c b/libblkid/src/superblocks/ntfs.c index 0e6f6b748b..02487e2198 100644 --- a/libblkid/src/superblocks/ntfs.c +++ b/libblkid/src/superblocks/ntfs.c @@ -201,7 +201,7 @@ static int __probe_ntfs(blkid_probe pr, const struct blkid_idmag *mag, int save_ if (attr_off + val_off + val_len <= mft_record_size) blkid_probe_set_utf8label(pr, val, val_len, - BLKID_ENC_UTF16LE); + UL_ENCODE_UTF16LE); break; } diff --git a/libblkid/src/superblocks/superblocks.c b/libblkid/src/superblocks/superblocks.c index 93d5abd4c7..baf35e51b5 100644 --- a/libblkid/src/superblocks/superblocks.c +++ b/libblkid/src/superblocks/superblocks.c @@ -625,12 +625,13 @@ int blkid_probe_set_utf8_id_label(blkid_probe pr, const char *name, if (!v) return -ENOMEM; - v->data = blkid_encode_alloc(len, &v->len); + v->len = (len * 3) + 1; + v->data = calloc(1, v->len); if (!v->data) rc = -ENOMEM; if (!rc) { - blkid_encode_to_utf8(enc, v->data, v->len, data, len); + ul_encode_to_utf8(enc, v->data, v->len, data, len); v->len = blkid_rtrim_whitespace(v->data) + 1; if (v->len > 1) v->len = blkid_ltrim_whitespace(v->data) + 1; @@ -688,11 +689,12 @@ int blkid_probe_set_utf8label(blkid_probe pr, const unsigned char *label, if (!v) return -ENOMEM; - v->data = blkid_encode_alloc(len, &v->len); + v->len = (len * 3) + 1; + v->data = calloc(1, v->len); if (!v->data) rc = -ENOMEM; if (!rc) { - blkid_encode_to_utf8(enc, v->data, v->len, label, len); + ul_encode_to_utf8(enc, v->data, v->len, label, len); v->len = blkid_rtrim_whitespace(v->data) + 1; if (v->len > 1) return 0; diff --git a/libblkid/src/superblocks/udf.c b/libblkid/src/superblocks/udf.c index c27debd29d..a8f0099642 100644 --- a/libblkid/src/superblocks/udf.c +++ b/libblkid/src/superblocks/udf.c @@ -19,7 +19,7 @@ #include "superblocks.h" -#define udf_cid_to_enc(cid) ((cid) == 8 ? BLKID_ENC_LATIN1 : (cid) == 16 ? BLKID_ENC_UTF16BE : -1) +#define udf_cid_to_enc(cid) ((cid) == 8 ? UL_ENCODE_LATIN1 : (cid) == 16 ? UL_ENCODE_UTF16BE : -1) struct dstring128 { uint8_t cid; @@ -144,7 +144,7 @@ static inline int gen_uuid_from_volset_id(unsigned char uuid[17], struct dstring if (enc == -1) return -1; - len = blkid_encode_to_utf8(enc, buf, sizeof(buf), volset_id->c, clen); + len = ul_encode_to_utf8(enc, buf, sizeof(buf), volset_id->c, clen); if (len < 8) return -1;