From: Joel Rosdahl Date: Wed, 3 Sep 2025 17:58:00 +0000 (+0200) Subject: refactor: Simplify UTF-16LE to UTF-8 conversion in util::read_file X-Git-Tag: v4.12~16 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f0b559509cac63e1ffd3e75c13fd55b21421fa24;p=thirdparty%2Fccache.git refactor: Simplify UTF-16LE to UTF-8 conversion in util::read_file --- diff --git a/src/ccache/util/file.cpp b/src/ccache/util/file.cpp index 6605e96a..61950b5b 100644 --- a/src/ccache/util/file.cpp +++ b/src/ccache/util/file.cpp @@ -382,37 +382,36 @@ read_file(const fs::path& path, size_t size_hint) if constexpr (std::is_same_v) { // Convert to UTF-8 if the content starts with a UTF-16 little-endian BOM. if (has_utf16_le_bom(result)) { - result.erase(0, 2); // Remove BOM. - if (result.empty()) { - return result; - } - - std::wstring result_as_u16( - reinterpret_cast<const wchar_t*>(result.data()), result.size() / 2); - const int size = WideCharToMultiByte(CP_UTF8, - WC_ERR_INVALID_CHARS, - result_as_u16.c_str(), - int(result_as_u16.size()), - nullptr, - 0, - nullptr, - nullptr); - if (size <= 0) { + DEBUG_ASSERT(result.size() >= 2); + const wchar_t* utf16 = + reinterpret_cast<const wchar_t*>(result.data() + 2); + const int utf16_size = static_cast<int>((result.size() - 2) / 2); + const int utf8_size = WideCharToMultiByte(CP_UTF8, + WC_ERR_INVALID_CHARS, + utf16, + utf16_size, + nullptr, + 0, + nullptr, + nullptr); + if (utf8_size <= 0) { return tl::unexpected( FMT("Failed to convert {} from UTF-16LE to UTF-8: {}", path, util::win32_error_message(GetLastError()))); } - result = std::string(size, '\0'); + std::string utf8(utf8_size, '\0'); WideCharToMultiByte(CP_UTF8, 0, - result_as_u16.c_str(), - int(result_as_u16.size()), - &result.at(0), - size, + utf16, + utf16_size, + utf8.data(), + utf8_size, nullptr, nullptr); + + result.swap(utf8); } } #endif diff --git a/unittest/test_util_file.cpp b/unittest/test_util_file.cpp index 9e8366d9..3cf84e96 100644 --- a/unittest/test_util_file.cpp +++ b/unittest/test_util_file.cpp @@ 
-181,6 +181,8 @@ TEST_CASE("util::read_file with UTF-16 little endian encoding") CHECK(util::write_file("test", data)); read_data = util::read_file("test"); REQUIRE(!read_data); + REQUIRE(util::starts_with(read_data.error(), + "Failed to convert test from UTF-16LE to UTF-8:")); } #endif