git.ipfire.org Git - thirdparty/ccache.git/commitdiff
refactor: Simplify UTF-16LE to UTF-8 conversion in util::read_file
author: Joel Rosdahl <joel@rosdahl.net>
Wed, 3 Sep 2025 17:58:00 +0000 (19:58 +0200)
committer: Joel Rosdahl <joel@rosdahl.net>
Sun, 7 Sep 2025 18:21:49 +0000 (20:21 +0200)
src/ccache/util/file.cpp
unittest/test_util_file.cpp

index 6605e96abb8ef4ffd687eb1043e2f929ccd98c24..61950b5ba122e5ea98385d466db0f345f1b69d24 100644 (file)
@@ -382,37 +382,36 @@ read_file(const fs::path& path, size_t size_hint)
   if constexpr (std::is_same_v<T, std::string>) {
     // Convert to UTF-8 if the content starts with a UTF-16 little-endian BOM.
     if (has_utf16_le_bom(result)) {
-      result.erase(0, 2); // Remove BOM.
-      if (result.empty()) {
-        return result;
-      }
-
-      std::wstring result_as_u16(
-        reinterpret_cast<const wchar_t*>(result.data()), result.size() / 2);
-      const int size = WideCharToMultiByte(CP_UTF8,
-                                           WC_ERR_INVALID_CHARS,
-                                           result_as_u16.c_str(),
-                                           int(result_as_u16.size()),
-                                           nullptr,
-                                           0,
-                                           nullptr,
-                                           nullptr);
-      if (size <= 0) {
+      DEBUG_ASSERT(result.size() >= 2);
+      const wchar_t* utf16 =
+        reinterpret_cast<const wchar_t*>(result.data() + 2);
+      const int utf16_size = static_cast<int>((result.size() - 2) / 2);
+      const int utf8_size = WideCharToMultiByte(CP_UTF8,
+                                                WC_ERR_INVALID_CHARS,
+                                                utf16,
+                                                utf16_size,
+                                                nullptr,
+                                                0,
+                                                nullptr,
+                                                nullptr);
+      if (utf8_size <= 0) {
         return tl::unexpected(
           FMT("Failed to convert {} from UTF-16LE to UTF-8: {}",
               path,
               util::win32_error_message(GetLastError())));
       }
 
-      result = std::string(size, '\0');
+      std::string utf8(utf8_size, '\0');
       WideCharToMultiByte(CP_UTF8,
                           0,
-                          result_as_u16.c_str(),
-                          int(result_as_u16.size()),
-                          &result.at(0),
-                          size,
+                          utf16,
+                          utf16_size,
+                          utf8.data(),
+                          utf8_size,
                           nullptr,
                           nullptr);
+
+      result.swap(utf8);
     }
   }
 #endif
index 9e8366d9bb0c0e18d53849741b61d99e8ed3118e..3cf84e964490c5a539f0ee333b481f8aedd37fbb 100644 (file)
@@ -181,6 +181,8 @@ TEST_CASE("util::read_file<std::string> with UTF-16 little endian encoding")
   CHECK(util::write_file("test", data));
   read_data = util::read_file<std::string>("test");
   REQUIRE(!read_data);
+  REQUIRE(util::starts_with(read_data.error(),
+                            "Failed to convert test from UTF-16LE to UTF-8:"));
 }
 #endif