From: vvainola
Date: Sat, 19 Feb 2022 15:22:16 +0000 (+0200)
Subject: fix: Support UTF-16LE .rsp files (#1005)
X-Git-Tag: v4.6~16
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1ee99b7bbdae21b5aef9b28e21cdf9b14b59c5ce;p=thirdparty%2Fccache.git

fix: Support UTF-16LE .rsp files (#1005)
---

diff --git a/src/Args.cpp b/src/Args.cpp
index 3731e26b6..572911ece 100644
--- a/src/Args.cpp
+++ b/src/Args.cpp
@@ -54,7 +54,7 @@ Args::from_atfile(const std::string& filename, bool ignore_backslash)
 {
   std::string argtext;
   try {
-    argtext = Util::read_file(filename);
+    argtext = Util::read_text_file(filename);
   } catch (core::Error&) {
     return nullopt;
   }
diff --git a/src/Util.cpp b/src/Util.cpp
index f03ba6fbe..d9c5fdbf2 100644
--- a/src/Util.cpp
+++ b/src/Util.cpp
@@ -48,7 +48,9 @@ extern "C" {
 
 #include
 #include
+#include <codecvt>
 #include
+#include <locale>
 
 #ifndef HAVE_DIRENT_H
 #  include
@@ -207,6 +209,14 @@ rewrite_stderr_to_absolute_paths(string_view text)
   return result;
 }
 
+bool
+has_utf16_le_bom(string_view text)
+{
+  return text.size() > 1
+         && ((static_cast<unsigned char>(text[0]) == 0xff
+              && static_cast<unsigned char>(text[1]) == 0xfe));
+}
+
 } // namespace
 
 namespace Util {
@@ -1078,6 +1088,21 @@ read_file(const std::string& path, size_t size_hint)
   return result;
 }
 
+std::string
+read_text_file(const std::string& path, size_t size_hint)
+{
+  std::string result = read_file(path, size_hint);
+  // Convert to UTF-8 if the contents start with UTF-16 little-endian BOM
+  if (has_utf16_le_bom(result)) {
+    result.erase(0, 2); // Remove BOM
+    std::u16string result_as_u16((result.size() / 2) + 1, '\0');
+    result_as_u16 = reinterpret_cast<const char16_t*>(result.c_str());
+    std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter;
+    result = converter.to_bytes(result_as_u16);
+  }
+  return result;
+}
+
 #ifndef _WIN32
 std::string
 read_link(const std::string& path)
diff --git a/src/Util.hpp b/src/Util.hpp
index 3647f3f30..de191e7cb 100644
--- a/src/Util.hpp
+++ b/src/Util.hpp
@@ -293,6 +293,12 @@ bool read_fd(int fd, DataReceiver data_receiver);
 // without the path.
 std::string read_file(const std::string& path, size_t size_hint = 0);
 
+// Return contents of a text file as UTF-8 encoded string.
+//
+// Throws `core::Error` on error. The description contains the error message
+// without the path.
+std::string read_text_file(const std::string& path, size_t size_hint = 0);
+
 #ifndef _WIN32
 // Like readlink(2) but returns the string (or the empty string on failure).
 std::string read_link(const std::string& path);
diff --git a/unittest/test_Util.cpp b/unittest/test_Util.cpp
index f2398c4fb..3d59b71fe 100644
--- a/unittest/test_Util.cpp
+++ b/unittest/test_Util.cpp
@@ -667,6 +667,24 @@ TEST_CASE("Util::{read,write,copy}_file with binary files")
   CHECK(Util::read_file("copy") == data);
 }
 
+TEST_CASE("Util::read_text_file with UTF-16 little endian encoding")
+{
+  TestContext test_context;
+
+  std::string data;
+  data.push_back(static_cast<unsigned char>(0xff));
+  data.push_back(static_cast<unsigned char>(0xfe));
+  data.push_back('a');
+  data.push_back('\0');
+  data.push_back('b');
+  data.push_back('\0');
+  data.push_back('c');
+  data.push_back('\0');
+
+  Util::write_file("test", data);
+  CHECK(Util::read_text_file("test") == "abc");
+}
+
 TEST_CASE("Util::remove_extension")
 {
   CHECK(Util::remove_extension("") == "");