{
std::string argtext;
try {
- argtext = Util::read_file(filename);
+ argtext = Util::read_text_file(filename);
} catch (core::Error&) {
return nullopt;
}
#include <algorithm>
#include <climits>
+#include <codecvt>
#include <fstream>
+#include <locale>
#ifndef HAVE_DIRENT_H
# include <filesystem>
return result;
}
+// Tell whether `text` begins with the two-byte UTF-16 little-endian byte order
+// mark (0xFF followed by 0xFE).
+bool
+has_utf16_le_bom(string_view text)
+{
+  // Fewer than two bytes cannot hold a BOM.
+  if (text.size() <= 1) {
+    return false;
+  }
+  const auto first_byte = static_cast<uint8_t>(text[0]);
+  const auto second_byte = static_cast<uint8_t>(text[1]);
+  return first_byte == 0xff && second_byte == 0xfe;
+}
+
} // namespace
namespace Util {
return result;
}
+// Read the file at `path` via `read_file` and return its contents, converting
+// UTF-16 little-endian input to UTF-8.
+//
+// Contents that start with the UTF-16 LE byte order mark (FF FE) have the BOM
+// removed and the remaining bytes decoded as UTF-16 LE code units; a dangling
+// final byte that does not form a complete code unit is ignored. All other
+// contents are returned exactly as `read_file` produced them. `size_hint` is
+// passed through to `read_file` unchanged.
+//
+// NOTE(review): `std::wstring_convert::to_bytes` reports invalid UTF-16 (for
+// example an unpaired surrogate) by throwing `std::range_error`, which callers
+// that only catch `core::Error` will not see -- confirm whether that should be
+// translated.
+std::string
+read_text_file(const std::string& path, size_t size_hint)
+{
+  std::string result = read_file(path, size_hint);
+  if (!has_utf16_le_bom(result)) {
+    return result;
+  }
+
+  // Assemble the code units byte by byte instead of reinterpreting the char
+  // buffer as a NUL-terminated char16_t string: std::string only guarantees a
+  // single terminating NUL byte, so scanning for a 16-bit terminator reads
+  // past the end of the buffer, stops early at an embedded U+0000 and depends
+  // on the host byte order.
+  constexpr size_t bom_size = 2;
+  std::u16string result_as_u16;
+  result_as_u16.reserve((result.size() - bom_size) / 2);
+  for (size_t i = bom_size; i + 1 < result.size(); i += 2) {
+    const auto low = static_cast<uint8_t>(result[i]);
+    const auto high = static_cast<uint8_t>(result[i + 1]);
+    result_as_u16.push_back(static_cast<char16_t>(low | (high << 8)));
+  }
+
+  std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter;
+  return converter.to_bytes(result_as_u16);
+}
+
#ifndef _WIN32
std::string
read_link(const std::string& path)
// without the path.
std::string read_file(const std::string& path, size_t size_hint = 0);
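+// Return the contents of a text file as a UTF-8 encoded string. Contents that
+// start with a UTF-16 little-endian byte order mark are converted to UTF-8 with
+// the BOM removed; other contents are returned unchanged.
+//
+// Throws `core::Error` on error. The description contains the error message
+// without the path.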
+std::string read_text_file(const std::string& path, size_t size_hint = 0);
+
#ifndef _WIN32
// Like readlink(2) but returns the string (or the empty string on failure).
std::string read_link(const std::string& path);
CHECK(Util::read_file("copy") == data);
}
+// A file holding the UTF-16 LE BOM followed by "abc" as UTF-16 LE code units
+// must be read back as the plain string "abc".
+TEST_CASE("Util::read_text_file with UTF-16 little endian encoding")
+{
+  TestContext test_context;
+
+  // FF FE (BOM), then 'a', 'b', 'c', each followed by a zero high byte.
+  const char utf16_le_bytes[] = {
+    '\xff', '\xfe', 'a', '\0', 'b', '\0', 'c', '\0'};
+  const std::string data(utf16_le_bytes, sizeof(utf16_le_bytes));
+
+  Util::write_file("test", data);
+  CHECK(Util::read_text_file("test") == "abc");
+}
+
TEST_CASE("Util::remove_extension")
{
CHECK(Util::remove_extension("") == "");