From: Joel Rosdahl Date: Mon, 15 Aug 2022 05:39:24 +0000 (+0200) Subject: enhance: Add util::{read_fd,read_file,write_file} functions X-Git-Tag: v4.7~114 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=6f227afd935aee61e6ad32c85b570a16c3709957;p=thirdparty%2Fccache.git enhance: Add util::{read_fd,read_file,write_file} functions --- diff --git a/cmake/config.h.in b/cmake/config.h.in index 83a3cf52f..9210147ed 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -210,6 +210,9 @@ typedef int pid_t; #if !defined(_WIN32) && !defined(O_BINARY) # define O_BINARY 0 #endif +#if !defined(_WIN32) && !defined(O_TEXT) +# define O_TEXT 0 +#endif #ifndef ESTALE # define ESTALE -1 diff --git a/src/util/file.cpp b/src/util/file.cpp index 44ca5cbda..99f7998a0 100644 --- a/src/util/file.cpp +++ b/src/util/file.cpp @@ -18,11 +18,15 @@ #include "file.hpp" +#include #include #include -#include #include +#ifdef HAVE_UNISTD_H +# include +#endif + #ifdef HAVE_UTIMENSAT # include # include @@ -37,6 +41,13 @@ # endif #endif +#include +#include +#include +#include +#include +#include + namespace util { void @@ -53,13 +64,126 @@ create_cachedir_tag(const std::string& dir) if (stat) { return; } - try { - Util::write_file(path, cachedir_tag); - } catch (const core::Error& e) { - LOG("Failed to create {}: {}", path, e.what()); + const auto result = util::write_file(path, cachedir_tag); + if (!result) { + LOG("Failed to create {}: {}", path, result.error()); + } +} + +nonstd::expected +read_fd(int fd, DataReceiver data_receiver) +{ + int64_t n; + char buffer[CCACHE_READ_BUFFER_SIZE]; + while ((n = read(fd, buffer, sizeof(buffer))) != 0) { + if (n == -1 && errno != EINTR) { + break; + } + if (n > 0) { + data_receiver(buffer, n); + } + } + if (n == -1) { + return nonstd::make_unexpected(strerror(errno)); + } + return {}; +} + +#ifdef _WIN32 +static bool +has_utf16_le_bom(std::string_view text) +{ + return text.size() > 1 + && ((static_cast(text[0]) == 0xff + && 
static_cast(text[1]) == 0xfe)); +} +#endif + +template +nonstd::expected +read_file(const std::string& path, size_t size_hint) +{ + if (size_hint == 0) { + const auto stat = Stat::stat(path); + if (!stat) { + LOG("Failed to stat {}: {}", path, strerror(errno)); + return nonstd::make_unexpected(strerror(errno)); + } + size_hint = stat.size(); + } + + // +1 to be able to detect EOF in the first read call + size_hint = (size_hint < 1024) ? 1024 : size_hint + 1; + + const int open_flags = [] { + if constexpr (std::is_same::value) { + return O_RDONLY | O_TEXT; + } else { + return O_RDONLY | O_BINARY; + } + }(); + Fd fd(open(path.c_str(), open_flags)); + if (!fd) { + LOG("Failed to open {}: {}", path, strerror(errno)); + return nonstd::make_unexpected(strerror(errno)); + } + + int64_t ret = 0; + size_t pos = 0; + T result; + result.resize(size_hint); + + while (true) { + if (pos == result.size()) { + result.resize(2 * result.size()); + } + const size_t max_read = result.size() - pos; + ret = read(*fd, &result[pos], max_read); + if (ret == 0 || (ret == -1 && errno != EINTR)) { + break; + } + if (ret > 0) { + pos += ret; + if (static_cast(ret) < max_read) { + break; + } + } + } + + if (ret == -1) { + LOG("Failed to read {}: {}", path, strerror(errno)); + return nonstd::make_unexpected(strerror(errno)); + } + + result.resize(pos); + +#ifdef _WIN32 + if constexpr (std::is_same::value) { + // Convert to UTF-8 if the content starts with a UTF-16 little-endian BOM. + // + // Note that this code assumes a little-endian machine, which is why it's + // #ifdef-ed to only run on Windows (which is always little-endian) where + // it's actually needed. + if (has_utf16_le_bom(result)) { + result.erase(0, 2); // Remove BOM. 
+ std::u16string result_as_u16((result.size() / 2) + 1, '\0'); + result_as_u16 = reinterpret_cast(result.c_str()); + std::wstring_convert, char16_t> + converter; + result = converter.to_bytes(result_as_u16); + } } +#endif + + return result; } +template nonstd::expected +read_file(const std::string& path, size_t size_hint); + +template nonstd::expected +read_file(const std::string& path, size_t size_hint); + void set_timestamps(const std::string& path, std::optional mtime, @@ -93,4 +217,42 @@ set_timestamps(const std::string& path, #endif } +nonstd::expected +write_fd(int fd, const void* data, size_t size) +{ + int64_t written = 0; + do { + const auto count = + write(fd, static_cast(data) + written, size - written); + if (count == -1) { + if (errno != EAGAIN && errno != EINTR) { + return nonstd::make_unexpected(strerror(errno)); + } + } else { + written += count; + } + } while (static_cast(written) < size); + return {}; +} + +nonstd::expected +write_file(const std::string& path, const std::string& data) +{ + Fd fd(open(path.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_TEXT, 0666)); + if (!fd) { + return nonstd::make_unexpected(strerror(errno)); + } + return write_fd(*fd, data.data(), data.size()); +} + +nonstd::expected +write_file(const std::string& path, const util::Blob& data) +{ + Fd fd(open(path.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666)); + if (!fd) { + return nonstd::make_unexpected(strerror(errno)); + } + return write_fd(*fd, data.data(), data.size()); +} + } // namespace util diff --git a/src/util/file.hpp b/src/util/file.hpp index 61a66bd1e..886740ff1 100644 --- a/src/util/file.hpp +++ b/src/util/file.hpp @@ -18,6 +18,10 @@ #pragma once +#include + +#include + #include #include #include @@ -28,10 +32,35 @@ namespace util { void create_cachedir_tag(const std::string& dir); +// Read data from `fd` until end of file and call `data_receiver` with the read +// data. Returns an error if the underlying read(2) call returned -1. 
+nonstd::expected read_fd(int fd, DataReceiver data_receiver); + +// Return data from `path`, where `T` is `std::string` for text data and +// `util::Blob` for binary data. If `T` is `std::string` and the content starts +// with a UTF-16 little-endian BOM on Windows then it will be converted to +// UTF-8. If `size_hint` is not 0 then it is assumed that `path` has this size +// (this saves system calls). +template +nonstd::expected read_file(const std::string& path, + size_t size_hint = 0); + // Set atime/mtime of `path`. If `mtime` is std::nullopt, set to the current // time. If `atime` is std::nullopt, set to what `mtime` specifies. void set_timestamps(const std::string& path, std::optional mtime = std::nullopt, std::optional atime = std::nullopt); +// Write `size` bytes from `data` to `fd`. Returns an error message (the +// strerror(errno) text) on error. +nonstd::expected +write_fd(int fd, const void* data, size_t size); + +// Write text `data` to `path`. +nonstd::expected write_file(const std::string& path, + const std::string& data); + +// Write binary `data` to `path`. +nonstd::expected write_file(const std::string& path, + const util::Blob& data); + } // namespace util diff --git a/src/util/types.hpp b/src/util/types.hpp new file mode 100644 index 000000000..8c9d19662 --- /dev/null +++ b/src/util/types.hpp @@ -0,0 +1,31 @@ +// Copyright (C) 2022 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. 
+// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#pragma once + +#include +#include +#include +#include + +namespace util { + +using Blob = std::vector; +using DataReceiver = std::function; + +} // namespace util diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt index 4f52bf9ce..9b852250a 100644 --- a/unittest/CMakeLists.txt +++ b/unittest/CMakeLists.txt @@ -27,6 +27,7 @@ set( test_util_XXH3_128.cpp test_util_XXH3_64.cpp test_util_expected.cpp + test_util_file.cpp test_util_path.cpp test_util_string.cpp ) diff --git a/unittest/test_util_file.cpp b/unittest/test_util_file.cpp new file mode 100644 index 000000000..dbbf66677 --- /dev/null +++ b/unittest/test_util_file.cpp @@ -0,0 +1,119 @@ +// Copyright (C) 2022 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. 
+// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#include "TestUtil.hpp" + +#include + +#include + +#include + +using TestUtil::TestContext; + +TEST_CASE("util::read_file and util::write_file, text data") +{ + TestContext test_context; + + REQUIRE(util::write_file("test", "foo\nbar\n")); + auto data = util::read_file("test"); + REQUIRE(data); + CHECK(*data == "foo\nbar\n"); + + REQUIRE(util::write_file("test", "foo\r\nbar\r\n")); + data = util::read_file("test"); + REQUIRE(data); + CHECK(*data == "foo\r\nbar\r\n"); + + // Newline handling + REQUIRE(util::write_file("test", "foo\r\nbar\n")); + auto bin_data = util::read_file("test"); + REQUIRE(bin_data); +#ifdef _WIN32 + const std::string expected_bin_data = "foo\r\r\nbar\r\n"; +#else + const std::string expected_bin_data = "foo\r\nbar\n"; +#endif + CHECK(*bin_data + == util::Blob(expected_bin_data.begin(), expected_bin_data.end())); + + REQUIRE(util::write_file("size_hint_test", std::string(8192, '\0'))); + data = util::read_file("size_hint_test", 8191 /*size_hint*/); + REQUIRE(data); + CHECK(data->size() == 8192); + data = util::read_file("size_hint_test", 8193 /*size_hint*/); + REQUIRE(data); + CHECK(data->size() == 8192); + + data = util::read_file("does/not/exist"); + REQUIRE(!data); + CHECK(data.error() == "No such file or directory"); + + auto result = util::write_file("", "does/not/exist"); + REQUIRE(!result); + CHECK(result.error() == "No such file or directory"); + + result = util::write_file("does/not/exist", "does/not/exist"); + REQUIRE(!result); + CHECK(result.error() == "No such file or directory"); +} + +TEST_CASE("util::read_file and util::write_file, binary data") +{ + TestContext test_context; + + util::Blob expected; + for (size_t i = 0; i < 512; ++i) { + expected.push_back((32 + i) % 256); + } + + 
CHECK(util::write_file("test", expected)); + auto actual = util::read_file("test"); + REQUIRE(actual); + CHECK(*actual == expected); + + REQUIRE(util::write_file("size_hint_test", util::Blob(8192, 0))); + auto data = util::read_file("size_hint_test", 8191 /*size_hint*/); + REQUIRE(data); + CHECK(data->size() == 8192); + data = util::read_file("size_hint_test", 8193 /*size_hint*/); + REQUIRE(data); + CHECK(data->size() == 8192); +} + +#ifdef _WIN32 +TEST_CASE("util::read_file with UTF-16 little endian encoding") +{ + TestContext test_context; + + std::string data; + data.push_back(static_cast(0xff)); + data.push_back(static_cast(0xfe)); + data.push_back('a'); + data.push_back('\0'); + data.push_back('b'); + data.push_back('\0'); + data.push_back('c'); + data.push_back('\0'); + + CHECK(util::write_file("test", data)); + auto read_data = util::read_file("test"); + REQUIRE(read_data); + CHECK(*read_data == "abc"); +} +#endif