From: Joel Rosdahl <joel@rosdahl.net>
Date: Wed, 7 Sep 2022 12:57:17 +0000 (+0200)
Subject: enhance: Add util::read_file_part function
X-Git-Tag: v4.7~66
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=caf242723a991a77f229786374d71dc805a7af42;p=thirdparty%2Fccache.git

enhance: Add util::read_file_part function
---

diff --git a/src/util/file.cpp b/src/util/file.cpp
index 7f100ce9b..b4d0c286a 100644
--- a/src/util/file.cpp
+++ b/src/util/file.cpp
@@ -188,6 +188,54 @@ read_file(const std::string& path, size_t size_hint);
 template nonstd::expected<std::vector<uint8_t>, std::string>
 read_file(const std::string& path, size_t size_hint);
 
+// Read at most `count` bytes from `path`, starting at byte offset `pos`. The
+// result is shorter than `count` if end of file is reached first. On failure
+// the strerror() text for the failing call is returned as the error value.
+template<typename T>
+nonstd::expected<T, std::string>
+read_file_part(const std::string& path, size_t pos, size_t count)
+{
+  Fd fd(open(path.c_str(), O_RDONLY | O_BINARY));
+  if (!fd) {
+    const int saved_errno = errno; // LOG may do I/O that changes errno.
+    LOG("Failed to open {}: {}", path, strerror(saved_errno));
+    return nonstd::make_unexpected(strerror(saved_errno));
+  }
+  if (pos != 0 && lseek(*fd, pos, SEEK_SET) != static_cast<off_t>(pos)) {
+    return nonstd::make_unexpected(strerror(errno));
+  }
+
+  T result;
+  if (count == 0) {
+    return result; // Nothing to read; avoids indexing an empty buffer below.
+  }
+  result.resize(count);
+
+  size_t bytes_read = 0;
+  while (bytes_read < count) {
+    const int64_t ret = read(*fd, &result[bytes_read], count - bytes_read);
+    if (ret == 0) {
+      break; // End of file.
+    }
+    if (ret > 0) {
+      bytes_read += ret;
+    } else if (errno != EINTR) { // EINTR: interrupted, so just retry.
+      const int saved_errno = errno;
+      LOG("Failed to read {}: {}", path, strerror(saved_errno));
+      return nonstd::make_unexpected(strerror(saved_errno));
+    }
+  }
+
+  result.resize(bytes_read);
+  return result;
+}
+
+// Explicit instantiations for the result types named in util/file.hpp.
+template nonstd::expected<util::Bytes, std::string>
+read_file_part(const std::string& path, size_t pos, size_t count);
+template nonstd::expected<std::vector<uint8_t>, std::string>
+read_file_part(const std::string& path, size_t pos, size_t count);
+
 void
 set_timestamps(const std::string& path,
                std::optional<timespec> mtime,
diff --git a/src/util/file.hpp b/src/util/file.hpp
index e57820210..7ebdbdbec 100644
--- a/src/util/file.hpp
+++ b/src/util/file.hpp
@@ -23,6 +23,8 @@
 #include <third_party/nonstd/expected.hpp>
 #include <third_party/nonstd/span.hpp>
 
+#include <cstddef>
+#include <cstdint>
 #include <ctime>
 #include <optional>
 #include <string>
@@ -53,6 +55,15 @@ template<typename T>
 nonstd::expected<T, std::string> read_file(const std::string& path,
                                            size_t size_hint = 0);
 
+// Return (at most) `count` bytes from `path` starting at position `pos`.
+//
+// `T` should be `util::Bytes` or `std::vector<uint8_t>`, for which file.cpp
+// provides explicit instantiations. The bytes are returned exactly as read; no
+// UTF-16 to UTF-8 conversion is performed.
+template<typename T>
+nonstd::expected<T, std::string>
+read_file_part(const std::string& path, size_t pos, size_t count);
+
 // Set atime/mtime of `path`. If `mtime` is std::nullopt, set to the current
 // time. If `atime` is std::nullopt, set to what `mtime` specifies.
 void set_timestamps(const std::string& path,
diff --git a/unittest/test_util_file.cpp b/unittest/test_util_file.cpp
index eec2de43e..9b99d1501 100644
--- a/unittest/test_util_file.cpp
+++ b/unittest/test_util_file.cpp
@@ -18,11 +18,13 @@
 
 #include "TestUtil.hpp"
 
+#include <util/Bytes.hpp>
 #include <util/file.hpp>
 
 #include <third_party/doctest.h>
 
 #include <cstring>
+#include <string_view>
 
 using TestUtil::TestContext;
 
@@ -120,3 +122,26 @@ TEST_CASE("util::read_file<std::string> with UTF-16 little endian encoding")
   CHECK(*read_data == "abc");
 }
 #endif
+
+TEST_CASE("util::read_file_part")
+{
+  const auto bytes_of = [](std::string_view text) {
+    return util::Bytes(text.data(), text.size());
+  };
+  const auto read_part = [](size_t pos, size_t count) {
+    return util::read_file_part<util::Bytes>("test", pos, count);
+  };
+
+  CHECK(util::write_file("test", "banana"));
+
+  // From the start: nothing, the exact size, more than is available.
+  CHECK(read_part(0, 0) == bytes_of(""));
+  CHECK(read_part(0, 6) == bytes_of("banana"));
+  CHECK(read_part(0, 1000) == bytes_of("banana"));
+  // From the middle: nothing, a slice, more than is available.
+  CHECK(read_part(3, 0) == bytes_of(""));
+  CHECK(read_part(3, 2) == bytes_of("an"));
+  CHECK(read_part(3, 1000) == bytes_of("ana"));
+  // An offset past the end of the file yields an empty result, not an error.
+  CHECK(read_part(1000, 1000) == bytes_of(""));
+}