enhance: Add util::read_file_part function

author Joel Rosdahl <joel@rosdahl.net>

Wed, 7 Sep 2022 12:57:17 +0000 (14:57 +0200)

committer Joel Rosdahl <joel@rosdahl.net>

Wed, 21 Sep 2022 15:06:27 +0000 (17:06 +0200)
author Joel Rosdahl <joel@rosdahl.net>
Wed, 7 Sep 2022 12:57:17 +0000 (14:57 +0200)
committer Joel Rosdahl <joel@rosdahl.net>
Wed, 21 Sep 2022 15:06:27 +0000 (17:06 +0200)
diff --git a/src/util/file.cpp b/src/util/file.cpp

index 7f100ce9b7cdffd11d204133296eaa6de895c1a8..b4d0c286a1c09f768ea2a838a117f5ecf7e676d4 100644 (file)
--- a/src/util/file.cpp
+++ b/src/util/file.cpp
@@ -188,6 +188,54 @@ read_file(const std::string& path, size_t size_hint);
  template nonstd::expected<std::vector<uint8_t>, std::string>
  read_file(const std::string& path, size_t size_hint);
  
+template<typename T>
+nonstd::expected<T, std::string>
+read_file_part(const std::string& path, size_t pos, size_t count)
+{
+  Fd fd(open(path.c_str(), O_RDONLY | O_BINARY));
+  if (!fd) {
+    LOG("Failed to open {}: {}", path, strerror(errno));
+    return nonstd::make_unexpected(strerror(errno));
+  }
+
+  if (pos != 0 && lseek(*fd, pos, SEEK_SET) != static_cast<off_t>(pos)) {
+    return nonstd::make_unexpected(strerror(errno));
+  }
+
+  int64_t ret = 0;
+  size_t bytes_read = 0;
+  T result;
+  result.resize(count);
+
+  while (true) {
+    const size_t max_read = count - bytes_read;
+    ret = read(*fd, &result[bytes_read], max_read);
+    if (ret == 0 || (ret == -1 && errno != EINTR)) {
+      break;
+    }
+    if (ret > 0) {
+      bytes_read += ret;
+      if (bytes_read == count) {
+        break;
+      }
+    }
+  }
+
+  if (ret == -1) {
+    LOG("Failed to read {}: {}", path, strerror(errno));
+    return nonstd::make_unexpected(strerror(errno));
+  }
+
+  result.resize(bytes_read);
+  return result;
+}
+
+template nonstd::expected<util::Bytes, std::string>
+read_file_part(const std::string& path, size_t pos, size_t count);
+
+template nonstd::expected<std::vector<uint8_t>, std::string>
+read_file_part(const std::string& path, size_t pos, size_t count);
+
  void
  set_timestamps(const std::string& path,
                 std::optional<timespec> mtime,
diff --git a/src/util/file.hpp b/src/util/file.hpp

index e57820210148145ddc8a1f56e6e66cf2e7211b98..7ebdbdbec85b327314e9e393aedabba1f5ae829c 100644 (file)
--- a/src/util/file.hpp
+++ b/src/util/file.hpp
@@ -23,6 +23,8 @@
  #include <third_party/nonstd/expected.hpp>
  #include <third_party/nonstd/span.hpp>
  
+#include <cstddef>
+#include <cstdint>
  #include <ctime>
  #include <optional>
  #include <string>
@@ -53,6 +55,15 @@ template<typename T>
  nonstd::expected<T, std::string> read_file(const std::string& path,
                                             size_t size_hint = 0);
  
+// Return (at most) `count` bytes from `path` starting at position `pos`. Fewer
+// bytes are returned if the file ends before `count` bytes have been read.
+//
+// `T` should be `util::Bytes` or `std::vector<uint8_t>`. No text conversion is
+// performed; the bytes are returned exactly as stored in the file.
+template<typename T>
+nonstd::expected<T, std::string>
+read_file_part(const std::string& path, size_t pos, size_t count);
+
  // Set atime/mtime of `path`. If `mtime` is std::nullopt, set to the current
  // time. If `atime` is std::nullopt, set to what `mtime` specifies.
  void set_timestamps(const std::string& path,
diff --git a/unittest/test_util_file.cpp b/unittest/test_util_file.cpp

index eec2de43e95720df7ec78f5105ee0871197bfee4..9b99d15014665a83284d2a4024e3733dc9fe07a2 100644 (file)
--- a/unittest/test_util_file.cpp
+++ b/unittest/test_util_file.cpp
@@ -18,11 +18,13 @@
  
  #include "TestUtil.hpp"
  
+#include <util/Bytes.hpp>
  #include <util/file.hpp>
  
  #include <third_party/doctest.h>
  
  #include <cstring>
+#include <string_view>
  
  using TestUtil::TestContext;
  
@@ -120,3 +122,26 @@ TEST_CASE("util::read_file<std::string> with UTF-16 little endian encoding")
    CHECK(*read_data == "abc");
  }
  #endif
+
+TEST_CASE("util::read_file_part")
+{
+  auto arr_from_str = [](std::string_view str) {
+    return util::Bytes(str.data(), str.size());
+  };
+
+  CHECK(util::write_file("test", "banana"));
+
+  CHECK(util::read_file_part<util::Bytes>("test", 0, 0) == arr_from_str(""));
+  CHECK(util::read_file_part<util::Bytes>("test", 0, 6)
+        == arr_from_str("banana"));
+  CHECK(util::read_file_part<util::Bytes>("test", 0, 1000)
+        == arr_from_str("banana"));
+
+  CHECK(util::read_file_part<util::Bytes>("test", 3, 0) == arr_from_str(""));
+  CHECK(util::read_file_part<util::Bytes>("test", 3, 2) == arr_from_str("an"));
+  CHECK(util::read_file_part<util::Bytes>("test", 3, 1000)
+        == arr_from_str("ana"));
+
+  CHECK(util::read_file_part<util::Bytes>("test", 1000, 1000)
+        == arr_from_str(""));
+}
author	Joel Rosdahl <joel@rosdahl.net>
	Wed, 7 Sep 2022 12:57:17 +0000 (14:57 +0200)
committer	Joel Rosdahl <joel@rosdahl.net>
	Wed, 21 Sep 2022 15:06:27 +0000 (17:06 +0200)
src/util/file.cpp		patch \| blob \| blame \| history
src/util/file.hpp		patch \| blob \| blame \| history
unittest/test_util_file.cpp		patch \| blob \| blame \| history