From 501ed5e2bc27f4c8804de1c0b47012820c0f7e7b Mon Sep 17 00:00:00 2001 From: Joel Rosdahl Date: Wed, 7 Sep 2022 10:17:21 +0200 Subject: [PATCH] enhance: Add util::Bytes class --- src/util/Bytes.cpp | 138 ++++++++++++++ src/util/Bytes.hpp | 216 +++++++++++++++++++++ src/util/CMakeLists.txt | 1 + src/util/file.cpp | 4 + src/util/file.hpp | 14 +- unittest/CMakeLists.txt | 1 + unittest/test_util_Bytes.cpp | 358 +++++++++++++++++++++++++++++++++++ 7 files changed, 727 insertions(+), 5 deletions(-) create mode 100644 src/util/Bytes.cpp create mode 100644 src/util/Bytes.hpp create mode 100644 unittest/test_util_Bytes.cpp diff --git a/src/util/Bytes.cpp b/src/util/Bytes.cpp new file mode 100644 index 000000000..f06eeee19 --- /dev/null +++ b/src/util/Bytes.cpp @@ -0,0 +1,138 @@ +// Copyright (C) 2022 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. 
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include "Bytes.hpp"
+
+#include <algorithm>
+#include <cstring>
+
+namespace util {
+
+// Note: every function in this file is declared noexcept but allocates with
+// plain `new`, so an allocation failure terminates the program instead of
+// propagating std::bad_alloc to the caller.
+
+// Copy constructor. Allocates a buffer of exactly other.size() bytes and
+// copies the content into it, so capacity() == size() in the new object
+// regardless of other.capacity().
+Bytes::Bytes(const Bytes& other) noexcept
+  : m_data(new uint8_t[other.m_size]),
+    m_size(other.m_size),
+    m_capacity(other.m_size)
+{
+  // There is no previous buffer to release here: the object is just being
+  // constructed.
+  if (m_size > 0) {
+    std::memcpy(m_data, other.m_data, m_size);
+  }
+}
+
+// Move constructor. Takes over the buffer of `other` and leaves `other` empty
+// (null data pointer, size 0, capacity 0).
+Bytes::Bytes(Bytes&& other) noexcept
+  : m_data(other.m_data),
+    m_size(other.m_size),
+    m_capacity(other.m_capacity)
+{
+  other.m_data = nullptr;
+  other.m_size = 0;
+  other.m_capacity = 0;
+}
+
+// Copy assignment. Replaces the content with a copy of `other` stored in a
+// freshly allocated buffer of exactly other.size() bytes. Self-assignment is a
+// no-op.
+Bytes&
+Bytes::operator=(const Bytes& other) noexcept
+{
+  if (&other == this) {
+    return *this;
+  }
+  // Build the replacement before releasing the current buffer so that m_data
+  // never refers to freed memory.
+  uint8_t* new_data = new uint8_t[other.m_size];
+  if (other.m_size > 0) {
+    std::memcpy(new_data, other.m_data, other.m_size);
+  }
+  delete[] m_data;
+  m_data = new_data;
+  m_size = other.m_size;
+  m_capacity = other.m_size;
+  return *this;
+}
+
+// Move assignment. Releases the current buffer, takes over the buffer of
+// `other` and leaves `other` empty. Self-assignment is a no-op.
+Bytes&
+Bytes::operator=(Bytes&& other) noexcept
+{
+  if (&other == this) {
+    return *this;
+  }
+  delete[] m_data;
+  m_data = other.m_data;
+  m_size = other.m_size;
+  m_capacity = other.m_capacity;
+  other.m_data = nullptr;
+  other.m_size = 0;
+  other.m_capacity = 0;
+  return *this;
+}
+
+// Make sure that capacity() is at least `size`. If the buffer has to grow it
+// is reallocated to exactly `size` bytes and the first size() bytes are kept.
+// The buffer is never shrunk and size() is never changed.
+void
+Bytes::reserve(size_t size) noexcept
+{
+  if (size > m_capacity) {
+    uint8_t* data = new uint8_t[size];
+    if (m_size > 0) {
+      std::memcpy(data, m_data, m_size);
+    }
+    delete[] m_data;
+    m_data = data;
+    m_capacity = size;
+  }
+}
+
+// Insert the bytes in [first, last) before `pos`.
+//
+// `pos` must point into [begin(), end()] of this object. [first, last) must
+// not point into this object's own buffer since the buffer may be freed or its
+// content shifted before the range is copied.
+//
+// If the result does not fit in the current capacity, the capacity grows to
+// the larger of twice the old capacity and the new size.
+void
+Bytes::insert(const uint8_t* pos,
+              const uint8_t* first,
+              const uint8_t* last) noexcept
+{
+  const size_t inserted_size = last - first;
+  if (inserted_size == 0) {
+    return;
+  }
+  const size_t offset = pos - m_data;
+  if (m_size + inserted_size > m_capacity) {
+    m_capacity = std::max(2 * m_capacity, m_size + inserted_size);
+    uint8_t* new_data = new uint8_t[m_capacity];
+    // Copy the part before the insertion point...
+    if (offset > 0) {
+      std::memcpy(new_data, m_data, offset);
+    }
+    // ...and the part after it, leaving a gap of inserted_size bytes.
+    if (m_size > offset) {
+      std::memcpy(
+        new_data + offset + inserted_size, m_data + offset, m_size - offset);
+    }
+    delete[] m_data;
+    m_data = new_data;
+  } else if (m_size > offset) {
+    // Enough room: shift the tail in place. Source and destination may
+    // overlap, hence memmove.
+    std::memmove(
+      m_data + offset + inserted_size, m_data + offset, m_size - offset);
+  }
+  std::memcpy(m_data + offset, first, inserted_size);
+  m_size += inserted_size;
+}
+
+// Set size() to `size`. Growing beyond capacity() reallocates to exactly
+// `size` bytes and keeps the old content; the added bytes are uninitialized.
+// Shrinking keeps the buffer and the capacity.
+void
+Bytes::resize(size_t size) noexcept
+{
+  reserve(size);
+  m_size = size;
+}
+
+} // namespace util
diff --git a/src/util/Bytes.hpp b/src/util/Bytes.hpp
new file mode 100644
index 000000000..7716f1382
--- /dev/null
+++ b/src/util/Bytes.hpp
@@ -0,0 +1,216 @@
+// Copyright (C) 2022 Joel Rosdahl and other contributors
+//
+// See doc/AUTHORS.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#pragma once
+
+#include <third_party/nonstd/span.hpp>
+
+#include <cstdint>
+#include <cstring>
+#include <initializer_list>
+
+namespace util {
+
+// This class represents a contiguous array of bytes.
+//
+// The primary motivation for this class instead of just using
+// std::vector<uint8_t> is to make zero copying without zero-filling possible
+// when retrieving data from syscalls like read(2), i.e., when
+// std::vector::insert cannot be used.
+class Bytes
+{
+public:
+  // Create an empty object: data() is nullptr, size() and capacity() are 0.
+  Bytes() noexcept = default;
+
+  // Create an object holding `size` uninitialized bytes.
+  explicit Bytes(size_t size) noexcept;
+
+  // Create an object holding a copy of the `size` bytes starting at `data`.
+  // `data` is not read when `size` is 0.
+  Bytes(const void* data, size_t size) noexcept;
+
+  // Create an object holding a copy of the bytes viewed by `data`.
+  Bytes(nonstd::span<const uint8_t> data) noexcept;
+
+  Bytes(const Bytes& other) noexcept;
+  Bytes(Bytes&& other) noexcept;
+
+  // Create an object holding a copy of the listed bytes.
+  Bytes(std::initializer_list<uint8_t> init) noexcept;
+
+  ~Bytes() noexcept;
+
+  Bytes& operator=(const Bytes& other) noexcept;
+  Bytes& operator=(Bytes&& other) noexcept;
+
+  // Access the byte at index `pos`. No bounds checking is performed.
+  uint8_t operator[](size_t pos) const noexcept;
+  uint8_t& operator[](size_t pos) noexcept;
+
+  // Two objects are equal if they have the same size and the same content.
+  // The capacity does not take part in the comparison.
+  bool operator==(const Bytes& other) const noexcept;
+  bool operator!=(const Bytes& other) const noexcept;
+
+  // Pointer to the first byte; nullptr for a default-constructed object.
+  const uint8_t* data() const noexcept;
+  uint8_t* data() noexcept;
+
+  uint8_t* begin() noexcept;
+  const uint8_t* begin() const noexcept;
+  const uint8_t* cbegin() const noexcept;
+
+  uint8_t* end() noexcept;
+  const uint8_t* end() const noexcept;
+  const uint8_t* cend() const noexcept;
+
+  bool empty() const noexcept;
+  size_t size() const noexcept;
+  size_t capacity() const noexcept;
+  void reserve(size_t size) noexcept;
+
+  // Set the size to 0. The buffer and the capacity are kept.
+  void clear() noexcept;
+
+  // Insert the bytes in [first, last) before `pos`.
+  void insert(const uint8_t* pos,
+              const uint8_t* first,
+              const uint8_t* last) noexcept;
+  void resize(size_t size) noexcept; // Note: New bytes will be uninitialized.
+
+private:
+  uint8_t* m_data = nullptr; // Owned buffer, released with delete[].
+  size_t m_size = 0;         // Number of bytes in use.
+  size_t m_capacity = 0;     // Number of bytes allocated.
+};
+
+inline Bytes::Bytes(size_t size) noexcept
+  : m_data(new uint8_t[size]),
+    m_size(size),
+    m_capacity(size)
+{
+}
+
+inline Bytes::Bytes(const void* data, size_t size) noexcept
+  : m_data(new uint8_t[size]),
+    m_size(size),
+    m_capacity(size)
+{
+  // An empty source (for instance an empty span) may come with a null data
+  // pointer, which must not be passed to std::memcpy even for zero bytes.
+  if (size > 0) {
+    std::memcpy(m_data, data, size);
+  }
+}
+
+inline Bytes::Bytes(nonstd::span<const uint8_t> data) noexcept
+  : Bytes(data.data(), data.size())
+{
+}
+
+inline Bytes::Bytes(std::initializer_list<uint8_t> init) noexcept
+  : Bytes(init.begin(), init.size())
+{
+}
+
+inline Bytes::~Bytes() noexcept
+{
+  delete[] m_data;
+}
+
+inline uint8_t
+Bytes::operator[](size_t pos) const noexcept
+{
+  return m_data[pos];
+}
+
+inline uint8_t&
+Bytes::operator[](size_t pos) noexcept
+{
+  return m_data[pos];
+}
+
+inline bool
+Bytes::operator==(const Bytes& other) const noexcept
+{
+  // Empty objects may have null data pointers, which must not be passed to
+  // std::memcmp even for zero bytes, so sizes are checked first.
+  return this == &other
+         || (m_size == other.m_size
+             && (m_size == 0
+                 || std::memcmp(m_data, other.m_data, m_size) == 0));
+}
+
+inline bool
+Bytes::operator!=(const Bytes& other) const noexcept
+{
+  return !(*this == other);
+}
+
+inline const uint8_t*
+Bytes::data() const noexcept
+{
+  return m_data;
+}
+
+inline uint8_t*
+Bytes::data() noexcept
+{
+  return m_data;
+}
+
+inline uint8_t*
+Bytes::begin() noexcept
+{
+  return m_data;
+}
+
+inline const uint8_t*
+Bytes::begin() const noexcept
+{
+  return m_data;
+}
+
+inline const uint8_t*
+Bytes::cbegin() const noexcept
+{
+  return m_data;
+}
+
+inline uint8_t*
+Bytes::end() noexcept
+{
+  return m_data + m_size;
+}
+
+inline const uint8_t*
+Bytes::end() const noexcept
+{
+  return m_data + m_size;
+}
+
+inline const uint8_t*
+Bytes::cend() const noexcept
+{
+  return m_data + m_size;
+}
+
+inline bool
+Bytes::empty() const noexcept
+{
+  return m_size == 0;
+}
+
+inline size_t
+Bytes::size() const noexcept
+{
+  return m_size;
+}
+
+inline size_t
+Bytes::capacity() const noexcept
+{
+  return m_capacity;
+}
+
+inline void
+Bytes::clear() noexcept
+{
+  m_size = 0;
+}
+
+} // namespace util diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt index ff3eee652..d02d3ca6b 100644 --- a/src/util/CMakeLists.txt +++ b/src/util/CMakeLists.txt @@ -1,5 +1,6 @@ set( sources + Bytes.cpp LockFile.cpp TextTable.cpp Tokenizer.cpp diff --git a/src/util/file.cpp b/src/util/file.cpp index 7b2b27c90..7f100ce9b 100644 --- a/src/util/file.cpp +++ b/src/util/file.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #ifdef HAVE_UNISTD_H # include @@ -178,6 +179,9 @@ read_file(const std::string& path, size_t size_hint) return result; } +template nonstd::expected +read_file(const std::string& path, size_t size_hint); + template nonstd::expected read_file(const std::string& path, size_t size_hint); diff --git a/src/util/file.hpp b/src/util/file.hpp index b766b5570..e57820210 100644 --- a/src/util/file.hpp +++ b/src/util/file.hpp @@ -40,11 +40,15 @@ void create_cachedir_tag(const std::string& dir); // data. Returns an error if the underlying read(2) call returned -1. nonstd::expected read_fd(int fd, DataReceiver data_receiver); -// Return data from `path`, where `T` is `std::string` for text data and -// `std::vector` for binary data. If `T` is `std::string` and the -// content starts with a UTF-16 little-endian BOM on Windows then it will be -// converted to UTF-8. If `size_hint` is not 0 then it is assumed that `path` -// has this size (this saves system calls). +// Return contents of file at `path`. +// +// `T` should be `util::Bytes` or `std::vector` for binary data and +// `std::string` for text data. If `T` is `std::string` and the content starts +// with a UTF-16 little-endian BOM on Windows then it will be converted to +// UTF-8. +// +// If `size_hint` is not 0 then it is assumed that `path` has this size (this +// saves system calls). 
template nonstd::expected read_file(const std::string& path, size_t size_hint = 0); diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt index 9b852250a..bdfb0270a 100644 --- a/unittest/CMakeLists.txt +++ b/unittest/CMakeLists.txt @@ -21,6 +21,7 @@ set( test_hashutil.cpp test_storage_primary_StatsFile.cpp test_storage_primary_util.cpp + test_util_Bytes.cpp test_util_LockFile.cpp test_util_TextTable.cpp test_util_Tokenizer.cpp diff --git a/unittest/test_util_Bytes.cpp b/unittest/test_util_Bytes.cpp new file mode 100644 index 000000000..4ff07b008 --- /dev/null +++ b/unittest/test_util_Bytes.cpp @@ -0,0 +1,358 @@ +// Copyright (C) 2021-2022 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. 
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include
+
+#include
+#include
+
+#include
+
+TEST_SUITE_BEGIN("util::Bytes");
+
+using util::Bytes;
+
+// Exercises construction, assignment, element access, comparison, iteration
+// and the size/capacity modifiers of util::Bytes.
+TEST_CASE("Basics")
+{
+  // Shared fixture: three bytes "abc" with size and capacity 3. Each SUBCASE
+  // below uses this object as its starting point.
+  Bytes bytes1("abc", 3);
+
+  SUBCASE("Default construction")
+  {
+    Bytes bytes0;
+
+    CHECK(bytes0.data() == nullptr);
+    CHECK(bytes0.size() == 0);
+    CHECK(bytes0.capacity() == 0);
+  }
+
+  SUBCASE("Sized construction")
+  {
+    Bytes bytes2(42);
+
+    // Only the allocation and the bookkeeping are checked, not the content.
+    CHECK(bytes2.data() != nullptr);
+    CHECK(bytes2.size() == 42);
+    CHECK(bytes2.capacity() == 42);
+  }
+
+  SUBCASE("Construction from data and size")
+  {
+    CHECK(bytes1.data() != nullptr);
+    REQUIRE(bytes1.size() == 3);
+    REQUIRE(bytes1.capacity() == 3);
+    CHECK(bytes1[0] == 'a');
+    CHECK(bytes1[1] == 'b');
+    CHECK(bytes1[2] == 'c');
+  }
+
+  SUBCASE("Construction from span")
+  {
+    std::vector vector{'a', 'b', 'c'};
+    Bytes bytes2(vector);
+
+    CHECK(bytes2.data() != nullptr);
+    REQUIRE(bytes2.size() == 3);
+    CHECK(bytes2[0] == 'a');
+    CHECK(bytes2[1] == 'b');
+    CHECK(bytes2[2] == 'c');
+  }
+
+  SUBCASE("Copy construction")
+  {
+    const Bytes bytes2(bytes1);
+
+    // The copy must own a buffer of its own with the same content.
+    CHECK(bytes2.data() != nullptr);
+    CHECK(bytes2.data() != bytes1.data());
+    REQUIRE(bytes2.size() == 3);
+    CHECK(bytes2[0] == 'a');
+    CHECK(bytes2[1] == 'b');
+    CHECK(bytes2[2] == 'c');
+  }
+
+  SUBCASE("Move construction")
+  {
+    const auto bytes1_orig_data = bytes1.data();
+    Bytes bytes2(std::move(bytes1));
+
+    // The moved-from object is left empty...
+    CHECK(bytes1.data() == nullptr);
+    CHECK(bytes1.size() == 0);
+
+    // ...and the new object has taken over the original buffer.
+    CHECK(bytes2.data() != nullptr);
+    CHECK(bytes2.data() == bytes1_orig_data);
+    REQUIRE(bytes2.size() == 3);
+    CHECK(bytes2[0] == 'a');
+    CHECK(bytes2[1] == 'b');
+    CHECK(bytes2[2] == 'c');
+  }
+
+  SUBCASE("Construction from initializer list")
+  {
+    Bytes bytes2{'a', 'b', 'c'};
+    CHECK(bytes2 == bytes1);
+  }
+
+  SUBCASE("Copy assignment")
+  {
+    Bytes bytes2;
+
+    bytes2 = bytes1;
+    CHECK(bytes2.data() != nullptr);
+    CHECK(bytes2.data() != bytes1.data());
+    REQUIRE(bytes2.size() == 3);
+    CHECK(bytes2[0] == 'a');
+    CHECK(bytes2[1] == 'b');
+    CHECK(bytes2[2] == 'c');
+  }
+
+  SUBCASE("Move assignment")
+  {
+    const auto bytes1_orig_data = bytes1.data();
+    Bytes bytes2;
+    bytes2 = std::move(bytes1);
+
+    // The moved-from object is left empty...
+    CHECK(bytes1.data() == nullptr);
+    CHECK(bytes1.size() == 0);
+
+    // ...and the target has taken over the original buffer.
+    CHECK(bytes2.data() == bytes1_orig_data);
+    REQUIRE(bytes2.size() == 3);
+    CHECK(bytes2[0] == 'a');
+    CHECK(bytes2[1] == 'b');
+    CHECK(bytes2[2] == 'c');
+  }
+
+  SUBCASE("Assignment from initializer list")
+  {
+    Bytes bytes2;
+    bytes2 = {'a', 'b', 'c'};
+    CHECK(bytes2 == bytes1);
+  }
+
+  SUBCASE("Non-const operator[]")
+  {
+    bytes1[1] = 'x';
+    CHECK(bytes1[1] == 'x');
+  }
+
+  SUBCASE("Comparison")
+  {
+    // Same object.
+    CHECK(bytes1 == bytes1);
+    CHECK(!(bytes1 != bytes1));
+
+    // Different object, same content.
+    Bytes bytes2(bytes1);
+    CHECK(bytes2 == bytes1);
+    CHECK(!(bytes2 != bytes1));
+
+    // Different size.
+    Bytes bytes3;
+    CHECK(bytes3 != bytes1);
+    CHECK(!(bytes3 == bytes1));
+
+    // Same size, different content.
+    Bytes bytes4("xyz", 3);
+    CHECK(bytes4 != bytes1);
+    CHECK(!(bytes4 == bytes1));
+  }
+  SUBCASE("Begin")
+  {
+    // bytes1 is non-const and bytes2 is const, so both begin() overloads are
+    // covered.
+    const Bytes bytes2(bytes1);
+
+    CHECK(bytes1.begin() == bytes1.data());
+    CHECK(bytes2.begin() == bytes2.data());
+    CHECK(bytes1.cbegin() == bytes1.data());
+  }
+
+  SUBCASE("end")
+  {
+    // bytes1 is non-const and bytes2 is const, so both end() overloads are
+    // covered.
+    const Bytes bytes2(bytes1);
+
+    CHECK(bytes1.end() == bytes1.data() + bytes1.size());
+    CHECK(bytes2.end() == bytes2.data() + bytes2.size());
+    CHECK(bytes1.cend() == bytes1.data() + bytes1.size());
+  }
+
+  SUBCASE("Clear and empty")
+  {
+    CHECK(bytes1.size() == 3);
+    CHECK(bytes1.capacity() == 3);
+    CHECK(!bytes1.empty());
+
+    bytes1.clear();
+
+    // Clearing resets the size but keeps the capacity.
+    CHECK(bytes1.size() == 0);
+    CHECK(bytes1.capacity() == 3);
+    CHECK(bytes1.empty());
+  }
+
+  SUBCASE("Reserve and capacity")
+  {
+    const auto bytes1_orig_data = bytes1.data();
+    CHECK(bytes1.size() == 3);
+    CHECK(bytes1.capacity() == 3);
+
+    // Reserving less than the current capacity changes nothing.
+    bytes1.reserve(2);
+    CHECK(bytes1.size() == 3);
+    CHECK(bytes1.capacity() == 3);
+    CHECK(bytes1.data() == bytes1_orig_data);
+
+    // Reserving more reallocates to exactly the requested capacity.
+    bytes1.reserve(4);
+    CHECK(bytes1.size() == 3);
+    CHECK(bytes1.capacity() == 4);
+    CHECK(bytes1.data() != bytes1_orig_data);
+  }
+
+  SUBCASE("Increase size")
+  {
+    const auto bytes1_orig_data = bytes1.data();
+    bytes1.resize(4);
+    CHECK(bytes1.data() != bytes1_orig_data);
+    CHECK(bytes1.size() == 4);
+    CHECK(bytes1.capacity() == 4);
+    // The old content is kept; the added byte is not checked.
+    CHECK(bytes1[0] == 'a');
+    CHECK(bytes1[1] == 'b');
+    CHECK(bytes1[2] == 'c');
+  }
+
+  SUBCASE("Decrease size")
+  {
+    const auto bytes1_orig_data = bytes1.data();
+    bytes1.resize(2);
+    // Shrinking keeps both the buffer and the capacity.
+    CHECK(bytes1.data() == bytes1_orig_data);
+    CHECK(bytes1.size() == 2);
+    CHECK(bytes1.capacity() == 3);
+    CHECK(bytes1[0] == 'a');
+    CHECK(bytes1[1] == 'b');
+  }
+
+  // The steps below build on each other: every insert operates on the state
+  // left by the previous one, so their order must not be changed.
+  SUBCASE("Insert")
+  {
+    Bytes bytes2;
+
+    // Insert into an empty object.
+    bytes2.insert(bytes2.end(), bytes1.begin(), bytes1.end());
+    CHECK(bytes2.size() == 3);
+    CHECK(bytes2.capacity() == 3);
+    CHECK(bytes2[0] == 'a');
+    CHECK(bytes2[1] == 'b');
+    CHECK(bytes2[2] == 'c');
+
+    // Insert at end, reallocating.
+    bytes2.insert(bytes2.end(), bytes1.begin(), bytes1.begin() + 1);
+    CHECK(bytes2.size() == 4);
+    CHECK(bytes2.capacity() == 6);
+    CHECK(bytes2[0] == 'a');
+    CHECK(bytes2[1] == 'b');
+    CHECK(bytes2[2] == 'c');
+    CHECK(bytes2[3] == 'a');
+
+    // Insert at end, not reallocating.
+    Bytes bytes3("xyz", 3);
+    bytes2.insert(bytes2.end(), bytes3.begin(), bytes3.begin() + 1);
+    CHECK(bytes2.size() == 5);
+    CHECK(bytes2.capacity() == 6);
+    CHECK(bytes2[0] == 'a');
+    CHECK(bytes2[1] == 'b');
+    CHECK(bytes2[2] == 'c');
+    CHECK(bytes2[3] == 'a');
+    CHECK(bytes2[4] == 'x');
+
+    // Insert in middle, reallocating.
+    bytes2.insert(bytes2.begin() + 2, bytes3.begin(), bytes3.end());
+    CHECK(bytes2.size() == 8);
+    CHECK(bytes2.capacity() == 12);
+    CHECK(bytes2[0] == 'a');
+    CHECK(bytes2[1] == 'b');
+    CHECK(bytes2[2] == 'x');
+    CHECK(bytes2[3] == 'y');
+    CHECK(bytes2[4] == 'z');
+    CHECK(bytes2[5] == 'c');
+    CHECK(bytes2[6] == 'a');
+    CHECK(bytes2[7] == 'x');
+
+    // Insert in middle, not reallocating.
+    bytes2.insert(bytes2.begin() + 1, bytes3.begin(), bytes3.begin() + 2);
+    CHECK(bytes2.size() == 10);
+    CHECK(bytes2.capacity() == 12);
+    CHECK(bytes2[0] == 'a');
+    CHECK(bytes2[1] == 'x');
+    CHECK(bytes2[2] == 'y');
+    CHECK(bytes2[3] == 'b');
+    CHECK(bytes2[4] == 'x');
+    CHECK(bytes2[5] == 'y');
+    CHECK(bytes2[6] == 'z');
+    CHECK(bytes2[7] == 'c');
+    CHECK(bytes2[8] == 'a');
+    CHECK(bytes2[9] == 'x');
+
+    // Insert at beginning, reallocating.
+    bytes2.insert(bytes2.begin(), bytes3.begin(), bytes3.end());
+    CHECK(bytes2.size() == 13);
+    CHECK(bytes2.capacity() == 24);
+    CHECK(bytes2[0] == 'x');
+    CHECK(bytes2[1] == 'y');
+    CHECK(bytes2[2] == 'z');
+    CHECK(bytes2[3] == 'a');
+    CHECK(bytes2[4] == 'x');
+    CHECK(bytes2[5] == 'y');
+    CHECK(bytes2[6] == 'b');
+    CHECK(bytes2[7] == 'x');
+    CHECK(bytes2[8] == 'y');
+    CHECK(bytes2[9] == 'z');
+    CHECK(bytes2[10] == 'c');
+    CHECK(bytes2[11] == 'a');
+    CHECK(bytes2[12] == 'x');
+
+    // Insert at beginning, not reallocating.
+    bytes2.insert(bytes2.begin(), bytes3.begin() + 2, bytes3.begin() + 3);
+    CHECK(bytes2.size() == 14);
+    CHECK(bytes2.capacity() == 24);
+    CHECK(bytes2[0] == 'z');
+    CHECK(bytes2[1] == 'x');
+    CHECK(bytes2[2] == 'y');
+    CHECK(bytes2[3] == 'z');
+    CHECK(bytes2[4] == 'a');
+    CHECK(bytes2[5] == 'x');
+    CHECK(bytes2[6] == 'y');
+    CHECK(bytes2[7] == 'b');
+    CHECK(bytes2[8] == 'x');
+    CHECK(bytes2[9] == 'y');
+    CHECK(bytes2[10] == 'z');
+    CHECK(bytes2[11] == 'c');
+    CHECK(bytes2[12] == 'a');
+    CHECK(bytes2[13] == 'x');
+  }
+}
+
+// Checks that a span can be constructed directly from a Bytes object and that
+// it views the object's own buffer.
+TEST_CASE("Conversion to span")
+{
+  Bytes bytes;
+  bytes.resize(42);
+
+  SUBCASE("Const span")
+  {
+    nonstd::span span(bytes);
+    CHECK(span.data() == bytes.data());
+    CHECK(span.size() == bytes.size());
+  }
+
+  SUBCASE("Non-const span")
+  {
+    nonstd::span span(bytes);
+    CHECK(span.data() == bytes.data());
+    CHECK(span.size() == bytes.size());
+    // Writing through the span must be visible through the Bytes object.
+    span[1] = 'x';
+    CHECK(bytes[1] == 'x');
+  }
+}
+
+TEST_SUITE_END();
-- 
2.47.2