From 1dfbca0695bd9b5fd97ac123a070ca2bfe52d9dc Mon Sep 17 00:00:00 2001 From: Alex Rousskov Date: Wed, 4 Nov 2020 14:27:22 +0000 Subject: [PATCH] Optimization: Avoid more SBuf::cow() reallocations (#744) This optimization contains two parts: 1. A no-brainer part that allows SBuf to reuse MemBlob area previously used by other SBufs sharing the same MemBlob. To see this change, follow the "cowAvoided" code path modifications in SBuf::cow(). 2. A part based on a rule of thumb: memmove is usually better than malloc+memcpy. This part of the optimization (follow the "cowShift" path) is only activated if somebody has consume()d from the buffer earlier. The implementation is based on the heuristic that most consuming callers follow the usual append-consume-append-... usage pattern and want to preserve their buffer capacity. MemBlob::consume() API mimics SBuf::consume() and std::string::erase(), ignoring excessive number of bytes rather than throwing an error. Also detailed an old difference between an SBuf::cow() requiring just a new buffer allocation and the one also requiring data copying. Co-Authored-By: Christos Tsantilas --- src/sbuf/MemBlob.cc | 21 ++++++++++ src/sbuf/MemBlob.h | 10 +++++ src/sbuf/SBuf.cc | 36 ++++++++++++---- src/sbuf/Stats.cc | 12 ++++-- src/sbuf/Stats.h | 6 ++- src/tests/testSBuf.cc | 97 +++++++++++++++++++++++++++++++++++++++++-- 6 files changed, 164 insertions(+), 18 deletions(-) diff --git a/src/sbuf/MemBlob.cc b/src/sbuf/MemBlob.cc index 2527c9d7b3..9f91589e94 100644 --- a/src/sbuf/MemBlob.cc +++ b/src/sbuf/MemBlob.cc @@ -124,6 +124,27 @@ MemBlob::append(const char *source, const size_type n) ++Stats.append; } +void +MemBlob::syncSize(const size_type n) +{ + debugs(MEMBLOB_DEBUGSECTION, 7, n << " was: " << size); + Must(LockCount() <= 1); + Must(n <= size); + size = n; +} + +void +MemBlob::consume(const size_type rawN) +{ + if (rawN && size) { + Must(LockCount() <= 1); + const auto n = std::min(rawN, size); + size -= n; + if (size) + memmove(mem, mem + n, size); + } +} + const MemBlobStats& MemBlob::GetStats() { diff --git a/src/sbuf/MemBlob.h b/src/sbuf/MemBlob.h index d138d948d8..38768ec0f6 100644 --- a/src/sbuf/MemBlob.h +++ b/src/sbuf/MemBlob.h @@ -91,9 +91,19 @@ public: */ void append(const char *source, const size_type n); + /* non-const methods below require exclusive object ownership */ + /// extends the available space to the entire allocated blob void clear() { size = 0; } + /// keep the first n bytes and forget the rest of data + /// cannot be used to increase our size; use append*() methods for that + void syncSize(const size_type n); + + /// forget the first n bytes, moving the rest of data (if any) to the start + /// forgets all data (i.e. empties the buffer) if n exceeds size + void consume(const size_type n); + /// dump debugging information std::ostream & dump(std::ostream &os) const; diff --git a/src/sbuf/SBuf.cc b/src/sbuf/SBuf.cc index eb718bb874..c0d9d9c61f 100644 --- a/src/sbuf/SBuf.cc +++ b/src/sbuf/SBuf.cc @@ -167,9 +167,6 @@ SBuf::rawSpace(size_type minSpace) debugs(24, 7, id << " not growing"); return bufEnd(); } - // TODO: we may try to memmove before realloc'ing in order to avoid - // one allocation operation, if we're the sole owners of a MemBlob. - // Maybe some heuristic on off_ and length()? cow(minSpace+length()); return bufEnd(); } @@ -856,12 +853,16 @@ SBuf::reAlloc(size_type newsize) { debugs(24, 8, id << " new size: " << newsize); Must(newsize <= maxSize); + // TODO: Consider realloc(3)ing in some cases instead. MemBlob::Pointer newbuf = new MemBlob(newsize); - if (length() > 0) + if (length() > 0) { newbuf->append(buf(), length()); + ++stats.cowAllocCopy; + } else { + ++stats.cowJustAlloc; + } store_ = newbuf; off_ = 0; - ++stats.cowSlow; debugs(24, 7, id << " new store capacity: " << store_->capacity); } @@ -887,10 +888,27 @@ SBuf::cow(SBuf::size_type newsize) if (newsize == npos || newsize < length()) newsize = length(); - if (store_->LockCount() == 1 && newsize == length()) { - debugs(24, 8, id << " no cow needed"); - ++stats.cowFast; - return; + if (store_->LockCount() == 1) { + // MemBlob::size reflects past owners. Refresh to maximize spaceSize(). + store_->syncSize(off_ + length()); + + const auto availableSpace = spaceSize(); + const auto neededSpace = newsize - length(); + if (neededSpace <= availableSpace) { + debugs(24, 8, id << " no cow needed; have " << availableSpace); + ++stats.cowAvoided; + return; + } + // consume idle leading space if doing so avoids reallocation + // this case is typical for fill-consume-fill-consume-... I/O buffers + if (neededSpace <= availableSpace + off_) { + debugs(24, 8, id << " no cow after shifting " << off_ << " to get " << (availableSpace + off_)); + store_->consume(off_); + off_ = 0; + ++stats.cowShift; + assert(neededSpace <= spaceSize()); + return; + } } reAlloc(newsize); } diff --git a/src/sbuf/Stats.cc b/src/sbuf/Stats.cc index afd75daa65..7d69f3baa1 100644 --- a/src/sbuf/Stats.cc +++ b/src/sbuf/Stats.cc @@ -35,8 +35,10 @@ SBufStats::operator +=(const SBufStats& ss) trim += ss.trim; find += ss.find; caseChange += ss.caseChange; - cowFast += ss.cowFast; - cowSlow += ss.cowSlow; + cowAvoided += ss.cowAvoided; + cowShift += ss.cowShift; + cowJustAlloc += ss.cowJustAlloc; + cowAllocCopy += ss.cowAllocCopy; live += ss.live; return *this; @@ -67,8 +69,10 @@ SBufStats::dump(std::ostream& os) const "\ntrim operations: " << trim << "\nfind: " << find << "\ncase-change ops: " << caseChange << - "\nCOW not actually requiring a copy: " << cowFast << - "\nCOW: " << cowSlow << + "\nCOW completely avoided: " << cowAvoided << + "\nCOW replaced with memmove(3): " << cowShift << + "\nCOW requiring an empty buffer allocation: " << cowJustAlloc << + "\nCOW requiring allocation and copying: " << cowAllocCopy << "\naverage store share factor: " << (ststats.live != 0 ? static_cast(live)/ststats.live : 0) << std::endl; diff --git a/src/sbuf/Stats.h b/src/sbuf/Stats.h index 68096aebdf..e57c8b1177 100644 --- a/src/sbuf/Stats.h +++ b/src/sbuf/Stats.h @@ -46,8 +46,10 @@ public: uint64_t trim = 0; ///= requirements.minSpace); } + + // TODO: Decide whether to encapsulate the (nearly identical) code of the + // gap-related test cases below into a function, obscuring each case logic a + // little, but facilitating new test cases (and removing code duplication). + + { // reserveSpace() uses the trailing space before the front gap + SBuf buffer(fox); + + // assure there is some trailing space + buffer.reserveSpace(1); + CPPUNIT_ASSERT(buffer.spaceSize() > 0); + + // create a leading gap and (weak-)check that it was created + const auto gap = 1U; // the smallest gap may be the most challenging + CPPUNIT_ASSERT(gap < buffer.length()); + const void *gapEnd = buffer.rawContent() + gap; + buffer.consume(gap); + CPPUNIT_ASSERT_EQUAL(gapEnd, static_cast(buffer.rawContent())); + + const auto before = SBuf::GetStats(); + const auto beforeSpaceSize = buffer.spaceSize(); + const void * const beforePosition = buffer.rawContent(); + buffer.reserveSpace(beforeSpaceSize); + const auto after = SBuf::GetStats(); + const void * const afterPosition = buffer.rawContent(); + CPPUNIT_ASSERT_EQUAL(before.cowAvoided + 1, after.cowAvoided); + CPPUNIT_ASSERT_EQUAL(before.cowShift, after.cowShift); + CPPUNIT_ASSERT_EQUAL(before.cowJustAlloc, after.cowJustAlloc); + CPPUNIT_ASSERT_EQUAL(before.cowAllocCopy, after.cowAllocCopy); + CPPUNIT_ASSERT_EQUAL(beforeSpaceSize, buffer.spaceSize()); + CPPUNIT_ASSERT_EQUAL(beforePosition, afterPosition); + CPPUNIT_ASSERT(strcmp(fox + gap, buffer.c_str()) == 0); + } + + { // reserveSpace() uses the front gap when the trailing space is not enough + SBuf buffer(fox); + + // assure there is some trailing space to keep the test case challenging + buffer.reserveSpace(1); + CPPUNIT_ASSERT(buffer.spaceSize() > 0); + const void * const initialStorage = buffer.rawContent(); + + // create a leading gap and (weak-)check that it was created + const auto gap = 1U; // the smallest gap may be the most challenging + CPPUNIT_ASSERT(gap < buffer.length()); + const void *gapEnd = buffer.rawContent() + gap; + buffer.consume(gap); + CPPUNIT_ASSERT_EQUAL(gapEnd, static_cast(buffer.rawContent())); + + const auto before = SBuf::GetStats(); + const auto beforeSpaceSize = buffer.spaceSize(); + buffer.reserveSpace(beforeSpaceSize + gap); // force (entire) gap use + const auto after = SBuf::GetStats(); + const void * const afterStorage = buffer.rawContent(); + CPPUNIT_ASSERT_EQUAL(before.cowAvoided, after.cowAvoided); + CPPUNIT_ASSERT_EQUAL(before.cowShift + 1, after.cowShift); + CPPUNIT_ASSERT_EQUAL(before.cowJustAlloc, after.cowJustAlloc); + CPPUNIT_ASSERT_EQUAL(before.cowAllocCopy, after.cowAllocCopy); + CPPUNIT_ASSERT_EQUAL(initialStorage, afterStorage); + CPPUNIT_ASSERT(beforeSpaceSize + gap <= buffer.spaceSize()); + CPPUNIT_ASSERT(strcmp(fox + gap, buffer.c_str()) == 0); + } + + { // reserveSpace() uses the entire front gap when using the front gap + SBuf buffer(fox); + + // assure there is some trailing space to keep the test case challenging + buffer.reserveSpace(1); + CPPUNIT_ASSERT(buffer.spaceSize() > 0); + const void * const initialStorage = buffer.rawContent(); + + // create a leading gap and (weak-)check that it was created + const auto gap = 2U; // the smallest extra gap may be the most challenging + CPPUNIT_ASSERT(gap < buffer.length()); + const void *gapEnd = buffer.rawContent() + gap; + buffer.consume(gap); + CPPUNIT_ASSERT_EQUAL(gapEnd, static_cast(buffer.rawContent())); + + const auto before = SBuf::GetStats(); + const auto beforeSpaceSize = buffer.spaceSize(); + buffer.reserveSpace(beforeSpaceSize + 1); // force (minimal) gap use + const auto after = SBuf::GetStats(); + const void * const afterStorage = buffer.rawContent(); + CPPUNIT_ASSERT_EQUAL(before.cowAvoided, after.cowAvoided); + CPPUNIT_ASSERT_EQUAL(before.cowShift + 1, after.cowShift); + CPPUNIT_ASSERT_EQUAL(before.cowJustAlloc, after.cowJustAlloc); + CPPUNIT_ASSERT_EQUAL(before.cowAllocCopy, after.cowAllocCopy); + CPPUNIT_ASSERT_EQUAL(initialStorage, afterStorage); + CPPUNIT_ASSERT(beforeSpaceSize + gap <= buffer.spaceSize()); + CPPUNIT_ASSERT(strcmp(fox + gap, buffer.c_str()) == 0); + } } void -- 2.39.5