2 * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
10 #include "base/CharacterSet.h"
11 #include "base/RefCount.h"
13 #include "sbuf/DetailedStats.h"
14 #include "sbuf/SBuf.h"
22 InstanceIdDefinitions(SBuf
, "SBuf");
24 SBufStats
SBuf::stats
;
25 const SBuf::size_type
SBuf::npos
;
26 const SBuf::size_type
SBuf::maxSize
;
28 SBuf::SBuf() : store_(GetStorePrototype())
30 debugs(24, 8, id
<< " created");
35 SBuf::SBuf(const SBuf
&S
)
36 : store_(S
.store_
), off_(S
.off_
), len_(S
.len_
)
38 debugs(24, 8, id
<< " created from id " << S
.id
);
44 SBuf::SBuf(const std::string
&s
) : store_(GetStorePrototype())
46 debugs(24, 8, id
<< " created from std::string");
47 lowAppend(s
.data(),s
.length());
52 SBuf::SBuf(const char *S
, size_type n
) : store_(GetStorePrototype())
56 ++stats
.allocFromCString
;
60 SBuf::SBuf(const char *S
) : store_(GetStorePrototype())
64 ++stats
.allocFromCString
;
70 debugs(24, 8, id
<< " destructed");
72 recordSBufSizeAtDestruct(len_
);
76 SBuf::GetStorePrototype()
78 static MemBlob::Pointer InitialStore
= new MemBlob(0);
83 SBuf::assign(const SBuf
&S
)
85 debugs(24, 7, "assigning " << id
<< " from " << S
.id
);
86 if (&S
== this) //assignment to self. Noop.
96 SBuf::assign(const char *S
, size_type n
)
98 const Locker
blobKeeper(this, S
);
99 debugs(24, 6, id
<< " from c-string, n=" << n
<< ")");
101 return append(S
, n
); //bounds checked in append()
105 SBuf::reserveCapacity(size_type minCapacity
)
107 Must(minCapacity
<= maxSize
);
112 SBuf::reserve(const SBufReservationRequirements
&req
)
114 debugs(24, 8, id
<< " was: " << off_
<< '+' << len_
<< '+' << spaceSize() <<
115 '=' << store_
->capacity
);
117 const bool mustRealloc
= !req
.allowShared
&& store_
->LockCount() > 1;
119 if (!mustRealloc
&& spaceSize() >= req
.minSpace
)
120 return spaceSize(); // the caller is content with what we have
122 /* only reallocation can make the caller happy */
124 if (!mustRealloc
&& len_
>= req
.maxCapacity
)
125 return spaceSize(); // but we cannot reallocate
127 const size_type desiredSpace
= std::max(req
.minSpace
, req
.idealSpace
);
128 const size_type newSpace
= std::min(desiredSpace
, maxSize
- len_
);
129 reserveCapacity(std::min(len_
+ newSpace
, req
.maxCapacity
));
130 debugs(24, 7, id
<< " now: " << off_
<< '+' << len_
<< '+' << spaceSize() <<
131 '=' << store_
->capacity
);
132 return spaceSize(); // reallocated and probably reserved enough space
136 SBuf::rawAppendStart(size_type anticipatedSize
)
138 char *space
= rawSpace(anticipatedSize
);
139 debugs(24, 8, id
<< " start appending up to " << anticipatedSize
<< " bytes");
144 SBuf::rawAppendFinish(const char *start
, size_type actualSize
)
146 Must(bufEnd() == start
);
147 Must(store_
->canAppend(off_
+ len_
, actualSize
));
148 debugs(24, 8, id
<< " finish appending " << actualSize
<< " bytes");
150 size_type newSize
= length() + actualSize
;
151 Must3(newSize
<= min(maxSize
, store_
->capacity
-off_
), "raw append fits", Here());
153 store_
->size
= off_
+ newSize
;
157 SBuf::rawSpace(size_type minSpace
)
159 Must(length() <= maxSize
- minSpace
);
160 debugs(24, 7, "reserving " << minSpace
<< " for " << id
);
162 // we're not concerned about RefCounts here,
163 // the store knows the last-used portion. If
164 // it's available, we're effectively claiming ownership
165 // of it. If it's not, we need to go away (realloc)
166 if (store_
->canAppend(off_
+len_
, minSpace
)) {
167 debugs(24, 7, id
<< " not growing");
170 cow(minSpace
+length());
178 //enabling this code path, the store will be freed and reinitialized
179 store_
= GetStorePrototype(); //uncomment to actually free storage upon clear()
181 //enabling this code path, we try to release the store without deallocating it.
182 // will be lazily reallocated if needed.
183 if (store_
->LockCount() == 1)
192 SBuf::append(const SBuf
&S
)
194 if (isEmpty() && store_
== GetStorePrototype())
195 return (*this = S
); // optimization: avoid needless copying
197 const Locker
blobKeeper(this, S
.buf());
198 return lowAppend(S
.buf(), S
.length());
202 SBuf::append(const char * S
, size_type Ssize
)
204 const Locker
blobKeeper(this, S
);
207 if (Ssize
== SBuf::npos
)
209 debugs(24, 7, "from c-string to id " << id
);
210 // coverity[access_dbuff_in_call]
211 return lowAppend(S
, Ssize
);
215 SBuf::append(const char c
)
217 return lowAppend(&c
, 1);
221 SBuf::Printf(const char *fmt
, ...)
223 // with printf() the fmt or an arg might be a dangerous char*
224 // NP: can't rely on vappendf() Locker because of clear()
225 const Locker
blobKeeper(this, buf());
236 SBuf::appendf(const char *fmt
, ...)
246 SBuf::vappendf(const char *fmt
, va_list vargs
)
248 // with (v)appendf() the fmt or an arg might be a dangerous char*
249 const Locker
blobKeeper(this, buf());
253 //reserve twice the format-string size, it's a likely heuristic
254 size_type requiredSpaceEstimate
= strlen(fmt
)*2;
256 char *space
= rawSpace(requiredSpaceEstimate
);
259 sz
= vsnprintf(space
, spaceSize(), fmt
, ap
);
261 Must3(sz
>= 0, "vsnprintf() succeeds", Here());
263 /* check for possible overflow */
264 /* snprintf on Linux returns -1 on output errors, or the size
265 * that would have been written if enough space had been available */
266 /* vsnprintf is standard in C99 */
268 if (sz
>= static_cast<int>(spaceSize())) {
269 // not enough space on the first go, we now know how much we need
270 requiredSpaceEstimate
= sz
*2; // TODO: tune heuristics
271 space
= rawSpace(requiredSpaceEstimate
);
272 sz
= vsnprintf(space
, spaceSize(), fmt
, vargs
);
273 Must3(sz
>= 0, "vsnprintf() succeeds (with increased buffer space)", Here());
276 // data was appended, update internal state
279 /* C99 specifies that the final '\0' is not counted in vsnprintf's
280 * return value. Older compilers/libraries might instead count it */
281 /* check whether '\0' was appended and counted */
282 static bool snPrintfTerminatorChecked
= false;
283 static bool snPrintfTerminatorCounted
= false;
284 if (!snPrintfTerminatorChecked
) {
286 snPrintfTerminatorCounted
= snprintf(testbuf
, sizeof(testbuf
),
288 snPrintfTerminatorChecked
= true;
290 if (snPrintfTerminatorCounted
) {
302 SBuf::print(std::ostream
&os
) const
304 os
.write(buf(), length());
310 SBuf::dump(std::ostream
&os
) const
315 os
<< ", offset:" << off_
319 os
<< '\'' << std::endl
;
322 // alternate implementation, based on Raw() API.
323 os
<< Raw("SBuf", buf(), length()) <<
325 ", offset:" << off_
<<
335 SBuf::setAt(size_type pos
, char toset
)
337 checkAccessBounds(pos
);
339 store_
->mem
[off_
+pos
] = toset
;
344 memcasecmp(const char *b1
, const char *b2
, SBuf::size_type len
)
348 rv
= tolower(*b1
)-tolower(*b2
);
359 SBuf::compare(const SBuf
&S
, const SBufCaseSensitive isCaseSensitive
, const size_type n
) const
362 debugs(24, 8, "length specified. substr and recurse");
363 return substr(0,n
).compare(S
.substr(0,n
),isCaseSensitive
);
366 const size_type byteCompareLen
= min(S
.length(), length());
369 debugs(24, 8, "comparing length " << byteCompareLen
);
370 if (isCaseSensitive
== caseSensitive
) {
371 rv
= memcmp(buf(), S
.buf(), byteCompareLen
);
373 rv
= memcasecmp(buf(), S
.buf(), byteCompareLen
);
376 debugs(24, 8, "result: " << rv
);
379 if (n
<= length() || n
<= S
.length()) {
380 debugs(24, 8, "same contents and bounded length. Equal");
383 if (length() == S
.length()) {
384 debugs(24, 8, "same contents and same length. Equal");
387 if (length() > S
.length()) {
388 debugs(24, 8, "lhs is longer than rhs. Result is 1");
391 debugs(24, 8, "rhs is longer than lhs. Result is -1");
396 SBuf::compare(const char *s
, const SBufCaseSensitive isCaseSensitive
, const size_type n
) const
398 // 0-length comparison is always true regardless of buffer states
404 // N-length compare MUST provide a non-NULL C-string pointer
407 // when this is a 0-length string, no need for any complexity.
413 // brute-force scan in order to avoid ever needing strlen() on a c-string.
415 const char *left
= buf();
416 const char *right
= s
;
418 // what area to scan.
419 // n may be npos, but we treat that as a huge positive value
420 size_type byteCount
= min(length(), n
);
422 // loop until we find a difference, a '\0', or reach the end of area to scan
423 if (isCaseSensitive
== caseSensitive
) {
424 while ((rv
= *left
- *right
++) == 0) {
425 if (*left
++ == '\0' || --byteCount
== 0)
429 while ((rv
= tolower(*left
) - tolower(*right
++)) == 0) {
430 if (*left
++ == '\0' || --byteCount
== 0)
435 // If we stopped scanning because we reached the end
436 // of buf() before we reached the end of s,
437 // pretend we have a 0-terminator there to compare.
438 // NP: the loop already incremented "right" ready for this comparison
439 if (!byteCount
&& length() < n
)
440 return '\0' - *right
;
442 // If we found a difference within the scan area,
443 // or we found a '\0',
444 // or all n characters were identical (and none was \0).
449 SBuf::startsWith(const SBuf
&S
, const SBufCaseSensitive isCaseSensitive
) const
451 debugs(24, 8, id
<< " startsWith " << S
.id
<< ", caseSensitive: " <<
453 if (length() < S
.length()) {
454 debugs(24, 8, "no, too short");
458 return (compare(S
, isCaseSensitive
, S
.length()) == 0);
462 SBuf::operator ==(const SBuf
& S
) const
464 debugs(24, 8, id
<< " == " << S
.id
);
465 if (length() != S
.length()) {
466 debugs(24, 8, "no, different lengths");
468 return false; //shortcut: must be equal length
470 if (store_
== S
.store_
&& off_
== S
.off_
) {
471 debugs(24, 8, "yes, same length and backing store");
473 return true; //shortcut: same store, offset and length
476 const bool rv
= (0 == memcmp(buf(), S
.buf(), length()));
477 debugs(24, 8, "returning " << rv
);
482 SBuf::operator !=(const SBuf
& S
) const
484 return !(*this == S
);
488 SBuf::consume(size_type n
)
493 n
= min(n
, length());
494 debugs(24, 8, id
<< " consume " << n
);
495 SBuf
rv(substr(0, n
));
501 SBufStats
& SBuf::GetStats()
507 SBuf::copy(char *dest
, size_type n
) const
509 size_type toexport
= min(n
,length());
510 memcpy(dest
, buf(), toexport
);
516 SBuf::rawContent() const
526 /* null-terminate the current buffer, by hand-appending a \0 at its tail but
527 * without increasing its length. May COW, the side-effect is to guarantee that
528 * the MemBlob's tail is available for us to use */
532 ++stats
.nulTerminate
;
537 SBuf::chop(size_type pos
, size_type n
)
539 if (pos
== npos
|| pos
> length())
542 if (n
== npos
|| (pos
+n
) > length())
545 // if there will be nothing left, reset the buffer while we can
546 if (pos
== length() || n
== 0) {
558 SBuf::trim(const SBuf
&toRemove
, bool atBeginning
, bool atEnd
)
562 const char *p
= bufEnd()-1;
563 while (!isEmpty() && memchr(toRemove
.buf(), *p
, toRemove
.length()) != NULL
) {
564 //current end-of-buf is in the searched set
570 const char *p
= buf();
571 while (!isEmpty() && memchr(toRemove
.buf(), *p
, toRemove
.length()) != NULL
) {
583 SBuf::substr(size_type pos
, size_type n
) const
586 rv
.chop(pos
, n
); //stats handled by callee
591 SBuf::find(char c
, size_type startPos
) const
595 if (startPos
== npos
) // can't find anything if we look past end of SBuf
598 // std::string returns npos if needle is outside hay
599 if (startPos
> length())
602 const void *i
= memchr(buf()+startPos
, (int)c
, (size_type
)length()-startPos
);
607 return (static_cast<const char *>(i
)-buf());
611 SBuf::find(const SBuf
&needle
, size_type startPos
) const
613 if (startPos
== npos
) { // can't find anything if we look past end of SBuf
618 // std::string allows needle to overhang hay but not start outside
619 if (startPos
> length()) {
624 // for empty needle std::string returns startPos
625 if (needle
.length() == 0) {
630 // if needle length is 1 use the char search
631 if (needle
.length() == 1)
632 return find(needle
[0], startPos
);
636 char *start
= buf()+startPos
;
637 char *lastPossible
= buf()+length()-needle
.length()+1;
638 char needleBegin
= needle
[0];
640 debugs(24, 7, "looking for " << needle
<< "starting at " << startPos
<<
642 while (start
< lastPossible
) {
644 debugs(24, 8, " begin=" << (void *) start
<<
645 ", lastPossible=" << (void*) lastPossible
);
646 tmp
= static_cast<char *>(memchr(start
, needleBegin
, lastPossible
-start
));
648 debugs(24, 8, "First byte not found");
651 // lastPossible guarantees no out-of-bounds with memcmp()
652 if (0 == memcmp(needle
.buf(), tmp
, needle
.length())) {
653 debugs(24, 8, "Found at " << (tmp
-buf()));
658 debugs(24, 8, "not found");
663 SBuf::rfind(const SBuf
&needle
, SBuf::size_type endPos
) const
665 // when the needle is 1 char, use the 1-char rfind()
666 if (needle
.length() == 1)
667 return rfind(needle
[0], endPos
);
671 // needle is bigger than haystack, impossible find
672 if (length() < needle
.length())
675 // if startPos is npos, std::string scans from the end of hay
676 if (endPos
== npos
|| endPos
> length()-needle
.length())
677 endPos
= length()-needle
.length();
679 // an empty needle found at the end of the haystack
680 if (needle
.length() == 0)
683 char *bufBegin
= buf();
684 char *cur
= bufBegin
+endPos
;
685 const char needleBegin
= needle
[0];
686 while (cur
>= bufBegin
) {
687 if (*cur
== needleBegin
) {
688 if (0 == memcmp(needle
.buf(), cur
, needle
.length())) {
699 SBuf::rfind(char c
, SBuf::size_type endPos
) const
703 // shortcut: haystack is empty, can't find anything by definition
707 // on npos input std::string compares last octet of hay
708 if (endPos
== npos
|| endPos
>= length()) {
711 // NP: off-by-one weirdness:
712 // endPos is an offset ... 0-based
713 // length() is a count ... 1-based
714 // memrhr() requires a 1-based count of space to scan.
721 const void *i
= memrchr(buf(), (int)c
, (size_type
)endPos
);
726 return (static_cast<const char *>(i
)-buf());
730 SBuf::findFirstOf(const CharacterSet
&set
, size_type startPos
) const
734 if (startPos
== npos
)
737 if (startPos
>= length())
740 debugs(24, 7, "first of characterset " << set
.name
<< " in id " << id
);
741 char *cur
= buf()+startPos
;
742 const char *bufend
= bufEnd();
743 while (cur
< bufend
) {
748 debugs(24, 7, "not found");
753 SBuf::findFirstNotOf(const CharacterSet
&set
, size_type startPos
) const
757 if (startPos
== npos
)
760 if (startPos
>= length())
763 debugs(24, 7, "first not of characterset " << set
.name
<< " in id " << id
);
764 char *cur
= buf()+startPos
;
765 const char *bufend
= bufEnd();
766 while (cur
< bufend
) {
771 debugs(24, 7, "not found");
776 SBuf::findLastOf(const CharacterSet
&set
, size_type endPos
) const
783 if (endPos
== npos
|| endPos
>= length())
784 endPos
= length() - 1;
786 debugs(24, 7, "last of characterset " << set
.name
<< " in id " << id
);
787 const char *start
= buf();
788 for (const char *cur
= start
+ endPos
; cur
>= start
; --cur
) {
792 debugs(24, 7, "not found");
797 SBuf::findLastNotOf(const CharacterSet
&set
, size_type endPos
) const
804 if (endPos
== npos
|| endPos
>= length())
805 endPos
= length() - 1;
807 debugs(24, 7, "last not of characterset " << set
.name
<< " in id " << id
);
808 const char *start
= buf();
809 for (const char *cur
= start
+ endPos
; cur
>= start
; --cur
) {
813 debugs(24, 7, "not found");
820 debugs(24, 8, "\"" << *this << "\"");
821 for (size_type j
= 0; j
< length(); ++j
) {
822 const int c
= (*this)[j
];
824 setAt(j
, tolower(c
));
826 debugs(24, 8, "result: \"" << *this << "\"");
833 debugs(24, 8, "\"" << *this << "\"");
834 for (size_type j
= 0; j
< length(); ++j
) {
835 const int c
= (*this)[j
];
837 setAt(j
, toupper(c
));
839 debugs(24, 8, "result: \"" << *this << "\"");
843 /** re-allocate the backing store of the SBuf.
845 * If there are contents in the SBuf, they will be copied over.
846 * NO verifications are made on the size parameters, it's up to the caller to
847 * make sure that the new size is big enough to hold the copied contents.
848 * The re-allocated storage MAY be bigger than the requested size due to size-chunking
849 * algorithms in MemBlock, it is guaranteed NOT to be smaller.
852 SBuf::reAlloc(size_type newsize
)
854 debugs(24, 8, id
<< " new size: " << newsize
);
855 Must(newsize
<= maxSize
);
856 // TODO: Consider realloc(3)ing in some cases instead.
857 MemBlob::Pointer newbuf
= new MemBlob(newsize
);
859 newbuf
->append(buf(), length());
860 ++stats
.cowAllocCopy
;
862 ++stats
.cowJustAlloc
;
866 debugs(24, 7, id
<< " new store capacity: " << store_
->capacity
);
870 SBuf::lowAppend(const char * memArea
, size_type areaSize
)
872 rawSpace(areaSize
); //called method also checks n <= maxSize()
873 store_
->append(memArea
, areaSize
);
880 * copy-on-write: make sure that we are the only holder of the backing store.
881 * If not, reallocate. If a new size is specified, and it is greater than the
882 * current length, the backing store will be extended as needed
885 SBuf::cow(SBuf::size_type newsize
)
887 debugs(24, 8, id
<< " new size:" << newsize
);
888 if (newsize
== npos
|| newsize
< length())
891 if (store_
->LockCount() == 1) {
892 // MemBlob::size reflects past owners. Refresh to maximize spaceSize().
893 store_
->syncSize(off_
+ length());
895 const auto availableSpace
= spaceSize();
896 const auto neededSpace
= newsize
- length();
897 if (neededSpace
<= availableSpace
) {
898 debugs(24, 8, id
<< " no cow needed; have " << availableSpace
);
902 // consume idle leading space if doing so avoids reallocation
903 // this case is typical for fill-consume-fill-consume-... I/O buffers
904 if (neededSpace
<= availableSpace
+ off_
) {
905 debugs(24, 8, id
<< " no cow after shifting " << off_
<< " to get " << (availableSpace
+ off_
));
906 store_
->consume(off_
);
909 assert(neededSpace
<= spaceSize());