#else
+/*
+ * Tests whether the `matchLength` half of a packed (litLength, matchLength)
+ * pair is zero. `litMatchLength` is expected to be the 64-bit value obtained
+ * by loading a sequence's `litLength` and the `matchLength` stored right
+ * after it in one 64-bit read, so which half holds `matchLength` depends on
+ * the system's endianness:
+ * - On little-endian systems `matchLength` occupies the upper 32 bits, so it
+ * is zero exactly when the entire 64-bit value is at most 0xFFFFFFFF.
+ * - On big-endian systems `matchLength` occupies the lower 32 bits, which are
+ * checked directly.
+ *
+ * @returns 1 if the match length is zero, 0 otherwise.
+ */
+FORCE_INLINE_TEMPLATE int matchLengthHalfIsZero(U64 litMatchLength)
+{
+ if (MEM_isLittleEndian()) {
+ return litMatchLength <= 0xFFFFFFFFULL; /* no bit set in the upper 32 bits */
+ } else {
+ return (U32)litMatchLength == 0; /* lower 32 bits are all zero */
+ }
+}
+
BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs)
{
- size_t totalMatchSize = 0;
- size_t litSize = 0;
- size_t n;
+ /* Summarizes the first block described by `seqs`: scans until the first
+ * sequence whose `matchLength` is 0 (the block terminator) and reports the
+ * number of sequences consumed (terminator included), the total literal
+ * size (terminator's `litLength` included) and the total block size
+ * (literals + matches). If no terminator is found within `nbSeqs` entries,
+ * `nbSequences` carries ERROR(externalSequences_invalid) and the other
+ * fields are left unset.
+ *
+ * Use multiple accumulators for efficient use of wide out-of-order machines.
+ * Each accumulator holds two packed 32-bit running sums, one per half of the
+ * 64-bit values loaded below. */
+ U64 litMatchSize0 = 0;
+ U64 litMatchSize1 = 0;
+ U64 litMatchSize2 = 0;
+ U64 litMatchSize3 = 0;
+ size_t n = 0;
+
+ ZSTD_STATIC_ASSERT(offsetof(ZSTD_Sequence, litLength) + 4 == offsetof(ZSTD_Sequence, matchLength)); /* `matchLength` sits 4 bytes after `litLength`: one 64-bit read covers both */
+ ZSTD_STATIC_ASSERT(offsetof(ZSTD_Sequence, matchLength) + 4 == offsetof(ZSTD_Sequence, rep)); /* `rep` starts right after those 8 bytes, so the read does not include it */
assert(seqs);
- for (n=0; n<nbSeqs; n++) {
- totalMatchSize += seqs[n].matchLength;
- litSize += seqs[n].litLength;
- if (seqs[n].matchLength == 0) {
+
+ if (nbSeqs > 3) { /* also keeps `nbSeqs - 3` below from wrapping around */
+ /* Process the input in 4 independent streams to reach high throughput. */
+ do {
+ /* Load `litLength` and `matchLength` as a packed `U64`. It is safe
+ * to use 64-bit unsigned arithmetic here because the sum of `litLength`
+ * and `matchLength` cannot exceed the block size, so the 32-bit
+ * subparts will never overflow. Which half holds which field depends
+ * on endianness; the halves are separated again at `_out`. */
+ U64 litMatchLength = MEM_read64(&seqs[n].litLength);
+ litMatchSize0 += litMatchLength; /* accumulated before the check: the terminator's literals count too */
+ if (matchLengthHalfIsZero(litMatchLength)) {
+ assert(seqs[n].offset == 0);
+ goto _out;
+ }
+
+ litMatchLength = MEM_read64(&seqs[n + 1].litLength);
+ litMatchSize1 += litMatchLength;
+ if (matchLengthHalfIsZero(litMatchLength)) {
+ n += 1; /* make n the index of the terminator */
+ assert(seqs[n].offset == 0);
+ goto _out;
+ }
+
+ litMatchLength = MEM_read64(&seqs[n + 2].litLength);
+ litMatchSize2 += litMatchLength;
+ if (matchLengthHalfIsZero(litMatchLength)) {
+ n += 2; /* make n the index of the terminator */
+ assert(seqs[n].offset == 0);
+ goto _out;
+ }
+
+ litMatchLength = MEM_read64(&seqs[n + 3].litLength);
+ litMatchSize3 += litMatchLength;
+ if (matchLengthHalfIsZero(litMatchLength)) {
+ n += 3; /* make n the index of the terminator */
+ assert(seqs[n].offset == 0);
+ goto _out;
+ }
+
+ n += 4;
+ } while(n < nbSeqs - 3); /* i.e. while indices n .. n+3 are all valid */
+ }
+
+ for (; n < nbSeqs; n++) { /* at most 3 sequences are left at this point */
+ U64 litMatchLength = MEM_read64(&seqs[n].litLength);
+ litMatchSize0 += litMatchLength;
+ if (matchLengthHalfIsZero(litMatchLength)) {
assert(seqs[n].offset == 0);
- break;
+ goto _out;
}
}
- if (n==nbSeqs) {
- BlockSummary bs;
+ /* At this point n == nbSeqs, so no end terminator. */
+ { BlockSummary bs;
bs.nbSequences = ERROR(externalSequences_invalid);
return bs;
}
+_out:
+ litMatchSize0 += litMatchSize1 + litMatchSize2 + litMatchSize3; /* fold the four partial sums; n is the terminator's index here */
{ BlockSummary bs;
- bs.nbSequences = n+1;
- bs.blockSize = litSize + totalMatchSize;
- bs.litSize = litSize;
+ bs.nbSequences = n + 1;
+ if (MEM_isLittleEndian()) {
+ bs.litSize = (U32)litMatchSize0; /* lower half: sum of `litLength` */
+ bs.blockSize = bs.litSize + (litMatchSize0 >> 32); /* upper half: sum of `matchLength` */
+ } else {
+ bs.litSize = litMatchSize0 >> 32; /* upper half: sum of `litLength` */
+ bs.blockSize = bs.litSize + (U32)litMatchSize0; /* lower half: sum of `matchLength` */
+ }
return bs;
}
}
#include "zstd_internal.h" /* ZSTD_WORKSPACETOOLARGE_MAXDURATION, ZSTD_WORKSPACETOOLARGE_FACTOR, KB, MB */
#include "threading.h" /* ZSTD_pthread_create, ZSTD_pthread_join */
#include "compress/hist.h" /* HIST_count_wksp */
+#include "compress/zstd_compress_internal.h" /* ZSTD_get1BlockSummary */
/*-************************************
DISPLAYLEVEL(3, "OK \n");
}
+static unsigned test_get1BlockSummary(unsigned testNb)
+{ /* Exercises ZSTD_get1BlockSummary() on small hand-built inputs; `testNb` is incremented once per sub-test and the updated value is returned. */
+ static const ZSTD_Sequence nseqs[] = { /* field order assumed to be { offset, litLength, matchLength, rep } - the expected values below are consistent with it */
+ { 10, 2, 4, 1 },
+ { 20, 3, 5, 2 },
+ { 30, 6, 8, 3 },
+ { 40, 7, 9, 4 },
+ { 50, 10, 12, 5 },
+ { 60, 11, 13, 6 },
+ { 0, 14, 0, 7 }, /* third field (matchLength) is 0: block terminator */
+ { 70, 15, 17, 8 }, /* entries after the terminator must not be counted */
+ { 80, 16, 18, 9 },
+ { 90, 19, 21, 1 },
+ { 99, 20, 22, 2 },
+ };
+ static const BlockSummary blocks[] = { /* blocks[i]: expected result when the scan starts at nseqs[i]; compared below as nbSequences, blockSize, litSize */
+ { 7, 104, 53 },
+ { 6, 98, 51 },
+ { 5, 90, 48 },
+ { 4, 76, 42 },
+ { 3, 60, 35 },
+ { 2, 38, 25 },
+ { 1, 14, 14 }, /* starts on the terminator itself: literals only */
+ };
+ size_t i;
+
+ DISPLAYLEVEL(3, "test%3u : ZSTD_get1BlockSummary with empty array : ", testNb++);
+ {
+ BlockSummary bs = ZSTD_get1BlockSummary(nseqs, 0); /* nbSeqs == 0: no terminator can be found */
+ CHECK_EQ(bs.nbSequences, ERROR(externalSequences_invalid));
+ }
+ DISPLAYLEVEL(3, "OK \n");
+
+ DISPLAYLEVEL(3, "test%3u : ZSTD_get1BlockSummary with 1 literal only : ", testNb++);
+ {
+ static const ZSTD_Sequence seqs[] = { { 0, 5, 0, 0 } };
+ BlockSummary bs = ZSTD_get1BlockSummary(seqs, 1);
+ CHECK_EQ(bs.nbSequences, 1);
+ CHECK_EQ(bs.litSize, 5);
+ CHECK_EQ(bs.blockSize, 5);
+ }
+ DISPLAYLEVEL(3, "OK \n");
+
+ DISPLAYLEVEL(3, "test%3u : ZSTD_get1BlockSummary with no terminator : ", testNb++);
+ {
+ static const ZSTD_Sequence seqs[] = { { 10, 2, 4, 0 }, { 20, 3, 5, 0 } }; /* every matchLength is non-zero */
+ BlockSummary bs = ZSTD_get1BlockSummary(seqs, 2);
+ CHECK_EQ(bs.nbSequences, ERROR(externalSequences_invalid));
+ }
+ DISPLAYLEVEL(3, "OK \n");
+
+ DISPLAYLEVEL(3, "test%3u : ZSTD_get1BlockSummary with rep ignored : ", testNb++);
+ {
+ static const ZSTD_Sequence seqs[] = { /* non-zero last field in every entry; it must not affect the sums */
+ { 10, 2, 4, 2 },
+ { 10, 3, 5, 2 },
+ { 0, 7, 0, 3 },
+ };
+ BlockSummary bs = ZSTD_get1BlockSummary(seqs, 3);
+ CHECK_EQ(bs.nbSequences, 3);
+ CHECK_EQ(bs.litSize, 2 + 3 + 7);
+ CHECK_EQ(bs.blockSize, (4 + 5) + (2 + 3 + 7));
+ }
+ DISPLAYLEVEL(3, "OK \n");
+
+ assert(COUNTOF(nseqs) > COUNTOF(blocks)); /* keeps `COUNTOF(nseqs) - i` positive for every i used below */
+ for (i = 0; i < COUNTOF(blocks); ++i) {
+ BlockSummary bs;
+ DISPLAYLEVEL(3, "test%3u : ZSTD_get1BlockSummary with %u inputs : ",
+ testNb++, (unsigned)(COUNTOF(nseqs) - i));
+ bs = ZSTD_get1BlockSummary(nseqs + i, COUNTOF(nseqs) - i); /* skip the first i sequences */
+ CHECK_EQ(bs.nbSequences, blocks[i].nbSequences);
+ CHECK_EQ(bs.litSize, blocks[i].litSize);
+ CHECK_EQ(bs.blockSize, blocks[i].blockSize);
+ DISPLAYLEVEL(3, "OK \n");
+ }
+
+ return testNb;
+}
+
/* ============================================================= */
static int basicUnitTests(U32 const seed, double compressibility)
}
DISPLAYLEVEL(3, "OK \n");
+ testNb = test_get1BlockSummary(testNb);
+
DISPLAYLEVEL(3, "test%3i : ZSTD_compressSequencesAndLiterals : ", testNb++);
{
const size_t srcSize = 497000;