return ZSTD_convertBlockSequences_internal(cctx, inSeqs, nbSequences, 0);
}
+#if 0 && defined(__AVX2__)
+
+/* C90-compatible alignment macro (GCC/Clang). Adjust for other compilers if needed. */
+#if defined(__GNUC__)
+# define ALIGNED32 __attribute__((aligned(32)))
+#else
+# define ALIGNED32
+#endif
+
BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs)
{
- size_t blockSize = 0;
+ size_t i;
+ __m256i sumVec; /* accumulates match+lit in 32-bit lanes */
+ __m256i mask; /* shuffling control */
+ ALIGNED32 int tmp[8]; /* temporary buffer for reduction */
+ uint64_t sum;
+ int k;
+
+ sumVec = _mm256_setzero_si256();
+ mask = _mm256_setr_epi32(
+ 1,5, /* match(0), match(1) */
+ 2,6, /* lit(0), lit(1) */
+ 1,5, /* match(0), match(1) */
+ 2,6 /* lit(0), lit(1) */
+ );
+
+ /* Process 2 structs (32 bytes) at a time */
+ for (i = 0; i + 2 <= count; i += 2) {
+ /* Load two consecutive MyStructs (8×4 = 32 bytes) */
+ __m256i data = _mm256_loadu_si256((const __m256i*)&arr[i]);
+ /* Shuffle out lanes 1,2,5,6 => match(0), match(1), lit(0), lit(1), repeated */
+ __m256i selected = _mm256_permutevar8x32_epi32(data, mask);
+ /* Accumulate in sumVec */
+ sumVec = _mm256_add_epi32(sumVec, selected);
+ }
+
+ /* Horizontal reduction of sumVec */
+ _mm256_store_si256((__m256i*)tmp, sumVec);
+ sum = 0;
+ for (k = 0; k < 8; k++) {
+ sum += (uint64_t)tmp[k]; /* each lane is match+lit from pairs, repeated twice */
+ }
+
+ /* Handle the leftover (if count is odd) */
+ for (; i < count; i++) {
+ sum += arr[i].matchLength;
+ sum += arr[i].litLength;
+ }
+
+ return sum;
+}
+
+#else
+
+BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs)
+{
+ size_t totalMatchSize = 0;
size_t litSize = 0;
size_t n;
assert(seqs);
for (n=0; n<nbSeqs; n++) {
- blockSize += seqs[n].matchLength + seqs[n].litLength;
+ totalMatchSize += seqs[n].matchLength;
litSize += seqs[n].litLength;
if (seqs[n].matchLength == 0) {
assert(seqs[n].offset == 0);
}
{ BlockSummary bs;
bs.nbSequences = n+1;
- bs.blockSize = blockSize;
+ bs.blockSize = litSize + totalMatchSize;
bs.litSize = litSize;
return bs;
}
}
+#endif
static size_t