#endif
/* prefetch
- * can be disabled, by declaring NO_PREFETCH macro
- * All prefetch invocations use a single default locality 2,
- * generating instruction prefetcht1,
- * which, according to Intel, means "load data into L2 cache".
- * This is a good enough "middle ground" for the time being,
- * though in theory, it would be better to specialize locality depending on data being prefetched.
- * Tests could not determine any sensible difference based on locality value. */
+ * can be disabled, by declaring NO_PREFETCH build macro */
#if defined(NO_PREFETCH)
+# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
# define PREFETCH(ptr) (void)(ptr) /* disabled */
#else
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
-# define PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
+# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
+# define PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
-# define PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
+# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
+# define PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
# else
-# define PREFETCH(ptr) (void)(ptr) /* disabled */
+# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
+# define PREFETCH(ptr) (void)(ptr) /* disabled */
# endif
#endif /* NO_PREFETCH */
seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
- PREFETCH(sequence.match); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+ PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
sequences[seqNb & STORED_SEQS_MASK] = sequence;
op += oneSeqSize;
}