From 558cf20d0dacf9436959ec1ec6a5acb60152ebbb Mon Sep 17 00:00:00 2001 From: Jun He Date: Sat, 30 Jul 2022 01:27:20 +0800 Subject: [PATCH] decomp: add prefetch for matched seq on aarch64 (#3164) match is used by the subsequent sequence copy. It is only updated when extDict is needed, which is a low-probability case, so it can be prefetched to reduce cache misses. Benchmarks on various Arm platforms showed an uplift of 1% to 14% with gcc-11/clang-14. Signed-off-by: Jun He Change-Id: If201af4799d2455d74c79f8387404439d7f684ae --- lib/decompress/zstd_decompress_block.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 6df4c3849..20211f72b 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -967,6 +967,11 @@ size_t ZSTD_execSequence(BYTE* op, assert(op != NULL /* Precondition */); assert(oend_w < oend /* No underflow */); + +#if defined(__aarch64__) + /* prefetch sequence starting from match that will be used for copy later */ + PREFETCH_L1(match); +#endif /* Handle edge cases in a slow path: * - Read beyond end of literals * - Match end is within WILDCOPY_OVERLIMIT of oend -- 2.47.2