git.ipfire.org Git - thirdparty/zstd.git/commitdiff
Fix performance regression on aarch64 with clang 1973/head
author Nick Terrell <terrelln@fb.com>
Fri, 24 Jan 2020 00:18:52 +0000 (16:18 -0800)
committer Nick Terrell <terrelln@fb.com>
Fri, 24 Jan 2020 01:31:14 +0000 (17:31 -0800)
lib/decompress/huf_decompress.c
lib/decompress/zstd_decompress_block.c

index 732e1c93bfcab48b3be79a4fa117d7d44dc6a0ba..e599070ae44d438315716772ce672371b55e67fe 100644 (file)
@@ -817,7 +817,7 @@ HUF_decompress4X2_usingDTable_internal_body(
 
         /* 16-32 symbols per loop (4-8 symbols per stream) */
         for ( ; (endSignal) & (op4 < olimit); ) {
-#ifdef __clang__
+#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
             HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
             HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
             HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
@@ -855,10 +855,11 @@ HUF_decompress4X2_usingDTable_internal_body(
             HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
             HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
             HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
-            endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
-            endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
-            endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
-            endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
+            endSignal = LIKELY(
+                        (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
+                      & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
+                      & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
+                      & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
 #endif
         }
 
index ce43c5255455c1cf1d5f7eec2d8078b4019e40a2..33c812d04f473e99048818f64cc4fbe65cdcc921 100644 (file)
@@ -580,7 +580,7 @@ typedef struct {
  *  Precondition: *ip <= *op
  *  Postcondition: *op - *ip >= 8
  */
-static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
+HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
     assert(*ip <= *op);
     if (offset < 8) {
         /* close range match, overlap */