set(ARCHDIR "arch/x86")
add_definitions(-DUNALIGNED_OK)
add_feature_info(SSE2 1 "Support the SSE2 instruction set, using \"${SSE2FLAG}\"")
-elseif("${ARCH}" MATCHES "arm")
+elseif("${ARCH}" MATCHES "arm" OR "${ARCH}" MATCHES "aarch64")
set(ARCHDIR "arch/arm")
add_definitions(-DUNALIGNED_OK)
-elseif("${ARCH}" MATCHES "aarch64")
- set(ARCHDIR "arch/aarch64")
- add_definitions(-DUNALIGNED_OK)
else()
message(STATUS "No optimized architecture: using ${ARCHDIR}")
endif()
inftrees.o: $(SRCDIR)/zutil.h $(SRCDIR)/zlib$(SUFFIX).h zconf$(SUFFIX).h $(SRCDIR)/inftrees.h
trees.o: $(SRCDIR)/deflate.h $(SRCDIR)/zutil.h $(SRCDIR)/zlib$(SUFFIX).h zconf$(SUFFIX).h $(SRCDIR)/trees.h
zutil.o: $(SRCDIR)/zutil.h $(SRCDIR)/gzguts.h $(SRCDIR)/zlib$(SUFFIX).h zconf$(SUFFIX).h
-arch/aarch64/adler32_neon.o: $(SRCDIR)/arch/aarch64/adler32_neon.h
-arch/aarch64/crc32_acle.o: zconf$(SUFFIX).h
-arch/aarch64/fill_window_arm.o: $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h $(SRCDIR)/zutil.h $(SRCDIR)/zlib$(SUFFIX).h zconf$(SUFFIX).h
-arch/aarch64/insert_string_acle.o: $(SRCDIR)/deflate.h $(SRCDIR)/zutil.h $(SRCDIR)/zlib$(SUFFIX).h zconf$(SUFFIX).h
arch/arm/adler32_neon.o: $(SRCDIR)/arch/arm/adler32_neon.h
arch/arm/crc32_acle.o: zconf$(SUFFIX).h
arch/arm/fill_window_arm.o: $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h $(SRCDIR)/zutil.h $(SRCDIR)/zlib$(SUFFIX).h zconf$(SUFFIX).h
inftrees.lo: $(SRCDIR)/zutil.h $(SRCDIR)/zlib$(SUFFIX).h zconf$(SUFFIX).h $(SRCDIR)/inftrees.h
trees.lo: $(SRCDIR)/deflate.h $(SRCDIR)/zutil.h $(SRCDIR)/zlib$(SUFFIX).h zconf$(SUFFIX).h $(SRCDIR)/trees.h
zutil.lo: $(SRCDIR)/zutil.h $(SRCDIR)/gzguts.h $(SRCDIR)/zlib$(SUFFIX).h zconf$(SUFFIX).h
-arch/aarch64/adler32_neon.lo: $(SRCDIR)/arch/aarch64/adler32_neon.h
-arch/aarch64/crc32_acle.lo: zconf$(SUFFIX).h
-arch/aarch64/fill_window_arm.lo: $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h $(SRCDIR)/zutil.h $(SRCDIR)/zlib$(SUFFIX).h zconf$(SUFFIX).h
-arch/aarch64/insert_string_acle.lo: $(SRCDIR)/deflate.h $(SRCDIR)/zutil.h $(SRCDIR)/zlib$(SUFFIX).h zconf$(SUFFIX).h
arch/arm/adler32_neon.lo: $(SRCDIR)/arch/arm/adler32_neon.h
arch/arm/crc32_acle.lo: zconf$(SUFFIX).h
arch/arm/fill_window_arm.lo: $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h $(SRCDIR)/zutil.h $(SRCDIR)/zlib$(SUFFIX).h zconf$(SUFFIX).h
+++ /dev/null
-# Makefile for zlib
-# Copyright (C) 1995-2013 Jean-loup Gailly, Mark Adler
-# For conditions of distribution and use, see copyright notice in zlib.h
-
-CC=
-CFLAGS=
-SFLAGS=
-INCLUDES=
-SUFFIX=
-
-SRCDIR=.
-SRCTOP=../..
-TOPDIR=$(SRCTOP)
-
-all: adler32_neon.o adler32_neon.lo armfeature.o armfeature.lo crc32_acle.o crc32_acle.lo fill_window_arm.o fill_window_arm.lo insert_string_acle.o insert_string_acle.lo
-
-adler32_neon.o: $(SRCDIR)/adler32_neon.c
- $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
-
-adler32_neon.lo: $(SRCDIR)/adler32_neon.c
- $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
-
-armfeature.o: $(SRCDIR)/armfeature.c
- $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c
-
-armfeature.lo: $(SRCDIR)/armfeature.c
- $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c
-
-crc32_acle.o: $(SRCDIR)/crc32_acle.c
- $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
-
-crc32_acle.lo: $(SRCDIR)/crc32_acle.c
- $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
-
-fill_window_arm.o: ${SRCDIR}/fill_window_arm.c
- $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/fill_window_arm.c
-
-fill_window_arm.lo: ${SRCDIR}/fill_window_arm.c
- $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/fill_window_arm.c
-
-insert_string_acle.o: $(SRCDIR)/insert_string_acle.c
- $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
-
-insert_string_acle.lo: $(SRCDIR)/insert_string_acle.c
- $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
-
-mostlyclean: clean
-clean:
- rm -f *.o *.lo *~
- rm -rf objs
- rm -f *.gcda *.gcno *.gcov
-
-distclean:
- rm -f Makefile
-
-depend:
- makedepend -Y -- $(CFLAGS) -- $(SRCDIR)/*.c
- makedepend -Y -a -o.lo -- $(SFLAGS) -- $(SRCDIR)/*.c
- @sed "s=^$(SRCDIR)/\([a-zA-Z0-9_]*\.\(lo\|o\):\)=\1=g" < Makefile > Makefile.tmp
- @mv -f Makefile.tmp Makefile
-
-# DO NOT DELETE THIS LINE -- make depend depends on it.
-
-adler32_neon.o: $(SRCDIR)/adler32_neon.h
-crc32_acle.o: $(TOPDIR)/zconf$(SUFFIX).h
-fill_window_arm.o: $(SRCTOP)/deflate.h $(SRCTOP)/deflate_p.h $(SRCTOP)/functable.h $(SRCTOP)/zutil.h $(SRCTOP)/zlib$(SUFFIX).h $(TOPDIR)/zconf$(SUFFIX).h
-insert_string_acle.o: $(SRCTOP)/deflate.h $(SRCTOP)/zutil.h $(SRCTOP)/zlib$(SUFFIX).h $(TOPDIR)/zconf$(SUFFIX).h
-
-adler32_neon.lo: $(SRCDIR)/adler32_neon.h
-crc32_acle.lo: $(TOPDIR)/zconf$(SUFFIX).h
-fill_window_arm.lo: $(SRCTOP)/deflate.h $(SRCTOP)/deflate_p.h $(SRCTOP)/functable.h $(SRCTOP)/zutil.h $(SRCTOP)/zlib$(SUFFIX).h $(TOPDIR)/zconf$(SUFFIX).h
-insert_string_acle.lo: $(SRCTOP)/deflate.h $(SRCTOP)/zutil.h $(SRCTOP)/zlib$(SUFFIX).h $(TOPDIR)/zconf$(SUFFIX).h
-
+++ /dev/null
-/* Copyright (C) 1995-2011, 2016 Mark Adler
- * Copyright (C) 2017 ARM Holdings Inc.
- * Author: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
- *
- * This software is provided 'as-is', without any express or implied
- * warranty. In no event will the authors be held liable for any damages
- * arising from the use of this software.
- * Permission is granted to anyone to use this software for any purpose,
- * including commercial applications, and to alter it and redistribute it
- * freely, subject to the following restrictions:
- * 1. The origin of this software must not be misrepresented; you must not
- * claim that you wrote the original software. If you use this software
- * in a product, an acknowledgment in the product documentation would be
- * appreciated but is not required.
- * 2. Altered source versions must be plainly marked as such, and must not be
- * misrepresented as being the original software.
- * 3. This notice may not be removed or altered from any source distribution.
- */
-#include "adler32_neon.h"
-#if defined(__ARM_NEON__) || defined(__ARM_NEON)
-#include <arm_neon.h>
-
-static void NEON_accum32(uint32_t *s, const unsigned char *buf, size_t len) {
- static const uint8_t taps[32] = {
- 32, 31, 30, 29, 28, 27, 26, 25,
- 24, 23, 22, 21, 20, 19, 18, 17,
- 16, 15, 14, 13, 12, 11, 10, 9,
- 8, 7, 6, 5, 4, 3, 2, 1 };
-
- uint32x2_t adacc2, s2acc2, as;
- uint8x16_t t0 = vld1q_u8(taps), t1 = vld1q_u8(taps + 16);
-
- uint32x4_t adacc = vdupq_n_u32(0), s2acc = vdupq_n_u32(0);
- adacc = vsetq_lane_u32(s[0], adacc, 0);
- s2acc = vsetq_lane_u32(s[1], s2acc, 0);
-
- while (len >= 2) {
- uint8x16_t d0 = vld1q_u8(buf), d1 = vld1q_u8(buf + 16);
- uint16x8_t adler, sum2;
- s2acc = vaddq_u32(s2acc, vshlq_n_u32(adacc, 5));
- adler = vpaddlq_u8( d0);
- adler = vpadalq_u8(adler, d1);
- sum2 = vmull_u8( vget_low_u8(t0), vget_low_u8(d0));
- sum2 = vmlal_u8(sum2, vget_high_u8(t0), vget_high_u8(d0));
- sum2 = vmlal_u8(sum2, vget_low_u8(t1), vget_low_u8(d1));
- sum2 = vmlal_u8(sum2, vget_high_u8(t1), vget_high_u8(d1));
- adacc = vpadalq_u16(adacc, adler);
- s2acc = vpadalq_u16(s2acc, sum2);
- len -= 2;
- buf += 32;
- }
-
- while (len > 0) {
- uint8x16_t d0 = vld1q_u8(buf);
- uint16x8_t adler, sum2;
- s2acc = vaddq_u32(s2acc, vshlq_n_u32(adacc, 4));
- adler = vpaddlq_u8(d0);
- sum2 = vmull_u8( vget_low_u8(t1), vget_low_u8(d0));
- sum2 = vmlal_u8(sum2, vget_high_u8(t1), vget_high_u8(d0));
- adacc = vpadalq_u16(adacc, adler);
- s2acc = vpadalq_u16(s2acc, sum2);
- buf += 16;
- len--;
- }
-
- adacc2 = vpadd_u32(vget_low_u32(adacc), vget_high_u32(adacc));
- s2acc2 = vpadd_u32(vget_low_u32(s2acc), vget_high_u32(s2acc));
- as = vpadd_u32(adacc2, s2acc2);
- s[0] = vget_lane_u32(as, 0);
- s[1] = vget_lane_u32(as, 1);
-}
-
-static void NEON_handle_tail(uint32_t *pair, const unsigned char *buf, size_t len) {
- /* Oldie K&R code integration. */
- unsigned int i;
- for (i = 0; i < len; ++i) {
- pair[0] += buf[i];
- pair[1] += pair[0];
- }
-}
-
-uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len) {
- if (!buf)
- return 1L;
-
- /* The largest prime smaller than 65536. */
- const uint32_t M_BASE = 65521;
- /* This is the threshold where doing accumulation may overflow. */
- const int M_NMAX = 5552;
-
- uint32_t sum2;
- uint32_t pair[2];
- int n = M_NMAX;
- unsigned int done = 0;
- /* Oldie K&R code integration. */
- unsigned int i;
-
- /* Split Adler-32 into component sums, it can be supplied by
- * the caller sites (e.g. in a PNG file).
- */
- sum2 = (adler >> 16) & 0xffff;
- adler &= 0xffff;
- pair[0] = adler;
- pair[1] = sum2;
-
- for (i = 0; i < len; i += n) {
- if ((i + n) > len)
- n = len - i;
-
- if (n < 16)
- break;
-
- NEON_accum32(pair, buf + i, n / 16);
- pair[0] %= M_BASE;
- pair[1] %= M_BASE;
-
- done += (n / 16) * 16;
- }
-
- /* Handle the tail elements. */
- if (done < len) {
- NEON_handle_tail(pair, (buf + done), len - done);
- pair[0] %= M_BASE;
- pair[1] %= M_BASE;
- }
-
- /* D = B * 65536 + A, see: https://en.wikipedia.org/wiki/Adler-32. */
- return (pair[1] << 16) | pair[0];
-}
-#endif
+++ /dev/null
-/* Copyright (C) 1995-2011, 2016 Mark Adler
- * Copyright (C) 2017 ARM Holdings Inc.
- * Author: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
- *
- * This software is provided 'as-is', without any express or implied
- * warranty. In no event will the authors be held liable for any damages
- * arising from the use of this software.
- * Permission is granted to anyone to use this software for any purpose,
- * including commercial applications, and to alter it and redistribute it
- * freely, subject to the following restrictions:
- * 1. The origin of this software must not be misrepresented; you must not
- * claim that you wrote the original software. If you use this software
- * in a product, an acknowledgment in the product documentation would be
- * appreciated but is not required.
- * 2. Altered source versions must be plainly marked as such, and must not be
- * misrepresented as being the original software.
- * 3. This notice may not be removed or altered from any source distribution.
- */
-#ifndef __ADLER32_NEON__
-#define __ADLER32_NEON__
-
-#if defined(__ARM_NEON__) || defined(__ARM_NEON)
-// Depending on the compiler flavor, size_t may be defined in one or the other header. See:
-// http://stackoverflow.com/questions/26410466/gcc-linaro-compiler-throws-error-unknown-type-name-size-t
-#include <stdint.h>
-#include <stddef.h>
-uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len);
-#endif
-#endif
+++ /dev/null
-/* arm.h -- check for ARM features.
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-#ifndef ARM_H_
-#define ARM_H_
-
-extern int arm_cpu_has_neon;
-extern int arm_cpu_has_crc32;
-
-void ZLIB_INTERNAL arm_check_features(void);
-
-#endif /* ARM_H_ */
+++ /dev/null
-#include "zutil.h"
-
-#if defined(__linux__)
-# include <sys/auxv.h>
-# include <asm/hwcap.h>
-#endif
-
-static int arm_has_crc32() {
-#if defined(__linux__) && defined(HWCAP_CRC32)
- return (getauxval(AT_HWCAP) & HWCAP_CRC32) != 0 ? 1 : 0;
-#elif defined(ARM_NOCHECK_ACLE)
- return 1;
-#else
- return 0;
-#endif
-}
-
-ZLIB_INTERNAL int arm_cpu_has_neon;
-ZLIB_INTERNAL int arm_cpu_has_crc32;
-
-void ZLIB_INTERNAL arm_check_features(void) {
- arm_cpu_has_neon = 1; /* always available */
- arm_cpu_has_crc32 = arm_has_crc32();
-}
+++ /dev/null
-/* crc32_acle.c -- compute the CRC-32 of a data stream
- * Copyright (C) 1995-2006, 2010, 2011, 2012 Mark Adler
- * Copyright (C) 2016 Yang Zhang
- * For conditions of distribution and use, see copyright notice in zlib.h
- *
-*/
-
-#ifdef __ARM_FEATURE_CRC32
-#include <arm_acle.h>
-#ifdef ZLIB_COMPAT
-# include <zconf.h>
-#else
-# include <zconf-ng.h>
-#endif
-#ifdef __linux__
-# include <stddef.h>
-#endif
-
-uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
- register uint32_t c;
- register const uint16_t *buf2;
- register const uint32_t *buf4;
- register const uint64_t *buf8;
-
- c = ~crc;
- if (len && ((ptrdiff_t)buf & 1)) {
- c = __crc32b(c, *buf++);
- len--;
- }
-
- if ((len > 2) && ((ptrdiff_t)buf & 2)) {
- buf2 = (const uint16_t *) buf;
- c = __crc32h(c, *buf2++);
- len -= 2;
- buf4 = (const uint32_t *) buf2;
- } else {
- buf4 = (const uint32_t *) buf;
- }
-
- if ((len > 4) && ((ptrdiff_t)buf & 4)) {
- c = __crc32w(c, *buf4++);
- len -= 4;
- }
-
- buf8 = (const uint64_t *) buf4;
-
-#ifdef UNROLL_MORE
- while (len >= 32) {
- c = __crc32d(c, *buf8++);
- c = __crc32d(c, *buf8++);
- c = __crc32d(c, *buf8++);
- c = __crc32d(c, *buf8++);
- len -= 32;
- }
-#endif
-
- while (len >= 8) {
- c = __crc32d(c, *buf8++);
- len -= 8;
- }
-
- if (len >= 4) {
- buf4 = (const uint32_t *) buf8;
- c = __crc32w(c, *buf4++);
- len -= 4;
- buf2 = (const uint16_t *) buf4;
- } else {
- buf2 = (const uint16_t *) buf8;
- }
-
- if (len >= 2) {
- c = __crc32h(c, *buf2++);
- len -= 2;
- }
-
- if (len) {
- buf = (const unsigned char *) buf2;
- c = __crc32b(c, *buf);
- }
-
- c = ~c;
- return c;
-}
-#endif
+++ /dev/null
-/* fill_window_arm.c -- Optimized hash table shifting for ARM with support for NEON instructions
- * Copyright (C) 2017 Mika T. Lindqvist
- *
- * Authors:
- * Mika T. Lindqvist <postmaster@raasu.org>
- * Jun He <jun.he@arm.com>
- *
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/* @(#) $Id$ */
-
-#include "zbuild.h"
-#include "deflate.h"
-#include "deflate_p.h"
-#include "functable.h"
-
-extern ZLIB_INTERNAL int read_buf(PREFIX3(stream) *strm, unsigned char *buf, unsigned size);
-
-#if defined(__ARM_NEON__) || defined(__ARM_NEON)
-#include <arm_neon.h>
-
-/* SIMD version of hash_chain rebase */
-static inline void slide_hash_chain(Pos *table, unsigned int entries, uint16_t window_size) {
- register uint16x8_t v, *p;
- register size_t n;
-
- size_t size = entries*sizeof(table[0]);
- Assert((size % sizeof(uint16x8_t) * 8 == 0), "hash table size err");
-
- Assert(sizeof(Pos) == 2, "Wrong Pos size");
- v = vdupq_n_u16(window_size);
-
- p = (uint16x8_t *)table;
- n = size / (sizeof(uint16x8_t) * 8);
- do {
- p[0] = vqsubq_u16(p[0], v);
- p[1] = vqsubq_u16(p[1], v);
- p[2] = vqsubq_u16(p[2], v);
- p[3] = vqsubq_u16(p[3], v);
- p[4] = vqsubq_u16(p[4], v);
- p[5] = vqsubq_u16(p[5], v);
- p[6] = vqsubq_u16(p[6], v);
- p[7] = vqsubq_u16(p[7], v);
- p += 8;
- } while (--n);
-}
-#else
-/* generic version for hash rebase */
-static inline void slide_hash_chain(Pos *table, unsigned int entries, uint16_t window_size) {
- unsigned int i;
- for (i = 0; i < entries; i++) {
- table[i] = (table[i] >= window_size) ? (table[i] - window_size) : NIL;
- }
-}
-#endif
-
-void fill_window_arm(deflate_state *s) {
- register unsigned n;
- unsigned long more; /* Amount of free space at the end of the window. */
- unsigned int wsize = s->w_size;
-
- Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead");
-
- do {
- more = s->window_size - s->lookahead - s->strstart;
-
- /* If the window is almost full and there is insufficient lookahead,
- * move the upper half to the lower one to make room in the upper half.
- */
- if (s->strstart >= wsize+MAX_DIST(s)) {
- memcpy(s->window, s->window+wsize, wsize);
- s->match_start -= wsize;
- s->strstart -= wsize; /* we now have strstart >= MAX_DIST */
- s->block_start -= wsize;
-
- /* Slide the hash table (could be avoided with 32 bit values
- at the expense of memory usage). We slide even when level == 0
- to keep the hash table consistent if we switch back to level > 0
- later. (Using level 0 permanently is not an optimal usage of
- zlib, so we don't care about this pathological case.)
- */
-
- slide_hash_chain(s->head, s->hash_size, wsize);
- slide_hash_chain(s->prev, wsize, wsize);
- more += wsize;
- }
- if (s->strm->avail_in == 0)
- break;
-
- /* If there was no sliding:
- * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
- * more == window_size - lookahead - strstart
- * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
- * => more >= window_size - 2*WSIZE + 2
- * In the BIG_MEM or MMAP case (not yet supported),
- * window_size == input_size + MIN_LOOKAHEAD &&
- * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
- * Otherwise, window_size == 2*WSIZE so more >= 2.
- * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
- */
- Assert(more >= 2, "more < 2");
-
- n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
- s->lookahead += n;
-
- /* Initialize the hash value now that we have some input: */
- if (s->lookahead + s->insert >= MIN_MATCH) {
- unsigned int str = s->strstart - s->insert;
- unsigned int insert_cnt = s->insert;
- unsigned int slen;
-
- s->ins_h = s->window[str];
-
- if (unlikely(s->lookahead < MIN_MATCH))
- insert_cnt += s->lookahead - MIN_MATCH;
- slen = insert_cnt;
- if (str >= (MIN_MATCH - 2))
- {
- str += 2 - MIN_MATCH;
- insert_cnt += MIN_MATCH - 2;
- }
- if (insert_cnt > 0)
- {
- functable.insert_string(s, str, insert_cnt);
- s->insert -= slen;
- }
- }
- /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
- * but this is not important since only literal bytes will be emitted.
- */
- } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
-
- /* If the WIN_INIT bytes after the end of the current data have never been
- * written, then zero those bytes in order to avoid memory check reports of
- * the use of uninitialized (or uninitialised as Julian writes) bytes by
- * the longest match routines. Update the high water mark for the next
- * time through here. WIN_INIT is set to MAX_MATCH since the longest match
- * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead.
- */
- if (s->high_water < s->window_size) {
- unsigned long curr = s->strstart + (unsigned long)s->lookahead;
- unsigned long init;
-
- if (s->high_water < curr) {
- /* Previous high water mark below current data -- zero WIN_INIT
- * bytes or up to end of window, whichever is less.
- */
- init = s->window_size - curr;
- if (init > WIN_INIT)
- init = WIN_INIT;
- memset(s->window + curr, 0, init);
- s->high_water = curr + init;
- } else if (s->high_water < curr + WIN_INIT) {
- /* High water mark at or above current data, but below current data
- * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
- * to end of window, whichever is less.
- */
- init = curr + WIN_INIT;
- if (init > s->window_size)
- init = s->window_size;
- init -= s->high_water;
- memset(s->window + s->high_water, 0, init);
- s->high_water += init;
- }
- }
-
- Assert((unsigned long)s->strstart <= s->window_size - MIN_LOOKAHEAD, "not enough room for search");
-}
+++ /dev/null
-/* insert_string_acle.c -- insert_string variant using ACLE's CRC instructions
- *
- * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- *
- */
-
-#if defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH)
-#include <arm_acle.h>
-#include "zbuild.h"
-#include "deflate.h"
-
-/* ===========================================================================
- * Insert string str in the dictionary and set match_head to the previous head
- * of the hash chain (the most recent string with same hash key). Return
- * the previous length of the hash chain.
- * IN assertion: all calls to to INSERT_STRING are made with consecutive
- * input characters and the first MIN_MATCH bytes of str are valid
- * (except for the last MIN_MATCH-1 bytes of the input file).
- */
-Pos insert_string_acle(deflate_state *const s, const Pos str, unsigned int count) {
- Pos p, lp, ret;
-
- if (unlikely(count == 0)) {
- return s->prev[str & s->w_mask];
- }
-
- ret = 0;
- lp = str + count - 1; /* last position */
-
- for (p = str; p <= lp; p++) {
- unsigned *ip, val, h, hm;
-
- ip = (unsigned *)&s->window[p];
- val = *ip;
-
- if (s->level >= TRIGGER_LEVEL)
- val &= 0xFFFFFF;
-
- h = __crc32w(0, val);
- hm = h & s->hash_mask;
-
- Pos head = s->head[hm];
- if (head != p) {
- s->prev[p & s->w_mask] = head;
- s->head[hm] = p;
- if (p == lp)
- ret = head;
- } else if (p == lp) {
- ret = p;
- }
- }
- return ret;
-}
-#endif
#endif
}
-static int arm_has_neon()
+static inline int arm_has_neon()
{
#if defined(__linux__) && defined(HWCAP_NEON)
return (getauxval(AT_HWCAP) & HWCAP_NEON) != 0 ? 1 : 0;
ZLIB_INTERNAL int arm_cpu_has_crc32;
void ZLIB_INTERNAL arm_check_features(void) {
+#if defined(__aarch64__)
+ arm_cpu_has_neon = 1; /* always available */
+#else
arm_cpu_has_neon = arm_has_neon();
+#endif
arm_cpu_has_crc32 = arm_has_crc32();
}
*/
#ifdef __ARM_FEATURE_CRC32
-#include <arm_acle.h>
-#ifdef ZLIB_COMPAT
+# include <arm_acle.h>
+# ifdef ZLIB_COMPAT
# include <zconf.h>
-#else
+# else
# include <zconf-ng.h>
-#endif
-#ifdef __linux__
+# endif
+# ifdef __linux__
# include <stddef.h>
-#endif
+# endif
uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
register uint32_t c;
buf4 = (const uint32_t *) buf;
}
-#ifdef UNROLL_MORE
+# if defined(__aarch64__)
+ if ((len > 4) && ((ptrdiff_t)buf & 4)) {
+ c = __crc32w(c, *buf4++);
+ len -= 4;
+ }
+
+ const uint64_t *buf8 = (const uint64_t *) buf4;
+
+# ifdef UNROLL_MORE
+ while (len >= 32) {
+ c = __crc32d(c, *buf8++);
+ c = __crc32d(c, *buf8++);
+ c = __crc32d(c, *buf8++);
+ c = __crc32d(c, *buf8++);
+ len -= 32;
+ }
+# endif
+
+ while (len >= 8) {
+ c = __crc32d(c, *buf8++);
+ len -= 8;
+ }
+
+ if (len >= 4) {
+ buf4 = (const uint32_t *) buf8;
+ c = __crc32w(c, *buf4++);
+ len -= 4;
+ buf2 = (const uint16_t *) buf4;
+ } else {
+ buf2 = (const uint16_t *) buf8;
+ }
+
+ if (len >= 2) {
+ c = __crc32h(c, *buf2++);
+ len -= 2;
+ }
+
+ buf = (const unsigned char *) buf2;
+# else /* __aarch64__ */
+
+# ifdef UNROLL_MORE
while (len >= 32) {
c = __crc32w(c, *buf4++);
c = __crc32w(c, *buf4++);
c = __crc32w(c, *buf4++);
len -= 32;
}
-#endif
+# endif
while (len >= 4) {
c = __crc32w(c, *buf4++);
} else {
buf = (const unsigned char *) buf4;
}
+# endif /* __aarch64__ */
if (len) {
c = __crc32b(c, *buf);
c = ~c;
return c;
}
-#endif
+#endif /* __ARM_FEATURE_CRC32 */
# 64-bit ARM specific optimizations
aarch64)
[ ! -z $CROSS_PREFIX ] && QEMU_ARCH=aarch64
- ARCHDIR=arch/aarch64
+ ARCHDIR=arch/arm
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} armfeature.o fill_window_arm.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} armfeature.lo fill_window_arm.lo"
#if defined(X86_CPUID)
# include "arch/x86/x86.h"
-#elif defined(__aarch64__)
-# include "arch/aarch64/arm.h"
-#elif defined(__arm__) || defined(_M_ARM)
+#elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM)
# include "arch/arm/arm.h"
#endif