From: alexsifivetw Date: Sun, 24 Sep 2023 09:53:55 +0000 (-0700) Subject: Use large data type for chunk X-Git-Tag: 2.1.4~9 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d30c7bf8fc4d7ad6c48b9f5eaccb8421a270d716;p=thirdparty%2Fzlib-ng.git Use large data type for chunk Implement chunk memset for specific length --- diff --git a/arch/riscv/chunkset_rvv.c b/arch/riscv/chunkset_rvv.c index 034063ba..ee43bde2 100644 --- a/arch/riscv/chunkset_rvv.c +++ b/arch/riscv/chunkset_rvv.c @@ -1,26 +1,53 @@ -/* chunkset_rvv.c - General version of chunkset +/* chunkset_rvv.c - RVV version of chunkset * Copyright (C) 2023 SiFive, Inc. All rights reserved. * Contributed by Alex Chiang * For conditions of distribution and use, see copyright notice in zlib.h */ - +#include #include "zbuild.h" /* - * It's not a optimized implemantation using RISC-V RVV, but a general optimized one. - * * RISC-V glibc would enable RVV optimized memcpy at runtime by IFUNC, * so we prefer using large size chunk and copy memory as much as possible. */ #define CHUNK_SIZE 32 +#define HAVE_CHUNKMEMSET_2 +#define HAVE_CHUNKMEMSET_4 +#define HAVE_CHUNKMEMSET_8 + +#define CHUNK_MEMSET_RVV_IMPL(elen) \ +do { \ + size_t vl, len = CHUNK_SIZE / sizeof(uint##elen##_t); \ + uint##elen##_t val = *(uint##elen##_t*)from; \ + uint##elen##_t* chunk_p = (uint##elen##_t*)chunk; \ + do { \ + vl = __riscv_vsetvl_e##elen##m4(len); \ + vuint##elen##m4_t v_val = __riscv_vmv_v_x_u##elen##m4(val, vl); \ + __riscv_vse##elen##_v_u##elen##m4(chunk_p, v_val, vl); \ + len -= vl; chunk_p += vl; \ + } while (len > 0); \ +} while (0) + /* We don't have a 32-byte datatype for RISC-V arch. */ typedef struct chunk_s { - uint8_t data[CHUNK_SIZE]; + uint64_t data[4]; } chunk_t; +static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) { + CHUNK_MEMSET_RVV_IMPL(16); +} + +static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) { + CHUNK_MEMSET_RVV_IMPL(32); +} + +static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) { + CHUNK_MEMSET_RVV_IMPL(64); +} + static inline void loadchunk(uint8_t const *s, chunk_t *chunk) { - memcpy(chunk->data, s, CHUNK_SIZE); + memcpy(chunk->data, (uint8_t *)s, CHUNK_SIZE); } static inline void storechunk(uint8_t *out, chunk_t *chunk) { @@ -43,7 +70,7 @@ static inline void storechunk(uint8_t *out, chunk_t *chunk) { * However, RISC-V glibc would enable RVV optimized memcpy at runtime by IFUNC, * such that, we prefer copy large memory size once to make good use of the the RVV advance. * - * To be aligned to the other platforms, we did't modify `CHUNKCOPY` method a lot, + * To be aligned to the other platforms, we didn't modify `CHUNKCOPY` method a lot, * but we still copy as much memory as possible for some conditions. * * case 1: out - from >= len (no overlap) @@ -60,8 +87,7 @@ static inline void storechunk(uint8_t *out, chunk_t *chunk) { static inline uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) { Assert(len > 0, "chunkcopy should never have a length 0"); int32_t align = ((len - 1) % sizeof(chunk_t)) + 1; - chunk_t chunk; - memcpy(out, from, sizeof(chunk)); + memcpy(out, from, sizeof(chunk_t)); out += align; from += align; len -= align; @@ -80,7 +106,7 @@ static inline uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len len -= dist; } while (len > 0) { - memcpy(out, from, sizeof(chunk)); + memcpy(out, from, sizeof(chunk_t)); out += sizeof(chunk_t); from += sizeof(chunk_t); len -= sizeof(chunk_t);