-/* chunkset_rvv.c - General version of chunkset
+/* chunkset_rvv.c - RVV version of chunkset
* Copyright (C) 2023 SiFive, Inc. All rights reserved.
* Contributed by Alex Chiang <alex.chiang@sifive.com>
* For conditions of distribution and use, see copyright notice in zlib.h
*/
-
+#include <riscv_vector.h>
#include "zbuild.h"
/*
- * It's not a optimized implemantation using RISC-V RVV, but a general optimized one.
- *
* RISC-V glibc enables an RVV-optimized memcpy at runtime via IFUNC,
* so we prefer using a large chunk size and copying as much memory as possible at once.
*/
#define CHUNK_SIZE 32
+#define HAVE_CHUNKMEMSET_2
+#define HAVE_CHUNKMEMSET_4
+#define HAVE_CHUNKMEMSET_8
+
+#define CHUNK_MEMSET_RVV_IMPL(elen) \
+do { \
+ size_t vl, len = CHUNK_SIZE / sizeof(uint##elen##_t); \
+ uint##elen##_t val = *(uint##elen##_t*)from; \
+ uint##elen##_t* chunk_p = (uint##elen##_t*)chunk; \
+ do { \
+ vl = __riscv_vsetvl_e##elen##m4(len); \
+ vuint##elen##m4_t v_val = __riscv_vmv_v_x_u##elen##m4(val, vl); \
+ __riscv_vse##elen##_v_u##elen##m4(chunk_p, v_val, vl); \
+ len -= vl; chunk_p += vl; \
+ } while (len > 0); \
+} while (0)
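
For reference (this expansion is written out by hand here and is not part of the patch itself), instantiating CHUNK_MEMSET_RVV_IMPL with elen = 16, as the chunkmemset_2 helper below does, is roughly equivalent to the following strip-mined loop: the 2-byte pattern at `from` is broadcast into an LMUL=4 register group and stored across the 32-byte chunk, with vsetvl capping how many elements each pass handles.

    size_t vl, len = CHUNK_SIZE / sizeof(uint16_t);          /* 16 elements of 2 bytes */
    uint16_t val = *(uint16_t *)from;                        /* the repeating 2-byte pattern */
    uint16_t *chunk_p = (uint16_t *)chunk;
    do {
        vl = __riscv_vsetvl_e16m4(len);                      /* elements handled this pass */
        vuint16m4_t v_val = __riscv_vmv_v_x_u16m4(val, vl);  /* broadcast val into a vector */
        __riscv_vse16_v_u16m4(chunk_p, v_val, vl);           /* store vl elements into chunk */
        len -= vl; chunk_p += vl;
    } while (len > 0);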
+
/* We don't have a 32-byte datatype for RISC-V arch. */
typedef struct chunk_s {
- uint8_t data[CHUNK_SIZE];
+ uint64_t data[4];
} chunk_t;
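
A compile-time check along these lines (not in the patch, just an illustration) would document that the four 64-bit words exactly cover the 32-byte chunk the rest of the code assumes:

    /* hypothetical sanity check, assuming C11 */
    _Static_assert(sizeof(chunk_t) == CHUNK_SIZE, "chunk_t must be exactly CHUNK_SIZE bytes");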
+static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
+ CHUNK_MEMSET_RVV_IMPL(16);
+}
+
+static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
+ CHUNK_MEMSET_RVV_IMPL(32);
+}
+
+static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
+ CHUNK_MEMSET_RVV_IMPL(64);
+}
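
These helpers are presumably invoked by the generic chunkset template when the match distance is exactly 2, 4, or 8 bytes (that is the path the HAVE_CHUNKMEMSET_* defines above opt into). As a standalone sketch with a made-up pattern buffer, the 2-byte case behaves like this on little-endian RISC-V:

    uint8_t pattern[2] = {0xAB, 0xCD};  /* hypothetical 2-byte repeating unit */
    chunk_t chunk;

    chunkmemset_2(pattern, &chunk);
    /* chunk.data now holds 0xAB, 0xCD repeated 16 times, i.e. CHUNK_SIZE bytes */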
+
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
- memcpy(chunk->data, s, CHUNK_SIZE);
+ memcpy(chunk->data, (uint8_t *)s, CHUNK_SIZE);
}
static inline void storechunk(uint8_t *out, chunk_t *chunk) {
* However, RISC-V glibc enables an RVV-optimized memcpy at runtime via IFUNC,
* so we prefer to copy a large block of memory at once to make good use of the RVV advantage.
*
- * To be aligned to the other platforms, we did't modify `CHUNKCOPY` method a lot,
+ * To stay aligned with the other platforms, we didn't modify the `CHUNKCOPY` method much,
* but we still copy as much memory as possible under some conditions.
*
* case 1: out - from >= len (no overlap)
static inline uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) {
Assert(len > 0, "chunkcopy should never have a length 0");
int32_t align = ((len - 1) % sizeof(chunk_t)) + 1;
- chunk_t chunk;
- memcpy(out, from, sizeof(chunk));
+ memcpy(out, from, sizeof(chunk_t));
out += align;
from += align;
len -= align;
len -= dist;
}
while (len > 0) {
- memcpy(out, from, sizeof(chunk));
+ memcpy(out, from, sizeof(chunk_t));
out += sizeof(chunk_t);
from += sizeof(chunk_t);
len -= sizeof(chunk_t);