From 2c3f23b018c719451a69b043bb5eaaa28d4aa014 Mon Sep 17 00:00:00 2001
From: ZijianLi
Date: Sun, 29 Jun 2025 15:36:25 +0800
Subject: [PATCH] fix dereferencing type-punned pointer error

---
 lib/compress/zstd_compress.c | 122 ++++++++++++++++++-----------------
 1 file changed, 64 insertions(+), 58 deletions(-)

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 9b7aaf9f4..fc29b56ab 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -7287,7 +7287,7 @@ static size_t convertSequences_noRepcodes(
 #elif defined ZSTD_ARCH_RISCV_RVV
 #include <riscv_vector.h>
 /*
- * Convert `vl` sequences per iteration, using AVX2 intrinsics:
+ * Convert `vl` sequences per iteration, using RVV intrinsics:
  * - offset -> offBase = offset + 2
  * - litLength -> (U16) litLength
  * - matchLength -> (U16)(matchLength - 3)
@@ -7300,7 +7300,8 @@
  */
 static size_t convertSequences_noRepcodes(SeqDef* dstSeqs, const ZSTD_Sequence* inSeqs, size_t nbSequences) {
     size_t longLen = 0;
-
+    size_t vl = 0;
+    typedef uint32_t __attribute__((may_alias)) aliased_u32;
     /* RVV depends on the specific definition of target structures */
     ZSTD_STATIC_ASSERT(sizeof(ZSTD_Sequence) == 16);
     ZSTD_STATIC_ASSERT(offsetof(ZSTD_Sequence, offset) == 0);
@@ -7310,62 +7311,68 @@ static size_t convertSequences_noRepcodes(SeqDef* dstSeqs, const ZSTD_Sequence*
     ZSTD_STATIC_ASSERT(offsetof(SeqDef, offBase) == 0);
     ZSTD_STATIC_ASSERT(offsetof(SeqDef, litLength) == 4);
     ZSTD_STATIC_ASSERT(offsetof(SeqDef, mlBase) == 6);
-    size_t vl = 0;
+

     for (size_t i = 0; i < nbSequences; i += vl) {
-        vl = __riscv_vsetvl_e32m2(nbSequences-i);
-        // Loading structure member variables
-        vuint32m2x4_t v_tuple = __riscv_vlseg4e32_v_u32m2x4(
-            (const int32_t*)&inSeqs[i],
-            vl
-        );
-        vuint32m2_t v_offset = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 0);
-        vuint32m2_t v_lit = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 1);
-        vuint32m2_t v_match = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 2);
-        // offset + ZSTD_REP_NUM
-        vuint32m2_t v_offBase = __riscv_vadd_vx_u32m2(v_offset, ZSTD_REP_NUM, vl);
-        // Check for integer overflow
-        // Cast to a 16-bit variable
-        vbool16_t lit_overflow = __riscv_vmsgtu_vx_u32m2_b16(v_lit, 65535, vl);
-        vuint16m1_t v_lit_clamped = __riscv_vncvt_x_x_w_u16m1(v_lit, vl);
-
-        vbool16_t ml_overflow = __riscv_vmsgtu_vx_u32m2_b16(v_match, 65535+MINMATCH, vl);
-        vuint16m1_t v_ml_clamped = __riscv_vncvt_x_x_w_u16m1(__riscv_vsub_vx_u32m2(v_match, MINMATCH, vl), vl);
-
-        // Pack two 16-bit fields into a 32-bit value (little-endian)
-        // The lower 16 bits contain litLength, and the upper 16 bits contain mlBase
-        vuint32m2_t v_lit_ml_combined = __riscv_vsll_vx_u32m2(
-            __riscv_vwcvtu_x_x_v_u32m2(v_ml_clamped, vl), // Convert matchLength to 32-bit
-            16,
-            vl
-        );
-        v_lit_ml_combined = __riscv_vor_vv_u32m2(
-            v_lit_ml_combined,
-            __riscv_vwcvtu_x_x_v_u32m2(v_lit_clamped, vl),
-            vl
-        );
-        // Create a vector of SeqDef structures
-        // Store the offBase, litLength, and mlBase in a vector of SeqDef
-        vuint32m2x2_t store_data = __riscv_vcreate_v_u32m2x2(
-            v_offBase,
-            v_lit_ml_combined
-        );
-        __riscv_vsseg2e32_v_u32m2x2(
-            (uint32_t*)&dstSeqs[i],
-            store_data,
-            vl
-        );
-        // Find the first index where an overflow occurs
-        int first_ml = __riscv_vfirst_m_b16(ml_overflow, vl);
-        int first_lit = __riscv_vfirst_m_b16(lit_overflow, vl);
+        vl = __riscv_vsetvl_e32m2(nbSequences-i);
+        {
+            // Loading structure member variables
+            vuint32m2x4_t v_tuple = __riscv_vlseg4e32_v_u32m2x4(
+                (const aliased_u32*)((const void*)&inSeqs[i]),
+                vl
+            );
+            vuint32m2_t v_offset = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 0);
+            vuint32m2_t v_lit = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 1);
+            vuint32m2_t v_match = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 2);
+            // offset + ZSTD_REP_NUM
+            vuint32m2_t v_offBase = __riscv_vadd_vx_u32m2(v_offset, ZSTD_REP_NUM, vl);
+            // Check for integer overflow
+            // Cast to a 16-bit variable
+            vbool16_t lit_overflow = __riscv_vmsgtu_vx_u32m2_b16(v_lit, 65535, vl);
+            vuint16m1_t v_lit_clamped = __riscv_vncvt_x_x_w_u16m1(v_lit, vl);
+
+            vbool16_t ml_overflow = __riscv_vmsgtu_vx_u32m2_b16(v_match, 65535+MINMATCH, vl);
+            vuint16m1_t v_ml_clamped = __riscv_vncvt_x_x_w_u16m1(__riscv_vsub_vx_u32m2(v_match, MINMATCH, vl), vl);
+
+            // Pack two 16-bit fields into a 32-bit value (little-endian)
+            // The lower 16 bits contain litLength, and the upper 16 bits contain mlBase
+            vuint32m2_t v_lit_ml_combined = __riscv_vsll_vx_u32m2(
+                __riscv_vwcvtu_x_x_v_u32m2(v_ml_clamped, vl), // Convert matchLength to 32-bit
+                16,
+                vl
+            );
+            v_lit_ml_combined = __riscv_vor_vv_u32m2(
+                v_lit_ml_combined,
+                __riscv_vwcvtu_x_x_v_u32m2(v_lit_clamped, vl),
+                vl
+            );
+            {
+                // Create a vector of SeqDef structures
+                // Store the offBase, litLength, and mlBase in a vector of SeqDef
+                vuint32m2x2_t store_data = __riscv_vcreate_v_u32m2x2(
+                    v_offBase,
+                    v_lit_ml_combined
+                );
+                __riscv_vsseg2e32_v_u32m2x2(
+                    (aliased_u32*)((void*)&dstSeqs[i]),
+                    store_data,
+                    vl
+                );
+            }
+            {
+                // Find the first index where an overflow occurs
+                int first_ml = __riscv_vfirst_m_b16(ml_overflow, vl);
+                int first_lit = __riscv_vfirst_m_b16(lit_overflow, vl);

-        if (UNLIKELY(first_ml != -1)) {
-            assert(longLen == 0);
-            longLen = i + first_ml + 1;
-        }
-        if (UNLIKELY(first_lit != -1)) {
-            assert(longLen == 0);
-            longLen = i + first_lit + 1 + nbSequences;
+                if (UNLIKELY(first_ml != -1)) {
+                    assert(longLen == 0);
+                    longLen = i + first_ml + 1;
+                }
+                if (UNLIKELY(first_lit != -1)) {
+                    assert(longLen == 0);
+                    longLen = i + first_lit + 1 + nbSequences;
+                }
+            }
         }
     }
     return longLen;
@@ -7547,18 +7554,17 @@ BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs)
     size_t i = 0;
     int found_terminator = 0;
     size_t vl_max = __riscv_vsetvlmax_e32m1();
+    typedef uint32_t __attribute__((may_alias)) aliased_u32;

     vuint32m1_t v_lit_sum = __riscv_vmv_v_x_u32m1(0, vl_max);
     vuint32m1_t v_match_sum = __riscv_vmv_v_x_u32m1(0, vl_max);

     for (; i < nbSeqs; ) {
         size_t vl = __riscv_vsetvl_e32m2(nbSeqs - i);
-        ptrdiff_t stride = sizeof(ZSTD_Sequence); // 16
         vuint32m2x4_t v_tuple = __riscv_vlseg4e32_v_u32m2x4(
-            (const int32_t*)&seqs[i],
+            (const aliased_u32*)((const void*)&seqs[i]),
             vl
         );
-
         vuint32m2_t v_offset = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 0);
         vuint32m2_t v_lit = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 1);
         vuint32m2_t v_match = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 2);
-- 
2.47.2
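
Note (illustration, not part of the patch): the fix reads and writes the sequence arrays
through a pointer to a may_alias-qualified u32 element type, reached via an intermediate
(const void*) cast, instead of casting ZSTD_Sequence* / SeqDef* directly to (u)int32_t*
for the __riscv_vlseg4e32_v_u32m2x4 / __riscv_vsseg2e32_v_u32m2x2 calls. The standalone C
sketch below shows that same cast shape outside of zstd and RVV; the Seq struct and the
sum_offsets helper are hypothetical stand-ins, and only the aliased_u32 typedef and the
pointer cast mirror the patch. It should build with GCC or Clang, e.g.
`gcc -O2 -Wall -Wstrict-aliasing -c alias_sketch.c` (file name is illustrative).

    #include <stdint.h>
    #include <stddef.h>

    typedef struct {
        uint32_t offset;
        uint32_t litLength;
        uint32_t matchLength;
        uint32_t rep;
    } Seq;   /* illustrative 16-byte struct of four u32 fields, analogous to ZSTD_Sequence */

    /* GCC/Clang extension: a 32-bit type that is allowed to alias any object type. */
    typedef uint32_t __attribute__((may_alias)) aliased_u32;

    /* Sum the first field of each element by viewing the array as raw u32 lanes,
     * the same access shape the RVV segment load uses. Casting &seqs[i] straight
     * to an incompatible pointer type (the old code used const int32_t*) can trip
     * GCC's "dereferencing type-punned pointer will break strict-aliasing rules"
     * warning; hopping through (const void*) to a may_alias element type keeps
     * the access acceptable to the compiler's alias analysis. */
    uint32_t sum_offsets(const Seq* seqs, size_t n)
    {
        const aliased_u32* lanes = (const aliased_u32*)((const void*)seqs);
        uint32_t sum = 0;
        size_t i;
        for (i = 0; i < n; i++)
            sum += lanes[4 * i + 0];   /* lane 0 of each 4-lane element: offset */
        return sum;
    }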