From: Vladislav Shchapov Date: Sat, 20 Dec 2025 22:38:50 +0000 (+0500) Subject: Simplify LoongArch64 assembler. GCC 16, LLVM 22 have LASX and LSX conversion intrinsics. X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f60edf41cf5c983eaa708197e8c1afdd0e34690b;p=thirdparty%2Fzlib-ng.git Simplify LoongArch64 assembler. GCC 16, LLVM 22 have LASX and LSX conversion intrinsics. Signed-off-by: Vladislav Shchapov --- diff --git a/arch/loongarch/chunkset_lasx.c b/arch/loongarch/chunkset_lasx.c index 03a0a9c85..6ac439097 100644 --- a/arch/loongarch/chunkset_lasx.c +++ b/arch/loongarch/chunkset_lasx.c @@ -36,7 +36,7 @@ static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) { } static inline void chunkmemset_16(uint8_t *from, chunk_t *chunk) { - *chunk = lasx_broadcastsi128_si256(__lsx_vld(from, 0)); + *chunk = lasx_broadcast_128(__lsx_vld(from, 0)); } static inline void loadchunk(uint8_t const *s, chunk_t *chunk) { @@ -64,7 +64,7 @@ static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t * shuffles and combining the halves later */ __m256i perm_vec = __lasx_xvld(permute_table+lut_rem.idx, 0); __m128i ret_vec0 = __lsx_vld(buf, 0); - ret_vec = lasx_set_si128(ret_vec0, ret_vec0); + ret_vec = __lasx_concat_128(ret_vec0, ret_vec0); ret_vec = lasx_shuffle_b(ret_vec, perm_vec); } else { __m128i ret_vec0 = __lsx_vld(buf, 0); @@ -76,7 +76,7 @@ static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t /* Since we can't wrap twice, we can simply keep the later half exactly how it is instead of having to _also_ * shuffle those values */ __m128i latter_half = __lsx_vbitsel_v(ret_vec1, xlane_res, xlane_permutes); - ret_vec = lasx_set_si128(latter_half, ret_vec0); + ret_vec = __lasx_concat_128(ret_vec0, latter_half); } return ret_vec; @@ -93,7 +93,7 @@ static inline void storehalfchunk(uint8_t *out, halfchunk_t *chunk) { static inline chunk_t halfchunk2whole(halfchunk_t *chunk) { /* We zero extend mostly to appease some memory sanitizers. These bytes are ultimately * unlikely to be actually written or read from */ - return lasx_zextsi128_si256(*chunk); + return lasx_zext_128(*chunk); } static inline halfchunk_t GET_HALFCHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t dist) { diff --git a/arch/loongarch/lasxintrin_ext.h b/arch/loongarch/lasxintrin_ext.h index 833267de3..d97eab8df 100644 --- a/arch/loongarch/lasxintrin_ext.h +++ b/arch/loongarch/lasxintrin_ext.h @@ -9,63 +9,26 @@ #include -#ifdef __clang__ -# define LA_VREGS_PREFIX "$vr" -# define LA_XREGS_PREFIX "$xr" -#else /* GCC */ -# define LA_VREGS_PREFIX "$f" -# define LA_XREGS_PREFIX "$f" +static inline __m256i lasx_zext_128(__m128i src) { +#ifdef __loongarch_asx_sx_conv + return __lasx_insert_128_lo(__lasx_xvldi(0), src); +#else + __m256i dest = __lasx_xvldi(0); + __asm__ volatile ("xvpermi.q %u0,%u2,0x30\n" : "=f"(dest) : "0"(dest), "f"(src)); + return dest; #endif -#define LA_ALL_REGS "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31" - -static inline __m256i lasx_zextsi128_si256(__m128i in) { - __m256i out = __lasx_xvldi(0); - __asm__ volatile ( - ".irp i," LA_ALL_REGS "\n\t" - " .ifc %[out], " LA_XREGS_PREFIX"\\i \n\t" - " .irp j," LA_ALL_REGS "\n\t" - " .ifc %[in], " LA_VREGS_PREFIX "\\j \n\t" - " xvpermi.q $xr\\i, $xr\\j, 0x20 \n\t" - " .endif \n\t" - " .endr \n\t" - " .endif \n\t" - ".endr \n\t" - : [out] "+f" (out) : [in] "f" (in) - ); - return out; } -static inline __m256i lasx_set_si128(__m128i inhi, __m128i inlo) { - __m256i out; - __asm__ volatile ( - ".irp i," LA_ALL_REGS "\n\t" - " .ifc %[hi], " LA_VREGS_PREFIX "\\i \n\t" - " .irp j," LA_ALL_REGS "\n\t" - " .ifc %[lo], " LA_VREGS_PREFIX "\\j \n\t" - " xvpermi.q $xr\\i, $xr\\j, 0x20 \n\t" - " .endif \n\t" - " .endr \n\t" - " .endif \n\t" - ".endr \n\t" - ".ifnc %[out], %[hi] \n\t" - ".irp i," LA_ALL_REGS "\n\t" - " .ifc %[out], " LA_XREGS_PREFIX "\\i \n\t" - " .irp j," LA_ALL_REGS "\n\t" - " .ifc %[hi], " LA_VREGS_PREFIX "\\j \n\t" - " xvori.b $xr\\i, $xr\\j, 0 \n\t" - " .endif \n\t" - " .endr \n\t" - " .endif \n\t" - ".endr \n\t" - ".endif \n\t" - : [out] "=f" (out), [hi] "+f" (inhi) - : [lo] "f" (inlo) - ); - return out; +#ifndef __loongarch_asx_sx_conv +static inline __m256i __lasx_concat_128(__m128i lo, __m128i hi) { + __m256i dest; + __asm__ volatile ("xvpermi.q %u0,%u2,0x02\n" : "=f"(dest) : "0"(lo), "f"(hi)); + return dest; } +#endif -static inline __m256i lasx_broadcastsi128_si256(__m128i in) { - return lasx_set_si128(in, in); +static inline __m256i lasx_broadcast_128(__m128i in) { + return __lasx_concat_128(in, in); } static inline __m256i lasx_sad_bu(__m256i a, __m256i b) {