From: Richard Henderson Date: Tue, 9 Jun 2026 19:20:45 +0000 (-0700) Subject: target/arm: Implement LUTI2, LUTI4 for AdvSIMD X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5fbdd62ee22f929400a623b4a1725dea83b6da70;p=thirdparty%2Fqemu.git target/arm: Implement LUTI2, LUTI4 for AdvSIMD Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20260609192110.752384-22-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode index 610047846f..2d06f14e1d 100644 --- a/target/arm/tcg/a64.decode +++ b/target/arm/tcg/a64.decode @@ -1355,6 +1355,12 @@ EXT_q 0110 1110 00 0 rm:5 0 imm:4 0 rn:5 rd:5 TBL_TBX 0 q:1 00 1110 000 rm:5 0 len:2 tbx:1 00 rn:5 rd:5 +LUTI2_1b 0100 1110 100 rm:5 0 idx:2 100 rn:5 rd:5 &rrx_e esz=0 +LUTI2_1h 0100 1110 110 rm:5 0 idx:3 00 rn:5 rd:5 &rrx_e esz=1 + +LUTI4_1b 0100 1110 010 rm:5 0 idx:1 1000 rn:5 rd:5 &rrx_e esz=0 +LUTI4_2h 0100 1110 010 rm:5 0 idx:2 100 rn:5 rd:5 &rrx_e esz=1 + # Advanced SIMD Permute UZP1 0.00 1110 .. 0 ..... 0 001 10 ..... ..... @qrrr_e diff --git a/target/arm/tcg/helper-defs.h b/target/arm/tcg/helper-defs.h index a05f2258f2..05ccf795e8 100644 --- a/target/arm/tcg/helper-defs.h +++ b/target/arm/tcg/helper-defs.h @@ -1122,3 +1122,8 @@ DEF_HELPER_FLAGS_4(sme2_luti4_2s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) DEF_HELPER_FLAGS_4(sme2_luti4_4h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) DEF_HELPER_FLAGS_4(sme2_luti4_4s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_4(gvec_luti2_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_luti2_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_luti4_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_luti4_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index 5606f610c8..ffd505601c 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -5502,6 +5502,44 @@ static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a) return true; } +static bool do_lut_1(DisasContext *s, arg_rrx_e *a, gen_helper_gvec_3 *fn) +{ + if (fp_access_check(s)) { + gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->idx, fn); + } + return true; +} + +TRANS_FEAT(LUTI2_1b, aa64_lut, do_lut_1, a, gen_helper_gvec_luti2_b) +TRANS_FEAT(LUTI2_1h, aa64_lut, do_lut_1, a, gen_helper_gvec_luti2_h) +TRANS_FEAT(LUTI4_1b, aa64_lut, do_lut_1, a, gen_helper_gvec_luti4_b) + +static bool trans_LUTI4_2h(DisasContext *s, arg_rrx_e *a) +{ + if (!dc_isar_feature(aa64_lut, s)) { + return false; + } + if (fp_access_check(s)) { + /* + * (Ab)use preg_tmp to merge two disjoint 128-bit quantities + * into a sequential 256-bit table. + */ + QEMU_BUILD_BUG_ON(sizeof_field(CPUARMState, vfp.preg_tmp) < 32); + unsigned tmp_ofs = offsetof(CPUARMState, vfp.preg_tmp); + unsigned rn0_ofs = vec_full_reg_offset(s, a->rn); + unsigned rn1_ofs = vec_full_reg_offset(s, (a->rn + 1) % 32); + + tcg_gen_gvec_mov(MO_64, tmp_ofs, rn0_ofs, 16, 16); + tcg_gen_gvec_mov(MO_64, tmp_ofs + 16, rn1_ofs, 16, 16); + + tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), tmp_ofs, + vec_full_reg_offset(s, a->rm), + 16, vec_full_reg_size(s), + a->idx, gen_helper_gvec_luti4_h); + } + return true; +} + typedef int simd_permute_idx_fn(int i, int part, int elements); static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a, diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c index bd8ae5d6a4..f4ff56e034 100644 --- a/target/arm/tcg/vec_helper.c +++ b/target/arm/tcg/vec_helper.c @@ -3344,3 +3344,55 @@ DO_SME2_LUT(4,4,h, 2) DO_SME2_LUT(4,4,s, 4) #undef DO_SME2_LUT + +void HELPER(gvec_luti2_b)(void *vd, void *vn, void *vm, uint32_t desc) +{ + unsigned part = simd_data(desc); + unsigned vl = simd_oprsz(desc); + unsigned elements = vl / 1; + unsigned ibase = elements * part; + ARMVectorReg scratch; + + do_lut_b(&scratch, vm, vn, elements, ibase, 0, 2, 8, 1); + memcpy(vd, &scratch, vl); + clear_tail(vd, vl, simd_maxsz(desc)); +} + +void HELPER(gvec_luti2_h)(void *vd, void *vn, void *vm, uint32_t desc) +{ + unsigned part = simd_data(desc); + unsigned vl = simd_oprsz(desc); + unsigned elements = vl / 2; + unsigned ibase = elements * part; + ARMVectorReg scratch; + + do_lut_h(&scratch, vm, vn, elements, ibase, 0, 2, 16, 1); + memcpy(vd, &scratch, vl); + clear_tail(vd, vl, simd_maxsz(desc)); +} + +void HELPER(gvec_luti4_b)(void *vd, void *vn, void *vm, uint32_t desc) +{ + unsigned part = simd_data(desc); + unsigned vl = simd_oprsz(desc); + unsigned elements = vl / 1; + unsigned ibase = elements * part; + ARMVectorReg scratch; + + do_lut_b(&scratch, vm, vn, elements, ibase, 0, 4, 8, 1); + memcpy(vd, &scratch, vl); + clear_tail(vd, vl, simd_maxsz(desc)); +} + +void HELPER(gvec_luti4_h)(void *vd, void *vn, void *vm, uint32_t desc) +{ + unsigned part = simd_data(desc); + unsigned vl = simd_oprsz(desc); + unsigned elements = vl / 2; + unsigned ibase = elements * part; + ARMVectorReg scratch; + + do_lut_h(&scratch, vm, vn, elements, ibase, 0, 4, 16, 1); + memcpy(vd, &scratch, vl); + clear_tail(vd, vl, simd_maxsz(desc)); +}