From: Richard Henderson Date: Wed, 23 Jul 2025 16:54:54 +0000 (+0100) Subject: target/arm: Pack mtedesc into upper 32 bits of descriptor X-Git-Tag: v10.1.0-rc1~9^2~6 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=aba39946baaf5ca73aae0b79e2cd0790ddafe291;p=thirdparty%2Fqemu.git target/arm: Pack mtedesc into upper 32 bits of descriptor Instead of trying to pack mtedesc into the upper 17 bits of a 32-bit gvec descriptor, pass the gvec descriptor in the lower 32 bits and the mte descriptor in the upper 32 bits of a 64-bit operand. This fixes two bugs: (1) in gen_sve_ldr() and gen_sve_str() call gen_mte_checkN() with a length value which is the SVE vector length and can be up to 256 bytes. We don't assert there that it fits in the descriptor, so we would just fail to do the MTE checks on the right length of memory if the VL is more than 32 bytes (2) the new-in-SVE2p1 insns LD3Q, LD4Q, ST3Q, ST4Q also involve transfers of more than 32 bytes of memory. In this case we would assert at translate time. (Note for potential backporting: this commit depends on the previous "target/arm: Expand the descriptor for SME/SVE memory ops to i64".) Fixes: 7b1613a1020d2942 ("target/arm: Enable FEAT_SME2p1 on -cpu max") Signed-off-by: Richard Henderson Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell Message-id: 20250723165458.3509150-3-peter.maydell@linaro.org [PMM: expand commit message to clarify that we are fixing bugs here] Signed-off-by: Peter Maydell --- diff --git a/target/arm/internals.h b/target/arm/internals.h index c4765e4489..1b3d0244fd 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -1623,19 +1623,13 @@ FIELD(PREDDESC, OPRSZ, 0, 6) FIELD(PREDDESC, ESZ, 6, 2) FIELD(PREDDESC, DATA, 8, 24) -/* - * The SVE simd_data field, for memory ops, contains either - * rd (5 bits) or a shift count (2 bits). - */ -#define SVE_MTEDESC_SHIFT 5 - /* Bits within a descriptor passed to the helper_mte_check* functions. */ FIELD(MTEDESC, MIDX, 0, 4) FIELD(MTEDESC, TBI, 4, 2) FIELD(MTEDESC, TCMA, 6, 2) FIELD(MTEDESC, WRITE, 8, 1) FIELD(MTEDESC, ALIGN, 9, 3) -FIELD(MTEDESC, SIZEM1, 12, SIMD_DATA_BITS - SVE_MTEDESC_SHIFT - 12) /* size - 1 */ +FIELD(MTEDESC, SIZEM1, 12, 32 - 12) /* size - 1 */ bool mte_probe(CPUARMState *env, uint32_t desc, uint64_t ptr); uint64_t mte_check(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra); diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c index 0b55f13f8c..075360d8b8 100644 --- a/target/arm/tcg/sme_helper.c +++ b/target/arm/tcg/sme_helper.c @@ -666,19 +666,16 @@ void sme_ld1(CPUARMState *env, void *za, uint64_t *vg, static inline QEMU_ALWAYS_INLINE void sme_ld1_mte(CPUARMState *env, void *za, uint64_t *vg, - target_ulong addr, uint32_t desc, uintptr_t ra, + target_ulong addr, uint64_t desc, uintptr_t ra, const int esz, bool vertical, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn, ClearFn *clr_fn, CopyFn *cpy_fn) { - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + uint32_t mtedesc = desc >> 32; int bit55 = extract64(addr, 55, 1); - /* Remove mtedesc from the normal sve descriptor. */ - desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* Perform gross MTE suppression early. */ if (!tbi_check(mtedesc, bit55) || tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { @@ -854,16 +851,13 @@ void sme_st1(CPUARMState *env, void *za, uint64_t *vg, static inline QEMU_ALWAYS_INLINE void sme_st1_mte(CPUARMState *env, void *za, uint64_t *vg, target_ulong addr, - uint32_t desc, uintptr_t ra, int esz, bool vertical, + uint64_t desc, uintptr_t ra, int esz, bool vertical, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + uint32_t mtedesc = desc >> 32; int bit55 = extract64(addr, 55, 1); - /* Remove mtedesc from the normal sve descriptor. */ - desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* Perform gross MTE suppression early. */ if (!tbi_check(mtedesc, bit55) || tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c index 9fc2c05879..d0fb4138d2 100644 --- a/target/arm/tcg/sve_helper.c +++ b/target/arm/tcg/sve_helper.c @@ -6362,17 +6362,14 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, static inline QEMU_ALWAYS_INLINE void sve_ldN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, - uint32_t desc, const uintptr_t ra, + uint64_t desc, const uintptr_t ra, const int esz, const int msz, const int N, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + uint32_t mtedesc = desc >> 32; int bit55 = extract64(addr, 55, 1); - /* Remove mtedesc from the normal sve descriptor. */ - desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* Perform gross MTE suppression early. */ if (!tbi_check(mtedesc, bit55) || tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { @@ -6727,17 +6724,14 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, static inline QEMU_ALWAYS_INLINE void sve_ldnfff1_r_mte(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, const uintptr_t retaddr, + uint64_t desc, const uintptr_t retaddr, const int esz, const int msz, const SVEContFault fault, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + uint32_t mtedesc = desc >> 32; int bit55 = extract64(addr, 55, 1); - /* Remove mtedesc from the normal sve descriptor. */ - desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* Perform gross MTE suppression early. */ if (!tbi_check(mtedesc, bit55) || tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { @@ -6985,17 +6979,14 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, static inline QEMU_ALWAYS_INLINE void sve_stN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, - uint32_t desc, const uintptr_t ra, + uint64_t desc, const uintptr_t ra, const int esz, const int msz, const int N, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + uint32_t mtedesc = desc >> 32; int bit55 = extract64(addr, 55, 1); - /* Remove mtedesc from the normal sve descriptor. */ - desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* Perform gross MTE suppression early. */ if (!tbi_check(mtedesc, bit55) || tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { @@ -7183,14 +7174,12 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, static inline QEMU_ALWAYS_INLINE void sve_ld1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t retaddr, + target_ulong base, uint64_t desc, uintptr_t retaddr, int esize, int msize, zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* Remove mtedesc from the normal sve descriptor. */ - desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + uint32_t mtedesc = desc >> 32; /* * ??? TODO: For the 32-bit offset extractions, base + ofs cannot @@ -7395,15 +7384,13 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, static inline QEMU_ALWAYS_INLINE void sve_ldff1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t retaddr, + target_ulong base, uint64_t desc, uintptr_t retaddr, const int esz, const int msz, zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* Remove mtedesc from the normal sve descriptor. */ - desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + uint32_t mtedesc = desc >> 32; /* * ??? TODO: For the 32-bit offset extractions, base + ofs cannot @@ -7600,14 +7587,12 @@ void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, static inline QEMU_ALWAYS_INLINE void sve_st1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t retaddr, + target_ulong base, uint64_t desc, uintptr_t retaddr, int esize, int msize, zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* Remove mtedesc from the normal sve descriptor. */ - desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + uint32_t mtedesc = desc >> 32; /* * ??? TODO: For the 32-bit offset extractions, base + ofs cannot @@ -7853,14 +7838,15 @@ static void sve2p1_cont_ldst_mte_check(SVEContLdSt *info, CPUARMState *env, static inline QEMU_ALWAYS_INLINE void sve2p1_ld1_c(CPUARMState *env, ARMVectorReg *zd, const vaddr addr, - uint32_t png, uint32_t desc, + uint32_t png, uint64_t desc64, const uintptr_t ra, const MemOp esz, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { + uint32_t mtedesc = desc64 >> 32; + uint32_t desc = desc64; const unsigned N = (desc >> SIMD_DATA_SHIFT) & 1 ? 4 : 2; const unsigned rstride = 1 << ((desc >> (SIMD_DATA_SHIFT + 1)) % 4); - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); const intptr_t reg_max = simd_oprsz(desc); const unsigned esize = 1 << esz; intptr_t count_off, count_last; @@ -8025,14 +8011,15 @@ DO_LD1_2(ld1dd, MO_64) static inline QEMU_ALWAYS_INLINE void sve2p1_st1_c(CPUARMState *env, ARMVectorReg *zd, const vaddr addr, - uint32_t png, uint32_t desc, + uint32_t png, uint64_t desc64, const uintptr_t ra, const int esz, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { + uint32_t mtedesc = desc64 >> 32; + uint32_t desc = desc64; const unsigned N = (desc >> SIMD_DATA_SHIFT) & 1 ? 4 : 2; const unsigned rstride = 1 << ((desc >> (SIMD_DATA_SHIFT + 1)) % 4); - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); const intptr_t reg_max = simd_oprsz(desc); const unsigned esize = 1 << esz; intptr_t count_off, count_last; diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index 849151826e..5cba7b87bd 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -4893,7 +4893,6 @@ uint64_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs, assert(nregs >= 1 && nregs <= 4); sizem1 = (nregs << msz) - 1; assert(sizem1 <= R_MTEDESC_SIZEM1_MASK >> R_MTEDESC_SIZEM1_SHIFT); - assert(data < 1u << SVE_MTEDESC_SHIFT); if (s->mte_active[0]) { desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); @@ -4901,9 +4900,9 @@ uint64_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs, desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); desc = FIELD_DP32(desc, MTEDESC, SIZEM1, sizem1); - desc <<= SVE_MTEDESC_SHIFT; + desc <<= 32; } - return simd_desc(vsz, vsz, desc | data); + return simd_desc(vsz, vsz, data) | desc; } static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,