From: Richard Henderson
Date: Tue, 21 Oct 2025 19:35:39 +0000 (-0500)
Subject: accel/tcg: Introduce and use MO_ALIGN_TLB_ONLY
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=4dea00368dc75189af1773e3a06cbdb320b1dc7b;p=thirdparty%2Fqemu.git

accel/tcg: Introduce and use MO_ALIGN_TLB_ONLY

For Arm, we need to distinguish 3 cases: (1) the alignment required
when accessing Normal memory, (2) the alignment required when
accessing Device memory, and (3) the atomicity of the access.

When we added TLB_CHECK_ALIGNED, we assumed that cases 2 and 3 were
identical, and thus used memop_atomicity_bits for TLB_CHECK_ALIGNED.
This is incorrect for multiple reasons, including that the atomicity
of the access is adjusted depending on whether or not we are
executing within a serial context.

For Arm, what is true is that there is an underlying alignment
requirement for the access, and for that access Normal memory will
additionally support unaligned addresses.

Introduce MO_ALIGN_TLB_ONLY to indicate that the alignment specified
in MO_AMASK applies only when the TLB entry has TLB_CHECK_ALIGNED
set; otherwise no alignment is required.

Introduce memop_tlb_alignment_bits with an additional bool argument
that specifies whether TLB_CHECK_ALIGNED is set.  All other usage of
memop_alignment_bits assumes it is not.

Remove memop_atomicity_bits as unused; it did not properly support
MO_ATOM_SUBALIGN anyway.

Update target/arm finalize_memop_atom to set MO_ALIGN_TLB_ONLY when
strict alignment is not otherwise required.

Suggested-by: Peter Maydell
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/3171
Signed-off-by: Richard Henderson
Reviewed-by: Manos Pitsidianakis
Reviewed-by: Philippe Mathieu-Daudé
---
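A note for readers, not part of the applied patch: the standalone
model below illustrates the semantics of the new helper.  The MemOp
constants are simplified stand-ins (they match the bit positions this
patch uses, but omit MO_SIGN, MO_BSWAP and the atomicity field), and
the helper body mirrors the new include/exec/memop.h code.

    /*
     * Standalone sketch, assuming the simplified field layout above:
     * MO_SIZE in bits [1:0], MO_AMASK at MO_ASHIFT, and
     * MO_ALIGN_TLB_ONLY at bit 8.  Not the QEMU header itself.
     */
    #include <assert.h>
    #include <stdbool.h>
    #include <stdio.h>

    enum {
        MO_8 = 0, MO_16 = 1, MO_32 = 2, MO_64 = 3,
        MO_SIZE = 3,
        MO_ASHIFT = 4,
        MO_AMASK = 7 << MO_ASHIFT,
        MO_UNALN = 0 << MO_ASHIFT,
        MO_ALIGN_8 = 3 << MO_ASHIFT,
        MO_ALIGN = MO_AMASK,
        MO_ALIGN_TLB_ONLY = 1 << 8,
    };

    /* Mirrors the new memop_tlb_alignment_bits() from this patch. */
    static unsigned memop_tlb_alignment_bits(unsigned memop, bool tlb_check)
    {
        unsigned a = memop & MO_AMASK;

        if (a == MO_UNALN || (!tlb_check && (memop & MO_ALIGN_TLB_ONLY))) {
            a = 0;                  /* no alignment required */
        } else if (a == MO_ALIGN) {
            a = memop & MO_SIZE;    /* natural alignment */
        } else {
            a = a >> MO_ASHIFT;     /* explicit MO_ALIGN_n */
        }
        return a;
    }

    int main(void)
    {
        /* A 64-bit access needing 8-byte alignment only on Device pages. */
        unsigned mop = MO_64 | MO_ALIGN_8 | MO_ALIGN_TLB_ONLY;

        /* Fast path / Normal memory: no alignment enforced. */
        assert(memop_tlb_alignment_bits(mop, false) == 0);
        /* TLB_CHECK_ALIGNED set (Device memory): 2^3 = 8 bytes. */
        assert(memop_tlb_alignment_bits(mop, true) == 3);

        printf("ok\n");
        return 0;
    }

The key point: with tlb_check false (the common case, Normal memory)
the MO_AMASK bits are ignored entirely when MO_ALIGN_TLB_ONLY is set,
while a TLB entry carrying TLB_CHECK_ALIGNED (Device memory) still
enforces them.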
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 631f1fe135..fd1606c856 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1668,18 +1668,7 @@ static bool mmu_lookup1(CPUState *cpu, MMULookupPageData *data, MemOp memop,
     if (likely(!maybe_resized)) {
         /* Alignment has not been checked by tlb_fill_align. */
-        int a_bits = memop_alignment_bits(memop);
-
-        /*
-         * This alignment check differs from the one above, in that this is
-         * based on the atomicity of the operation. The intended use case is
-         * the ARM memory type field of each PTE, where access to pages with
-         * Device memory type require alignment.
-         */
-        if (unlikely(flags & TLB_CHECK_ALIGNED)) {
-            int at_bits = memop_atomicity_bits(memop);
-            a_bits = MAX(a_bits, at_bits);
-        }
+        int a_bits = memop_tlb_alignment_bits(memop, flags & TLB_CHECK_ALIGNED);
         if (unlikely(addr & ((1 << a_bits) - 1))) {
             cpu_unaligned_access(cpu, addr, access_type, mmu_idx, ra);
         }
     }
diff --git a/include/exec/memop.h b/include/exec/memop.h
index cf7da3362e..799b5b4221 100644
--- a/include/exec/memop.h
+++ b/include/exec/memop.h
@@ -72,6 +72,16 @@ typedef enum MemOp {
     MO_ALIGN_64 = 6 << MO_ASHIFT,
     MO_ALIGN    = MO_AMASK,
 
+    /*
+     * MO_ALIGN_TLB_ONLY:
+     * Apply MO_AMASK only along the TCG slow path if TLB_CHECK_ALIGNED
+     * is set; otherwise unaligned access is permitted.
+     * This is used by target/arm, where unaligned accesses are
+     * permitted for pages marked Normal but aligned accesses are
+     * required for pages marked Device.
+     */
+    MO_ALIGN_TLB_ONLY = 1 << 8,
+
     /*
      * MO_ATOM_* describes the atomicity requirements of the operation:
      * MO_ATOM_IFALIGN: the operation must be single-copy atomic if it
@@ -104,7 +114,7 @@ typedef enum MemOp {
      * size of the operation, if aligned.  This retains the behaviour
      * from before this field was introduced.
      */
-    MO_ATOM_SHIFT        = 8,
+    MO_ATOM_SHIFT        = 9,
     MO_ATOM_IFALIGN      = 0 << MO_ATOM_SHIFT,
     MO_ATOM_IFALIGN_PAIR = 1 << MO_ATOM_SHIFT,
     MO_ATOM_WITHIN16     = 2 << MO_ATOM_SHIFT,
@@ -169,16 +179,16 @@ static inline MemOp size_memop(unsigned size)
 }
 
 /**
- * memop_alignment_bits:
+ * memop_tlb_alignment_bits:
  * @memop: MemOp value
  *
- * Extract the alignment size from the memop.
+ * Extract the alignment size for use with TLB_CHECK_ALIGNED.
  */
-static inline unsigned memop_alignment_bits(MemOp memop)
+static inline unsigned memop_tlb_alignment_bits(MemOp memop, bool tlb_check)
 {
     unsigned a = memop & MO_AMASK;
 
-    if (a == MO_UNALN) {
+    if (a == MO_UNALN || (!tlb_check && (memop & MO_ALIGN_TLB_ONLY))) {
         /* No alignment required. */
         a = 0;
     } else if (a == MO_ALIGN) {
@@ -191,28 +201,15 @@ static inline unsigned memop_alignment_bits(MemOp memop)
     return a;
 }
 
-/*
- * memop_atomicity_bits:
+/**
+ * memop_alignment_bits:
  * @memop: MemOp value
  *
- * Extract the atomicity size from the memop.
+ * Extract the alignment size from the memop.
  */
-static inline unsigned memop_atomicity_bits(MemOp memop)
+static inline unsigned memop_alignment_bits(MemOp memop)
 {
-    unsigned size = memop & MO_SIZE;
-
-    switch (memop & MO_ATOM_MASK) {
-    case MO_ATOM_NONE:
-        size = MO_8;
-        break;
-    case MO_ATOM_IFALIGN_PAIR:
-    case MO_ATOM_WITHIN16_PAIR:
-        size = size ? size - 1 : 0;
-        break;
-    default:
-        break;
-    }
-    return size;
+    return memop_tlb_alignment_bits(memop, false);
 }
 
 #endif
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
index 23f6616811..2e6b149b2d 100644
--- a/target/arm/ptw.c
+++ b/target/arm/ptw.c
@@ -2352,7 +2352,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
      * CPUs with ARM_FEATURE_LPAE but not ARM_FEATURE_V7VE anyway.)
      */
     if (device) {
-        unsigned a_bits = memop_atomicity_bits(memop);
+        unsigned a_bits = memop_tlb_alignment_bits(memop, true);
         if (address & ((1 << a_bits) - 1)) {
             fi->type = ARMFault_Alignment;
             goto do_fault;
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 3292d7cbfd..08b21d7dbf 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -3691,9 +3691,8 @@ static bool trans_STP(DisasContext *s, arg_ldstpair *a)
      * In all cases, issue one operation with the correct atomicity.
      */
     mop = a->sz + 1;
-    if (s->align_mem) {
-        mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
-    }
+    mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
+    mop |= (s->align_mem ? 0 : MO_ALIGN_TLB_ONLY);
     mop = finalize_memop_pair(s, mop);
     if (a->sz == 2) {
         TCGv_i64 tmp = tcg_temp_new_i64();
@@ -3742,9 +3741,8 @@ static bool trans_LDP(DisasContext *s, arg_ldstpair *a)
      * since that reuses the most code below.
      */
     mop = a->sz + 1;
-    if (s->align_mem) {
-        mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
-    }
+    mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
+    mop |= (s->align_mem ? 0 : MO_ALIGN_TLB_ONLY);
     mop = finalize_memop_pair(s, mop);
     if (a->sz == 2) {
         int o2 = s->be_data == MO_LE ? 32 : 0;
diff --git a/target/arm/tcg/translate-neon.c b/target/arm/tcg/translate-neon.c
index 844d2e29e4..e3c7d9217b 100644
--- a/target/arm/tcg/translate-neon.c
+++ b/target/arm/tcg/translate-neon.c
@@ -520,7 +520,7 @@ static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
     if (a->align) {
         align = pow2_align(a->align + 2); /* 4 ** a->align */
     } else {
-        align = s->align_mem ? MO_ALIGN : 0;
+        align = MO_ALIGN | (s->align_mem ? 0 : MO_ALIGN_TLB_ONLY);
     }
 
     /*
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index 9a85ea74db..b62104b4ae 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -744,8 +744,8 @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
  */
 static inline MemOp finalize_memop_atom(DisasContext *s, MemOp opc, MemOp atom)
 {
-    if (s->align_mem && !(opc & MO_AMASK)) {
-        opc |= MO_ALIGN;
+    if (!(opc & MO_AMASK)) {
+        opc |= MO_ALIGN | (s->align_mem ? 0 : MO_ALIGN_TLB_ONLY);
     }
     return opc | atom | s->be_data;
 }
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 294762c283..fbf09f5c82 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -3039,20 +3039,22 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
         case INDEX_op_qemu_ld2:
         case INDEX_op_qemu_st2:
             {
-                const char *s_al, *s_op, *s_at;
+                const char *s_al, *s_tlb, *s_op, *s_at;
                 MemOpIdx oi = op->args[k++];
                 MemOp mop = get_memop(oi);
                 unsigned ix = get_mmuidx(oi);
 
+                s_tlb = mop & MO_ALIGN_TLB_ONLY ? "tlb+" : "";
                 s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
                 s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
                 s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
-                mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
+                mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE |
+                         MO_ATOM_MASK | MO_ALIGN_TLB_ONLY);
 
                 /* If all fields are accounted for, print symbolically. */
                 if (!mop && s_al && s_op && s_at) {
-                    col += ne_fprintf(f, ",%s%s%s,%u",
-                                      s_at, s_al, s_op, ix);
+                    col += ne_fprintf(f, ",%s%s%s%s,%u",
+                                      s_at, s_al, s_tlb, s_op, ix);
                 } else {
                     mop = get_memop(oi);
                     col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
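
One closing illustration, again outside the patch: the target/arm side
now always encodes the strict alignment into the memop and merely
demotes it to a TLB-only check when the CPU is not trapping unaligned
accesses.  A minimal sketch of the updated finalize_memop_atom
decision follows (stand-in constants as in the earlier sketch;
align_mem models DisasContext::align_mem, i.e. SCTLR.A /
CCR.UNALIGN_TRP):

    /*
     * Sketch (not QEMU code) of the updated finalize_memop_atom()
     * from target/arm/tcg/translate.h, using the same simplified
     * stand-in constants as the earlier sketch.
     */
    enum {
        MO_ASHIFT = 4,
        MO_AMASK = 7 << MO_ASHIFT,
        MO_ALIGN = MO_AMASK,
        MO_ALIGN_TLB_ONLY = 1 << 8,
    };

    unsigned finalize_memop_sketch(unsigned opc, _Bool align_mem)
    {
        if (!(opc & MO_AMASK)) {
            /*
             * Always record the alignment requirement, but demote it
             * to a TLB-only check unless strict alignment is enabled;
             * only TLB entries with TLB_CHECK_ALIGNED then enforce it.
             */
            opc |= MO_ALIGN | (align_mem ? 0 : MO_ALIGN_TLB_ONLY);
        }
        return opc;
    }

Before this patch, a translation that tolerated unaligned access
carried no MO_AMASK bits at all, so the slow path had nothing to check
against TLB_CHECK_ALIGNED except the (incorrect) atomicity size; now
the true alignment requirement always travels with the memop.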