From: Jinyu Tang Date: Sun, 17 May 2026 15:34:27 +0000 (+0800) Subject: KVM: riscv: Fast-path dirty logging write faults X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=7705be59eb2d173933b55608ff7d26e14343e2f3;p=thirdparty%2Fkernel%2Flinux.git KVM: riscv: Fast-path dirty logging write faults With dirty logging enabled, guest writes often fault on an existing 4K G-stage leaf that was write-protected only for dirty tracking. The slow path still performs the full fault handling flow and takes mmu_lock for write, even though the page-table shape does not change. x86 handles the analogous case in its fast page fault path by atomically making a write-protected SPTE writable again when the fault is only a write-protection fault. Add the same style of fast path for RISC-V. If a write fault hits an existing 4K leaf in a writable dirty-log memslot, mark the page dirty and atomically set the PTE writable and dirty under the read side of mmu_lock. The dirty bitmap is updated before the PTE becomes writable again. The PTE D bit is also set so systems that trap on a clear D bit do not fall back to the slow path for a writable but clean PTE. 
Signed-off-by: Jinyu Tang Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20260517153427.94889-6-tjytimi@163.com Signed-off-by: Anup Patel --- diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index 514f06a1f688..c847b101c73e 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -438,6 +438,77 @@ static unsigned long transparent_hugepage_adjust(struct kvm *kvm, return PAGE_SIZE; } +static bool kvm_riscv_mmu_dirty_log_write_fault_fast(struct kvm *kvm, + struct kvm_memory_slot *memslot, + gpa_t gpa, + struct kvm_gstage_mapping *out_map) +{ /* Try to resolve a write fault on an existing level-0 G-stage leaf while holding mmu_lock for read only; returns true and fills *out_map on success, false when the caller must continue with its normal fault handling. */ + struct kvm_gstage gstage; + unsigned long mmu_seq; + pte_t old_pte, new_pte; + pte_t *ptep; + gfn_t gfn = gpa >> PAGE_SHIFT; + u32 ptep_level; + bool dirty_marked = false; + bool ret; + + kvm_riscv_gstage_init(&gstage, kvm); + mmu_seq = kvm->mmu_invalidate_seq; /* sampled before taking mmu_lock; compared by mmu_invalidate_retry_gfn() below */ + + read_lock(&kvm->mmu_lock); + + if (mmu_invalidate_retry_gfn(kvm, mmu_seq, gfn)) { /* retry requested for this gfn: give up on the fast path */ + ret = false; + goto out_unlock; + } + + if (!kvm_riscv_gstage_get_leaf(&gstage, gpa, &ptep, &ptep_level) || + ptep_level) { /* no leaf found, or the leaf is not at level 0: not handled here */ + ret = false; + goto out_unlock; + } + + for (;;) { /* loop until the PTE update succeeds, the PTE is already writable+dirty, or the entry is no longer a leaf */ + old_pte = ptep_get(ptep); + if (!(pte_val(old_pte) & _PAGE_LEAF)) { + ret = false; + break; + } + + if (!dirty_marked) { /* mark the page dirty once, before the PTE can become writable */ + mark_page_dirty_in_slot(kvm, memslot, gfn); + dirty_marked = true; + } + + if ((pte_val(old_pte) & (_PAGE_WRITE | _PAGE_DIRTY)) == + (_PAGE_WRITE | _PAGE_DIRTY)) { /* already writable and dirty: report the existing PTE without updating it */ + new_pte = old_pte; + ret = true; + break; + } + + new_pte = pte_mkdirty(pte_mkwrite_novma(old_pte)); /* NOTE(review): no check is visible in this function that the host mapping permits writes; confirm that a read-only level-0 leaf reaching this point can only have been write-protected for dirty tracking */ + + if (kvm_riscv_gstage_try_update_pte(&gstage, ptep_level, gpa, + ptep, old_pte, new_pte)) { /* presumably a compare-and-exchange against old_pte (TODO confirm); on failure the PTE is re-read and the update retried */ + ret = true; + break; + } + cpu_relax(); + } + +out_unlock: + read_unlock(&kvm->mmu_lock); + + if (ret) { /* new_pte is assigned on every path that sets ret to true */ + out_map->addr = gpa & PAGE_MASK; + out_map->level = 0; + out_map->pte = new_pte; + } + + return ret; +} + int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, gpa_t gpa, unsigned long hva, bool is_write, struct kvm_gstage_mapping *out_map) @@ -461,6 +532,10 @@ int 
kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, /* Setup initial state of output mapping */ memset(out_map, 0, sizeof(*out_map)); + if (is_write && logging && + kvm_riscv_mmu_dirty_log_write_fault_fast(kvm, memslot, gpa, out_map)) + return 0; + /* We need minimum second+third level pages */ ret = kvm_mmu_topup_memory_cache(pcache, kvm->arch.pgd_levels); if (ret) {