]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
KVM: riscv: Fast-path dirty logging write faults
authorJinyu Tang <tjytimi@163.com>
Sun, 17 May 2026 15:34:27 +0000 (23:34 +0800)
committerAnup Patel <anup@brainfault.org>
Wed, 3 Jun 2026 12:50:04 +0000 (18:20 +0530)
With dirty logging enabled, guest writes often fault on an existing 4K
G-stage leaf that was write-protected only for dirty tracking. The slow
path still performs the full fault handling flow and takes mmu_lock for
write, even though the page-table shape does not change.

x86 handles the analogous case in its fast page fault path by atomically
making a writable SPTE writable again when the fault is only a
write-protection fault. Add the same style of fast path for RISC-V. If a
write fault hits an existing 4K leaf in a writable dirty-log memslot,
mark the page dirty and atomically set the PTE writable and dirty under
the read side of mmu_lock.

The dirty bitmap is updated before the PTE becomes writable again. The
PTE D bit is also set so systems that trap on a clear D bit do not fall
back to the slow path for a writable but clean PTE.

Signed-off-by: Jinyu Tang <tjytimi@163.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Link: https://lore.kernel.org/r/20260517153427.94889-6-tjytimi@163.com
Signed-off-by: Anup Patel <anup@brainfault.org>
arch/riscv/kvm/mmu.c

index 514f06a1f688671a52760f7271746d97c474b5c1..c847b101c73ef72f72d2589a758a12bb96788652 100644 (file)
@@ -438,6 +438,77 @@ static unsigned long transparent_hugepage_adjust(struct kvm *kvm,
        return PAGE_SIZE;
 }
 
+static bool kvm_riscv_mmu_dirty_log_write_fault_fast(struct kvm *kvm,
+                                                    struct kvm_memory_slot *memslot,
+                                                    gpa_t gpa,
+                                                    struct kvm_gstage_mapping *out_map)
+{
+       struct kvm_gstage gstage;
+       unsigned long mmu_seq;
+       pte_t old_pte, new_pte;
+       pte_t *ptep;
+       gfn_t gfn = gpa >> PAGE_SHIFT;
+       u32 ptep_level;
+       bool dirty_marked = false;
+       bool ret;
+
+       kvm_riscv_gstage_init(&gstage, kvm);
+       mmu_seq = kvm->mmu_invalidate_seq;
+
+       read_lock(&kvm->mmu_lock);
+
+       if (mmu_invalidate_retry_gfn(kvm, mmu_seq, gfn)) {
+               ret = false;
+               goto out_unlock;
+       }
+
+       if (!kvm_riscv_gstage_get_leaf(&gstage, gpa, &ptep, &ptep_level) ||
+           ptep_level) {
+               ret = false;
+               goto out_unlock;
+       }
+
+       for (;;) {
+               old_pte = ptep_get(ptep);
+               if (!(pte_val(old_pte) & _PAGE_LEAF)) {
+                       ret = false;
+                       break;
+               }
+
+               if (!dirty_marked) {
+                       mark_page_dirty_in_slot(kvm, memslot, gfn);
+                       dirty_marked = true;
+               }
+
+               if ((pte_val(old_pte) & (_PAGE_WRITE | _PAGE_DIRTY)) ==
+                   (_PAGE_WRITE | _PAGE_DIRTY)) {
+                       new_pte = old_pte;
+                       ret = true;
+                       break;
+               }
+
+               new_pte = pte_mkdirty(pte_mkwrite_novma(old_pte));
+
+               if (kvm_riscv_gstage_try_update_pte(&gstage, ptep_level, gpa,
+                                                   ptep, old_pte, new_pte)) {
+                       ret = true;
+                       break;
+               }
+               cpu_relax();
+       }
+
+out_unlock:
+       read_unlock(&kvm->mmu_lock);
+
+       if (ret) {
+               out_map->addr = gpa & PAGE_MASK;
+               out_map->level = 0;
+               out_map->pte = new_pte;
+       }
+
+       return ret;
+}
+
 int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
                      gpa_t gpa, unsigned long hva, bool is_write,
                      struct kvm_gstage_mapping *out_map)
@@ -461,6 +532,10 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
        /* Setup initial state of output mapping */
        memset(out_map, 0, sizeof(*out_map));
 
+       if (is_write && logging &&
+           kvm_riscv_mmu_dirty_log_write_fault_fast(kvm, memslot, gpa, out_map))
+               return 0;
+
        /* We need minimum second+third level pages */
        ret = kvm_mmu_topup_memory_cache(pcache, kvm->arch.pgd_levels);
        if (ret) {