return 0;
}
+static void kvm_riscv_gstage_update_pte_prot(struct kvm_gstage *gstage, u32 level,
+ gpa_t addr, pte_t *ptep, pgprot_t prot)
+{
+ /*
+ * Read the live PTE exactly once so the prot comparison and the
+ * pfn we re-encode below are guaranteed to come from the same
+ * snapshot of the entry.
+ */
+ pte_t old_pte = ptep_get(ptep);
+ pte_t new_pte;
+
+ /* Nothing to do if the required protection is already in place. */
+ if (pgprot_val(pte_pgprot(old_pte)) == pgprot_val(prot))
+ return;
+
+ /* Keep the existing PFN, swap only the protection bits. */
+ new_pte = pfn_pte(pte_pfn(old_pte), prot);
+ new_pte = pte_mkdirty(new_pte);
+
+ set_pte(ptep, new_pte);
+
+ /* The old translation may still be cached; flush it at this level. */
+ gstage_tlb_flush(gstage, level, addr);
+}
+
int kvm_riscv_gstage_map_page(struct kvm_gstage *gstage,
struct kvm_mmu_memory_cache *pcache,
gpa_t gpa, phys_addr_t hpa, unsigned long page_size,
bool page_rdonly, bool page_exec,
struct kvm_gstage_mapping *out_map)
{
+ bool found_leaf;
+ u32 ptep_level;
pgprot_t prot;
+ pte_t *ptep;
int ret;
out_map->addr = gpa;
else
prot = PAGE_WRITE;
}
+
+ /* Fast path: if a leaf already maps this gpa, try to reuse it below. */
+ found_leaf = kvm_riscv_gstage_get_leaf(gstage, gpa, &ptep, &ptep_level);
+ if (found_leaf) {
+ /*
+ * ptep_level is the current gstage mapping level of addr, out_map->level
+ * is the required mapping level during fault handling.
+ *
+ * 1) ptep_level > out_map->level
+ * This happens when dirty logging is enabled and huge pages are used.
+ * KVM must track the pages at 4K level, and split the huge mapping
+ * into 4K mappings.
+ *
+ * 2) ptep_level < out_map->level
+ * This happens when dirty logging is disabled and huge pages are used.
+ * The gstage is split into 4K mappings, but the out_map level is now
+ * back to the huge page level. Ignore the out_map level this time, and
+ * just update the pte prot here. Otherwise, we would fall back to mapping
+ * the gstage at huge page level in `kvm_riscv_gstage_set_pte`, with the
+ * overhead of freeing the page tables(not support now), which would slow
+ * down the vCPUs' performance.
+ *
+ * It is better to recover the huge page mapping in the ioctl context when
+ * disabling dirty logging.
+ *
+ * 3) ptep_level == out_map->level
+ * We already have the ptep, just update the pte prot if the pfn not change.
+ * There is no need to invoke `kvm_riscv_gstage_set_pte` again.
+ */
+ if (ptep_level > out_map->level) {
+ kvm_riscv_gstage_split_huge(gstage, pcache, gpa,
+ out_map->level, true);
+ } else if (ALIGN_DOWN(PFN_PHYS(pte_pfn(ptep_get(ptep))), page_size) == hpa) {
+ /* Same backing page: only the protection bits need updating. */
+ /* NOTE(review): this assumes hpa is page_size-aligned -- confirm at callers. */
+ kvm_riscv_gstage_update_pte_prot(gstage, ptep_level, gpa, ptep, prot);
+ return 0;
+ }
+ }
+
out_map->pte = pfn_pte(PFN_DOWN(hpa), prot);
out_map->pte = pte_mkdirty(out_map->pte);
return kvm_riscv_gstage_set_pte(gstage, pcache, out_map);
}
+static inline unsigned long make_child_pte(unsigned long huge_pte, int index,
+ unsigned long child_page_size)
+{
+ /*
+ * A huge leaf PTE already carries the base PFN of the whole huge
+ * page, so each child entry only needs the offset of its slot at
+ * the next lower level folded in. Compute that offset in units of
+ * base pages and OR it into a copy of the parent entry.
+ */
+ unsigned long pages_per_child = child_page_size / PAGE_SIZE;
+ unsigned long slot_offset = index * pages_per_child;
+
+ return huge_pte | pte_val(pfn_pte(slot_offset, __pgprot(0)));
+}
+
+int kvm_riscv_gstage_split_huge(struct kvm_gstage *gstage,
+ struct kvm_mmu_memory_cache *pcache,
+ gpa_t addr, u32 target_level, bool flush)
+{
+ u32 current_level = kvm_riscv_gstage_pgd_levels - 1;
+ pte_t *next_ptep = (pte_t *)gstage->pgd;
+ unsigned long huge_pte, child_pte;
+ unsigned long child_page_size;
+ pte_t *ptep;
+ int i, ret;
+
+ /* A pre-filled page-table cache is required to back the split tables. */
+ if (!pcache)
+ return -ENOMEM;
+
+ /* Walk from the PGD toward target_level, splitting leaves on the way. */
+ while (current_level > target_level) {
+ ptep = (pte_t *)&next_ptep[gstage_pte_index(addr, current_level)];
+
+ /* Nothing mapped at this level, so there is nothing to split. */
+ if (!pte_val(ptep_get(ptep)))
+ break;
+
+ if (!gstage_pte_leaf(ptep)) {
+ /* Non-leaf entry: descend one level and keep walking. */
+ next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
+ current_level--;
+ continue;
+ }
+
+ huge_pte = pte_val(ptep_get(ptep));
+
+ ret = gstage_level_to_page_size(current_level - 1, &child_page_size);
+ if (ret)
+ return ret;
+
+ next_ptep = kvm_mmu_memory_cache_alloc(pcache);
+ if (!next_ptep)
+ return -ENOMEM;
+
+ /* Fill the new table with smaller leaves covering the huge page. */
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ child_pte = make_child_pte(huge_pte, i, child_page_size);
+ set_pte((pte_t *)&next_ptep[i], __pte(child_pte));
+ }
+
+ /* Replace the huge leaf with a pointer to the new child table. */
+ set_pte(ptep, pfn_pte(PFN_DOWN(__pa(next_ptep)),
+ __pgprot(_PAGE_TABLE)));
+
+ if (flush)
+ gstage_tlb_flush(gstage, current_level, addr);
+
+ current_level--;
+ }
+
+ return 0;
+}
+
void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr,
pte_t *ptep, u32 ptep_level, enum kvm_riscv_gstage_op op)
{