arch/mips/kvm/mmu.c

   1 /*
   2  * This file is subject to the terms and conditions of the GNU General Public
   3  * License.  See the file "COPYING" in the main directory of this archive
   4  * for more details.
   5  *
   6  * KVM/MIPS MMU handling in the KVM module.
   7  *
   8  * Copyright (C) 2012  MIPS Technologies, Inc.  All rights reserved.
   9  * Authors: Sanjay Lal <sanjayl@kymasys.com>
  10  */
  11
  12 #include <linux/highmem.h>
  13 #include <linux/kvm_host.h>
  14 #include <linux/uaccess.h>
  15 #include <asm/mmu_context.h>
  16 #include <asm/pgalloc.h>
  17
  18 /*
  19  * KVM_MMU_CACHE_MIN_PAGES is the number of GPA page table translation levels
  20  * for which pages need to be cached.
  21  */
  22 #if defined(__PAGETABLE_PMD_FOLDED)
  23 #define KVM_MMU_CACHE_MIN_PAGES 1
  24 #else
  25 #define KVM_MMU_CACHE_MIN_PAGES 2
  26 #endif
  27
  28 void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
  29 {
  30         kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
  31 }
  32
  33 /**
  34  * kvm_pgd_init() - Initialise KVM GPA page directory.
  35  * @page:       Pointer to page directory (PGD) for KVM GPA.
  36  *
  37  * Initialise a KVM GPA page directory with pointers to the invalid table, i.e.
  38  * representing no mappings. This is similar to pgd_init(), however it
  39  * initialises all the page directory pointers, not just the ones corresponding
  40  * to the userland address space (since it is for the guest physical address
  41  * space rather than a virtual address space).
  42  */
  43 static void kvm_pgd_init(void *page)
  44 {
  45         unsigned long *p, *end;
  46         unsigned long entry;
  47
  48 #ifdef __PAGETABLE_PMD_FOLDED
  49         entry = (unsigned long)invalid_pte_table;
  50 #else
  51         entry = (unsigned long)invalid_pmd_table;
  52 #endif
  53
  54         p = (unsigned long *)page;
  55         end = p + PTRS_PER_PGD;
  56
  57         do {
  58                 p[0] = entry;
  59                 p[1] = entry;
  60                 p[2] = entry;
  61                 p[3] = entry;
  62                 p[4] = entry;
  63                 p += 8;
  64                 p[-3] = entry;
  65                 p[-2] = entry;
  66                 p[-1] = entry;
  67         } while (p != end);
  68 }
  69
  70 /**
  71  * kvm_pgd_alloc() - Allocate and initialise a KVM GPA page directory.
  72  *
  73  * Allocate a blank KVM GPA page directory (PGD) for representing guest physical
  74  * to host physical page mappings.
  75  *
  76  * Returns:     Pointer to new KVM GPA page directory.
  77  *              NULL on allocation failure.
  78  */
  79 pgd_t *kvm_pgd_alloc(void)
  80 {
  81         pgd_t *ret;
  82
  83         ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD_TABLE_ORDER);
  84         if (ret)
  85                 kvm_pgd_init(ret);
  86
  87         return ret;
  88 }
  89
  90 /**
  91  * kvm_mips_walk_pgd() - Walk page table with optional allocation.
  92  * @pgd:        Page directory pointer.
  93  * @addr:       Address to index page table using.
  94  * @cache:      MMU page cache to allocate new page tables from, or NULL.
  95  *
  96  * Walk the page tables pointed to by @pgd to find the PTE corresponding to the
  97  * address @addr. If page tables don't exist for @addr, they will be created
  98  * from the MMU cache if @cache is not NULL.
  99  *
 100  * Returns:     Pointer to pte_t corresponding to @addr.
 101  *              NULL if a page table doesn't exist for @addr and !@cache.
 102  *              NULL if a page table allocation failed.
 103  */
 104 static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
 105                                 unsigned long addr)
 106 {
 107         p4d_t *p4d;
 108         pud_t *pud;
 109         pmd_t *pmd;
 110
 111         pgd += pgd_index(addr);
 112         if (pgd_none(*pgd)) {
 113                 /* Not used on MIPS yet */
 114                 BUG();
 115                 return NULL;
 116         }
 117         p4d = p4d_offset(pgd, addr);
 118         pud = pud_offset(p4d, addr);
 119         if (pud_none(*pud)) {
 120                 pmd_t *new_pmd;
 121
 122                 if (!cache)
 123                         return NULL;
 124                 new_pmd = kvm_mmu_memory_cache_alloc(cache);
 125                 pmd_init(new_pmd);
 126                 pud_populate(NULL, pud, new_pmd);
 127         }
 128         pmd = pmd_offset(pud, addr);
 129         if (pmd_none(*pmd)) {
 130                 pte_t *new_pte;
 131
 132                 if (!cache)
 133                         return NULL;
 134                 new_pte = kvm_mmu_memory_cache_alloc(cache);
 135                 clear_page(new_pte);
 136                 pmd_populate_kernel(NULL, pmd, new_pte);
 137         }
 138         return pte_offset_kernel(pmd, addr);
 139 }
 140
 141 /* Caller must hold kvm->mm_lock */
 142 static pte_t *kvm_mips_pte_for_gpa(struct kvm *kvm,
 143                                    struct kvm_mmu_memory_cache *cache,
 144                                    unsigned long addr)
 145 {
 146         return kvm_mips_walk_pgd(kvm->arch.gpa_mm.pgd, cache, addr);
 147 }
 148
 149 /*
 150  * kvm_mips_flush_gpa_{pte,pmd,pud,pgd,pt}.
 151  * Flush a range of guest physical address space from the VM's GPA page tables.
 152  */
 153
 154 static bool kvm_mips_flush_gpa_pte(pte_t *pte, unsigned long start_gpa,
 155                                    unsigned long end_gpa)
 156 {
 157         int i_min = pte_index(start_gpa);
 158         int i_max = pte_index(end_gpa);
 159         bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1);
 160         int i;
 161
 162         for (i = i_min; i <= i_max; ++i) {
 163                 if (!pte_present(pte[i]))
 164                         continue;
 165
 166                 set_pte(pte + i, __pte(0));
 167         }
 168         return safe_to_remove;
 169 }
 170
 171 static bool kvm_mips_flush_gpa_pmd(pmd_t *pmd, unsigned long start_gpa,
 172                                    unsigned long end_gpa)
 173 {
 174         pte_t *pte;
 175         unsigned long end = ~0ul;
 176         int i_min = pmd_index(start_gpa);
 177         int i_max = pmd_index(end_gpa);
 178         bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1);
 179         int i;
 180
 181         for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
 182                 if (!pmd_present(pmd[i]))
 183                         continue;
 184
 185                 pte = pte_offset_kernel(pmd + i, 0);
 186                 if (i == i_max)
 187                         end = end_gpa;
 188
 189                 if (kvm_mips_flush_gpa_pte(pte, start_gpa, end)) {
 190                         pmd_clear(pmd + i);
 191                         pte_free_kernel(NULL, pte);
 192                 } else {
 193                         safe_to_remove = false;
 194                 }
 195         }
 196         return safe_to_remove;
 197 }
 198
 199 static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa,
 200                                    unsigned long end_gpa)
 201 {
 202         pmd_t *pmd;
 203         unsigned long end = ~0ul;
 204         int i_min = pud_index(start_gpa);
 205         int i_max = pud_index(end_gpa);
 206         bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1);
 207         int i;
 208
 209         for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
 210                 if (!pud_present(pud[i]))
 211                         continue;
 212
 213                 pmd = pmd_offset(pud + i, 0);
 214                 if (i == i_max)
 215                         end = end_gpa;
 216
 217                 if (kvm_mips_flush_gpa_pmd(pmd, start_gpa, end)) {
 218                         pud_clear(pud + i);
 219                         pmd_free(NULL, pmd);
 220                 } else {
 221                         safe_to_remove = false;
 222                 }
 223         }
 224         return safe_to_remove;
 225 }
 226
 227 static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa,
 228                                    unsigned long end_gpa)
 229 {
 230         p4d_t *p4d;
 231         pud_t *pud;
 232         unsigned long end = ~0ul;
 233         int i_min = pgd_index(start_gpa);
 234         int i_max = pgd_index(end_gpa);
 235         bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1);
 236         int i;
 237
 238         for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
 239                 if (!pgd_present(pgd[i]))
 240                         continue;
 241
 242                 p4d = p4d_offset(pgd, 0);
 243                 pud = pud_offset(p4d + i, 0);
 244                 if (i == i_max)
 245                         end = end_gpa;
 246
 247                 if (kvm_mips_flush_gpa_pud(pud, start_gpa, end)) {
 248                         pgd_clear(pgd + i);
 249                         pud_free(NULL, pud);
 250                 } else {
 251                         safe_to_remove = false;
 252                 }
 253         }
 254         return safe_to_remove;
 255 }
 256
 257 /**
 258  * kvm_mips_flush_gpa_pt() - Flush a range of guest physical addresses.
 259  * @kvm:        KVM pointer.
 260  * @start_gfn:  Guest frame number of first page in GPA range to flush.
 261  * @end_gfn:    Guest frame number of last page in GPA range to flush.
 262  *
 263  * Flushes a range of GPA mappings from the GPA page tables.
 264  *
 265  * The caller must hold the @kvm->mmu_lock spinlock.
 266  *
 267  * Returns:     Whether its safe to remove the top level page directory because
 268  *              all lower levels have been removed.
 269  */
 270 bool kvm_mips_flush_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
 271 {
 272         return kvm_mips_flush_gpa_pgd(kvm->arch.gpa_mm.pgd,
 273                                       start_gfn << PAGE_SHIFT,
 274                                       end_gfn << PAGE_SHIFT);
 275 }
 276
 277 #define BUILD_PTE_RANGE_OP(name, op)                                    \
 278 static int kvm_mips_##name##_pte(pte_t *pte, unsigned long start,       \
 279                                  unsigned long end)                     \
 280 {                                                                       \
 281         int ret = 0;                                                    \
 282         int i_min = pte_index(start);                           \
 283         int i_max = pte_index(end);                                     \
 284         int i;                                                          \
 285         pte_t old, new;                                                 \
 286                                                                         \
 287         for (i = i_min; i <= i_max; ++i) {                              \
 288                 if (!pte_present(pte[i]))                               \
 289                         continue;                                       \
 290                                                                         \
 291                 old = pte[i];                                           \
 292                 new = op(old);                                          \
 293                 if (pte_val(new) == pte_val(old))                       \
 294                         continue;                                       \
 295                 set_pte(pte + i, new);                                  \
 296                 ret = 1;                                                \
 297         }                                                               \
 298         return ret;                                                     \
 299 }                                                                       \
 300                                                                         \
 301 /* returns true if anything was done */                                 \
 302 static int kvm_mips_##name##_pmd(pmd_t *pmd, unsigned long start,       \
 303                                  unsigned long end)                     \
 304 {                                                                       \
 305         int ret = 0;                                                    \
 306         pte_t *pte;                                                     \
 307         unsigned long cur_end = ~0ul;                                   \
 308         int i_min = pmd_index(start);                           \
 309         int i_max = pmd_index(end);                                     \
 310         int i;                                                          \
 311                                                                         \
 312         for (i = i_min; i <= i_max; ++i, start = 0) {                   \
 313                 if (!pmd_present(pmd[i]))                               \
 314                         continue;                                       \
 315                                                                         \
 316                 pte = pte_offset_kernel(pmd + i, 0);                            \
 317                 if (i == i_max)                                         \
 318                         cur_end = end;                                  \
 319                                                                         \
 320                 ret |= kvm_mips_##name##_pte(pte, start, cur_end);      \
 321         }                                                               \
 322         return ret;                                                     \
 323 }                                                                       \
 324                                                                         \
 325 static int kvm_mips_##name##_pud(pud_t *pud, unsigned long start,       \
 326                                  unsigned long end)                     \
 327 {                                                                       \
 328         int ret = 0;                                                    \
 329         pmd_t *pmd;                                                     \
 330         unsigned long cur_end = ~0ul;                                   \
 331         int i_min = pud_index(start);                           \
 332         int i_max = pud_index(end);                                     \
 333         int i;                                                          \
 334                                                                         \
 335         for (i = i_min; i <= i_max; ++i, start = 0) {                   \
 336                 if (!pud_present(pud[i]))                               \
 337                         continue;                                       \
 338                                                                         \
 339                 pmd = pmd_offset(pud + i, 0);                           \
 340                 if (i == i_max)                                         \
 341                         cur_end = end;                                  \
 342                                                                         \
 343                 ret |= kvm_mips_##name##_pmd(pmd, start, cur_end);      \
 344         }                                                               \
 345         return ret;                                                     \
 346 }                                                                       \
 347                                                                         \
 348 static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start,       \
 349                                  unsigned long end)                     \
 350 {                                                                       \
 351         int ret = 0;                                                    \
 352         p4d_t *p4d;                                                     \
 353         pud_t *pud;                                                     \
 354         unsigned long cur_end = ~0ul;                                   \
 355         int i_min = pgd_index(start);                                   \
 356         int i_max = pgd_index(end);                                     \
 357         int i;                                                          \
 358                                                                         \
 359         for (i = i_min; i <= i_max; ++i, start = 0) {                   \
 360                 if (!pgd_present(pgd[i]))                               \
 361                         continue;                                       \
 362                                                                         \
 363                 p4d = p4d_offset(pgd, 0);                               \
 364                 pud = pud_offset(p4d + i, 0);                           \
 365                 if (i == i_max)                                         \
 366                         cur_end = end;                                  \
 367                                                                         \
 368                 ret |= kvm_mips_##name##_pud(pud, start, cur_end);      \
 369         }                                                               \
 370         return ret;                                                     \
 371 }
 372
 373 /*
 374  * kvm_mips_mkclean_gpa_pt.
 375  * Mark a range of guest physical address space clean (writes fault) in the VM's
 376  * GPA page table to allow dirty page tracking.
 377  */
 378
 379 BUILD_PTE_RANGE_OP(mkclean, pte_mkclean)
 380
 381 /**
 382  * kvm_mips_mkclean_gpa_pt() - Make a range of guest physical addresses clean.
 383  * @kvm:        KVM pointer.
 384  * @start_gfn:  Guest frame number of first page in GPA range to flush.
 385  * @end_gfn:    Guest frame number of last page in GPA range to flush.
 386  *
 387  * Make a range of GPA mappings clean so that guest writes will fault and
 388  * trigger dirty page logging.
 389  *
 390  * The caller must hold the @kvm->mmu_lock spinlock.
 391  *
 392  * Returns:     Whether any GPA mappings were modified, which would require
 393  *              derived mappings (GVA page tables & TLB enties) to be
 394  *              invalidated.
 395  */
 396 int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
 397 {
 398         return kvm_mips_mkclean_pgd(kvm->arch.gpa_mm.pgd,
 399                                     start_gfn << PAGE_SHIFT,
 400                                     end_gfn << PAGE_SHIFT);
 401 }
 402
 403 /**
 404  * kvm_arch_mmu_enable_log_dirty_pt_masked() - write protect dirty pages
 405  * @kvm:        The KVM pointer
 406  * @slot:       The memory slot associated with mask
 407  * @gfn_offset: The gfn offset in memory slot
 408  * @mask:       The mask of dirty pages at offset 'gfn_offset' in this memory
 409  *              slot to be write protected
 410  *
 411  * Walks bits set in mask write protects the associated pte's. Caller must
 412  * acquire @kvm->mmu_lock.
 413  */
 414 void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 415                 struct kvm_memory_slot *slot,
 416                 gfn_t gfn_offset, unsigned long mask)
 417 {
 418         gfn_t base_gfn = slot->base_gfn + gfn_offset;
 419         gfn_t start = base_gfn +  __ffs(mask);
 420         gfn_t end = base_gfn + __fls(mask);
 421
 422         kvm_mips_mkclean_gpa_pt(kvm, start, end);
 423 }
 424
 425 /*
 426  * kvm_mips_mkold_gpa_pt.
 427  * Mark a range of guest physical address space old (all accesses fault) in the
 428  * VM's GPA page table to allow detection of commonly used pages.
 429  */
 430
 431 BUILD_PTE_RANGE_OP(mkold, pte_mkold)
 432
 433 static int kvm_mips_mkold_gpa_pt(struct kvm *kvm, gfn_t start_gfn,
 434                                  gfn_t end_gfn)
 435 {
 436         return kvm_mips_mkold_pgd(kvm->arch.gpa_mm.pgd,
 437                                   start_gfn << PAGE_SHIFT,
 438                                   end_gfn << PAGE_SHIFT);
 439 }
 440
 441 bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 442 {
 443         kvm_mips_flush_gpa_pt(kvm, range->start, range->end);
 444         return true;
 445 }
 446
 447 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 448 {
 449         gpa_t gpa = range->start << PAGE_SHIFT;
 450         pte_t hva_pte = range->arg.pte;
 451         pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
 452         pte_t old_pte;
 453
 454         if (!gpa_pte)
 455                 return false;
 456
 457         /* Mapping may need adjusting depending on memslot flags */
 458         old_pte = *gpa_pte;
 459         if (range->slot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte))
 460                 hva_pte = pte_mkclean(hva_pte);
 461         else if (range->slot->flags & KVM_MEM_READONLY)
 462                 hva_pte = pte_wrprotect(hva_pte);
 463
 464         set_pte(gpa_pte, hva_pte);
 465
 466         /* Replacing an absent or old page doesn't need flushes */
 467         if (!pte_present(old_pte) || !pte_young(old_pte))
 468                 return false;
 469
 470         /* Pages swapped, aged, moved, or cleaned require flushes */
 471         return !pte_present(hva_pte) ||
 472                !pte_young(hva_pte) ||
 473                pte_pfn(old_pte) != pte_pfn(hva_pte) ||
 474                (pte_dirty(old_pte) && !pte_dirty(hva_pte));
 475 }
 476
 477 bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 478 {
 479         return kvm_mips_mkold_gpa_pt(kvm, range->start, range->end);
 480 }
 481
 482 bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 483 {
 484         gpa_t gpa = range->start << PAGE_SHIFT;
 485         pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
 486
 487         if (!gpa_pte)
 488                 return false;
 489         return pte_young(*gpa_pte);
 490 }
 491
 492 /**
 493  * _kvm_mips_map_page_fast() - Fast path GPA fault handler.
 494  * @vcpu:               VCPU pointer.
 495  * @gpa:                Guest physical address of fault.
 496  * @write_fault:        Whether the fault was due to a write.
 497  * @out_entry:          New PTE for @gpa (written on success unless NULL).
 498  * @out_buddy:          New PTE for @gpa's buddy (written on success unless
 499  *                      NULL).
 500  *
 501  * Perform fast path GPA fault handling, doing all that can be done without
 502  * calling into KVM. This handles marking old pages young (for idle page
 503  * tracking), and dirtying of clean pages (for dirty page logging).
 504  *
 505  * Returns:     0 on success, in which case we can update derived mappings and
 506  *              resume guest execution.
 507  *              -EFAULT on failure due to absent GPA mapping or write to
 508  *              read-only page, in which case KVM must be consulted.
 509  */
 510 static int _kvm_mips_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa,
 511                                    bool write_fault,
 512                                    pte_t *out_entry, pte_t *out_buddy)
 513 {
 514         struct kvm *kvm = vcpu->kvm;
 515         gfn_t gfn = gpa >> PAGE_SHIFT;
 516         pte_t *ptep;
 517         kvm_pfn_t pfn = 0;      /* silence bogus GCC warning */
 518         bool pfn_valid = false;
 519         int ret = 0;
 520
 521         spin_lock(&kvm->mmu_lock);
 522
 523         /* Fast path - just check GPA page table for an existing entry */
 524         ptep = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
 525         if (!ptep || !pte_present(*ptep)) {
 526                 ret = -EFAULT;
 527                 goto out;
 528         }
 529
 530         /* Track access to pages marked old */
 531         if (!pte_young(*ptep)) {
 532                 set_pte(ptep, pte_mkyoung(*ptep));
 533                 pfn = pte_pfn(*ptep);
 534                 pfn_valid = true;
 535                 /* call kvm_set_pfn_accessed() after unlock */
 536         }
 537         if (write_fault && !pte_dirty(*ptep)) {
 538                 if (!pte_write(*ptep)) {
 539                         ret = -EFAULT;
 540                         goto out;
 541                 }
 542
 543                 /* Track dirtying of writeable pages */
 544                 set_pte(ptep, pte_mkdirty(*ptep));
 545                 pfn = pte_pfn(*ptep);
 546                 mark_page_dirty(kvm, gfn);
 547                 kvm_set_pfn_dirty(pfn);
 548         }
 549
 550         if (out_entry)
 551                 *out_entry = *ptep;
 552         if (out_buddy)
 553                 *out_buddy = *ptep_buddy(ptep);
 554
 555 out:
 556         spin_unlock(&kvm->mmu_lock);
 557         if (pfn_valid)
 558                 kvm_set_pfn_accessed(pfn);
 559         return ret;
 560 }
 561
 562 /**
 563  * kvm_mips_map_page() - Map a guest physical page.
 564  * @vcpu:               VCPU pointer.
 565  * @gpa:                Guest physical address of fault.
 566  * @write_fault:        Whether the fault was due to a write.
 567  * @out_entry:          New PTE for @gpa (written on success unless NULL).
 568  * @out_buddy:          New PTE for @gpa's buddy (written on success unless
 569  *                      NULL).
 570  *
 571  * Handle GPA faults by creating a new GPA mapping (or updating an existing
 572  * one).
 573  *
 574  * This takes care of marking pages young or dirty (idle/dirty page tracking),
 575  * asking KVM for the corresponding PFN, and creating a mapping in the GPA page
 576  * tables. Derived mappings (GVA page tables and TLBs) must be handled by the
 577  * caller.
 578  *
 579  * Returns:     0 on success, in which case the caller may use the @out_entry
 580  *              and @out_buddy PTEs to update derived mappings and resume guest
 581  *              execution.
 582  *              -EFAULT if there is no memory region at @gpa or a write was
 583  *              attempted to a read-only memory region. This is usually handled
 584  *              as an MMIO access.
 585  */
 586 static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa,
 587                              bool write_fault,
 588                              pte_t *out_entry, pte_t *out_buddy)
 589 {
 590         struct kvm *kvm = vcpu->kvm;
 591         struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
 592         gfn_t gfn = gpa >> PAGE_SHIFT;
 593         int srcu_idx, err;
 594         kvm_pfn_t pfn;
 595         pte_t *ptep, entry, old_pte;
 596         bool writeable;
 597         unsigned long prot_bits;
 598         unsigned long mmu_seq;
 599
 600         /* Try the fast path to handle old / clean pages */
 601         srcu_idx = srcu_read_lock(&kvm->srcu);
 602         err = _kvm_mips_map_page_fast(vcpu, gpa, write_fault, out_entry,
 603                                       out_buddy);
 604         if (!err)
 605                 goto out;
 606
 607         /* We need a minimum of cached pages ready for page table creation */
 608         err = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
 609         if (err)
 610                 goto out;
 611
 612 retry:
 613         /*
 614          * Used to check for invalidations in progress, of the pfn that is
 615          * returned by pfn_to_pfn_prot below.
 616          */
 617         mmu_seq = kvm->mmu_invalidate_seq;
 618         /*
 619          * Ensure the read of mmu_invalidate_seq isn't reordered with PTE reads
 620          * in gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't
 621          * risk the page we get a reference to getting unmapped before we have a
 622          * chance to grab the mmu_lock without mmu_invalidate_retry() noticing.
 623          *
 624          * This smp_rmb() pairs with the effective smp_wmb() of the combination
 625          * of the pte_unmap_unlock() after the PTE is zapped, and the
 626          * spin_lock() in kvm_mmu_notifier_invalidate_<page|range_end>() before
 627          * mmu_invalidate_seq is incremented.
 628          */
 629         smp_rmb();
 630
 631         /* Slow path - ask KVM core whether we can access this GPA */
 632         pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writeable);
 633         if (is_error_noslot_pfn(pfn)) {
 634                 err = -EFAULT;
 635                 goto out;
 636         }
 637
 638         spin_lock(&kvm->mmu_lock);
 639         /* Check if an invalidation has taken place since we got pfn */
 640         if (mmu_invalidate_retry(kvm, mmu_seq)) {
 641                 /*
 642                  * This can happen when mappings are changed asynchronously, but
 643                  * also synchronously if a COW is triggered by
 644                  * gfn_to_pfn_prot().
 645                  */
 646                 spin_unlock(&kvm->mmu_lock);
 647                 kvm_release_pfn_clean(pfn);
 648                 goto retry;
 649         }
 650
 651         /* Ensure page tables are allocated */
 652         ptep = kvm_mips_pte_for_gpa(kvm, memcache, gpa);
 653
 654         /* Set up the PTE */
 655         prot_bits = _PAGE_PRESENT | __READABLE | _page_cachable_default;
 656         if (writeable) {
 657                 prot_bits |= _PAGE_WRITE;
 658                 if (write_fault) {
 659                         prot_bits |= __WRITEABLE;
 660                         mark_page_dirty(kvm, gfn);
 661                         kvm_set_pfn_dirty(pfn);
 662                 }
 663         }
 664         entry = pfn_pte(pfn, __pgprot(prot_bits));
 665
 666         /* Write the PTE */
 667         old_pte = *ptep;
 668         set_pte(ptep, entry);
 669
 670         err = 0;
 671         if (out_entry)
 672                 *out_entry = *ptep;
 673         if (out_buddy)
 674                 *out_buddy = *ptep_buddy(ptep);
 675
 676         spin_unlock(&kvm->mmu_lock);
 677         kvm_release_pfn_clean(pfn);
 678         kvm_set_pfn_accessed(pfn);
 679 out:
 680         srcu_read_unlock(&kvm->srcu, srcu_idx);
 681         return err;
 682 }
 683
 684 int kvm_mips_handle_vz_root_tlb_fault(unsigned long badvaddr,
 685                                       struct kvm_vcpu *vcpu,
 686                                       bool write_fault)
 687 {
 688         int ret;
 689
 690         ret = kvm_mips_map_page(vcpu, badvaddr, write_fault, NULL, NULL);
 691         if (ret)
 692                 return ret;
 693
 694         /* Invalidate this entry in the TLB */
 695         return kvm_vz_host_tlb_inv(vcpu, badvaddr);
 696 }
 697
 698 /**
 699  * kvm_mips_migrate_count() - Migrate timer.
 700  * @vcpu:       Virtual CPU.
 701  *
 702  * Migrate CP0_Count hrtimer to the current CPU by cancelling and restarting it
 703  * if it was running prior to being cancelled.
 704  *
 705  * Must be called when the VCPU is migrated to a different CPU to ensure that
 706  * timer expiry during guest execution interrupts the guest and causes the
 707  * interrupt to be delivered in a timely manner.
 708  */
 709 static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu)
 710 {
 711         if (hrtimer_cancel(&vcpu->arch.comparecount_timer))
 712                 hrtimer_restart(&vcpu->arch.comparecount_timer);
 713 }
 714
 715 /* Restore ASID once we are scheduled back after preemption */
 716 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 717 {
 718         unsigned long flags;
 719
 720         kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu);
 721
 722         local_irq_save(flags);
 723
 724         vcpu->cpu = cpu;
 725         if (vcpu->arch.last_sched_cpu != cpu) {
 726                 kvm_debug("[%d->%d]KVM VCPU[%d] switch\n",
 727                           vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id);
 728                 /*
 729                  * Migrate the timer interrupt to the current CPU so that it
 730                  * always interrupts the guest and synchronously triggers a
 731                  * guest timer interrupt.
 732                  */
 733                 kvm_mips_migrate_count(vcpu);
 734         }
 735
 736         /* restore guest state to registers */
 737         kvm_mips_callbacks->vcpu_load(vcpu, cpu);
 738
 739         local_irq_restore(flags);
 740 }
 741
 742 /* ASID can change if another task is scheduled during preemption */
 743 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 744 {
 745         unsigned long flags;
 746         int cpu;
 747
 748         local_irq_save(flags);
 749
 750         cpu = smp_processor_id();
 751         vcpu->arch.last_sched_cpu = cpu;
 752         vcpu->cpu = -1;
 753
 754         /* save guest state in registers */
 755         kvm_mips_callbacks->vcpu_put(vcpu, cpu);
 756
 757         local_irq_restore(flags);
 758 }