#include <linux/ptrace.h>
#include <linux/vmalloc.h>
#include <linux/sched/sysctl.h>
-#include <linux/net_mm.h>
#include <trace/events/kmem.h>
} while (pgd++, addr = next, addr != end);
}
-void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
+void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
struct vm_area_struct *vma, unsigned long floor,
unsigned long ceiling, bool mm_wr_locked)
{
- MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
-
do {
unsigned long addr = vma->vm_start;
struct vm_area_struct *next;
* Note: USER_PGTABLES_CEILING may be passed as ceiling and may
* be 0. This will underflow and is okay.
*/
- next = mas_find(&mas, ceiling - 1);
+ next = mas_find(mas, ceiling - 1);
/*
* Hide vma from rmap and truncate_pagecache before freeing
while (next && next->vm_start <= vma->vm_end + PMD_SIZE
&& !is_vm_hugetlb_page(next)) {
vma = next;
- next = mas_find(&mas, ceiling - 1);
+ next = mas_find(mas, ceiling - 1);
if (mm_wr_locked)
vma_start_write(vma);
unlink_anon_vmas(vma);
return -EBUSY;
return -ENOENT;
} else if (is_pte_marker_entry(entry)) {
- if (is_swapin_error_entry(entry) || userfaultfd_wp(dst_vma))
- set_pte_at(dst_mm, addr, dst_pte, pte);
+ pte_marker marker = copy_pte_marker(entry, dst_vma);
+
+ if (marker)
+ set_pte_at(dst_mm, addr, dst_pte,
+ make_pte_marker(marker));
return 0;
}
if (!userfaultfd_wp(dst_vma))
* Use the raw variant of the seqcount_t write API to avoid
* lockdep complaining about preemptibility.
*/
- mmap_assert_write_locked(src_mm);
+ vma_assert_write_locked(src_vma);
raw_write_seqcount_begin(&src_mm->write_protect_seq);
}
continue;
ptent = ptep_get_and_clear_full(mm, addr, pte,
tlb->fullmm);
+ arch_check_zapped_pte(vma, ptent);
tlb_remove_tlb_entry(tlb, pte, addr);
zap_install_uffd_wp_if_needed(vma, addr, pte, details,
ptent);
- if (unlikely(!page))
+ if (unlikely(!page)) {
+ ksm_might_unmap_zero_page(mm, ptent);
continue;
+ }
delay_rmap = 0;
if (!PageAnon(page)) {
!zap_drop_file_uffd_wp(details))
continue;
} else if (is_hwpoison_entry(entry) ||
- is_swapin_error_entry(entry)) {
+ is_poisoned_swp_entry(entry)) {
if (!should_zap_cows(details))
continue;
} else {
/**
* unmap_vmas - unmap a range of memory covered by a list of vma's
* @tlb: address of the caller's struct mmu_gather
- * @mt: the maple tree
+ * @mas: the maple state
* @vma: the starting vma
* @start_addr: virtual address at which to start unmapping
* @end_addr: virtual address at which to end unmapping
+ * @tree_end: The maximum index to check
+ * @mm_wr_locked: lock flag
*
* Unmap all pages in the vma list.
*
* ensure that any thus-far unmapped pages are flushed before unmap_vmas()
* drops the lock and schedules.
*/
-void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
+void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
struct vm_area_struct *vma, unsigned long start_addr,
- unsigned long end_addr, bool mm_wr_locked)
+ unsigned long end_addr, unsigned long tree_end,
+ bool mm_wr_locked)
{
struct mmu_notifier_range range;
struct zap_details details = {
/* Careful - we need to zap private pages too! */
.even_cows = true,
};
- MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm,
start_addr, end_addr);
do {
unmap_single_vma(tlb, vma, start_addr, end_addr, &details,
mm_wr_locked);
- } while ((vma = mas_find(&mas, end_addr - 1)) != NULL);
+ } while ((vma = mas_find(mas, tree_end - 1)) != NULL);
mmu_notifier_invalidate_range_end(&range);
}
return retval;
}
-#ifdef pte_index
static int insert_page_in_batch_locked(struct vm_area_struct *vma, pte_t *pte,
unsigned long addr, struct page *page, pgprot_t prot)
{
}
/* insert_pages() amortizes the cost of spinlock operations
- * when inserting pages in a loop. Arch *must* define pte_index.
+ * when inserting pages in a loop.
*/
static int insert_pages(struct vm_area_struct *vma, unsigned long addr,
struct page **pages, unsigned long *num, pgprot_t prot)
*num = remaining_pages_total;
return ret;
}
-#endif /* ifdef pte_index */
/**
* vm_insert_pages - insert multiple pages into user vma, batching the pmd lock.
int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr,
struct page **pages, unsigned long *num)
{
-#ifdef pte_index
const unsigned long end_addr = addr + (*num * PAGE_SIZE) - 1;
if (addr < vma->vm_start || end_addr >= vma->vm_end)
}
/* Defer page refcount checking till we're about to map that page. */
return insert_pages(vma, addr, pages, num, vma->vm_page_prot);
-#else
- unsigned long idx = 0, pgcount = *num;
- int err = -EINVAL;
-
- for (; idx < pgcount; ++idx) {
- err = vm_insert_page(vma, addr + (PAGE_SIZE * idx), pages[idx]);
- if (err)
- break;
- }
- *num = pgcount - idx;
- return err;
-#endif /* ifdef pte_index */
}
EXPORT_SYMBOL(vm_insert_pages);
entry = pte_mkyoung(vmf->orig_pte);
if (ptep_set_access_flags(vma, addr, vmf->pte, entry, 0))
- update_mmu_cache(vma, addr, vmf->pte);
+ update_mmu_cache_range(vmf, vma, addr, vmf->pte, 1);
}
/*
*
* We do this without the lock held, so that it can sleep if it needs to.
*/
-static vm_fault_t do_page_mkwrite(struct vm_fault *vmf)
+static vm_fault_t do_page_mkwrite(struct vm_fault *vmf, struct folio *folio)
{
vm_fault_t ret;
- struct page *page = vmf->page;
unsigned int old_flags = vmf->flags;
vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
return ret;
if (unlikely(!(ret & VM_FAULT_LOCKED))) {
- lock_page(page);
- if (!page->mapping) {
- unlock_page(page);
+ folio_lock(folio);
+ if (!folio->mapping) {
+ folio_unlock(folio);
return 0; /* retry */
}
ret |= VM_FAULT_LOCKED;
} else
- VM_BUG_ON_PAGE(!PageLocked(page), page);
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
return ret;
}
{
struct vm_area_struct *vma = vmf->vma;
struct address_space *mapping;
- struct page *page = vmf->page;
+ struct folio *folio = page_folio(vmf->page);
bool dirtied;
bool page_mkwrite = vma->vm_ops && vma->vm_ops->page_mkwrite;
- dirtied = set_page_dirty(page);
- VM_BUG_ON_PAGE(PageAnon(page), page);
+ dirtied = folio_mark_dirty(folio);
+ VM_BUG_ON_FOLIO(folio_test_anon(folio), folio);
/*
- * Take a local copy of the address_space - page.mapping may be zeroed
- * by truncate after unlock_page(). The address_space itself remains
- * pinned by vma->vm_file's reference. We rely on unlock_page()'s
+ * Take a local copy of the address_space - folio.mapping may be zeroed
+ * by truncate after folio_unlock(). The address_space itself remains
+ * pinned by vma->vm_file's reference. We rely on folio_unlock()'s
* release semantics to prevent the compiler from undoing this copying.
*/
- mapping = page_rmapping(page);
- unlock_page(page);
+ mapping = folio_raw_mapping(folio);
+ folio_unlock(folio);
if (!page_mkwrite)
file_update_time(vma->vm_file);
entry = pte_mkyoung(vmf->orig_pte);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1))
- update_mmu_cache(vma, vmf->address, vmf->pte);
+ update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1);
pte_unmap_unlock(vmf->pte, vmf->ptl);
count_vm_event(PGREUSE);
}
inc_mm_counter(mm, MM_ANONPAGES);
}
} else {
+ ksm_might_unmap_zero_page(mm, vmf->orig_pte);
inc_mm_counter(mm, MM_ANONPAGES);
}
flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
* that left a window where the new PTE could be loaded into
* some TLBs while the old PTE remains in others.
*/
- ptep_clear_flush_notify(vma, vmf->address, vmf->pte);
+ ptep_clear_flush(vma, vmf->address, vmf->pte);
folio_add_new_anon_rmap(new_folio, vma, vmf->address);
folio_add_lru_vma(new_folio, vma);
/*
*/
BUG_ON(unshare && pte_write(entry));
set_pte_at_notify(mm, vmf->address, vmf->pte, entry);
- update_mmu_cache(vma, vmf->address, vmf->pte);
+ update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1);
if (old_folio) {
/*
* Only after switching the pte to the new page may
pte_unmap_unlock(vmf->pte, vmf->ptl);
}
- /*
- * No need to double call mmu_notifier->invalidate_range() callback as
- * the above ptep_clear_flush_notify() did already call it.
- */
- mmu_notifier_invalidate_range_only_end(&range);
+ mmu_notifier_invalidate_range_end(&range);
if (new_folio)
folio_put(new_folio);
vm_fault_t ret;
pte_unmap_unlock(vmf->pte, vmf->ptl);
+ if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
+ vma_end_read(vmf->vma);
+ return VM_FAULT_RETRY;
+ }
+
vmf->flags |= FAULT_FLAG_MKWRITE;
ret = vma->vm_ops->pfn_mkwrite(vmf);
if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))
return 0;
}
-static vm_fault_t wp_page_shared(struct vm_fault *vmf)
+static vm_fault_t wp_page_shared(struct vm_fault *vmf, struct folio *folio)
__releases(vmf->ptl)
{
struct vm_area_struct *vma = vmf->vma;
vm_fault_t ret = 0;
- get_page(vmf->page);
+ folio_get(folio);
if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
vm_fault_t tmp;
pte_unmap_unlock(vmf->pte, vmf->ptl);
- tmp = do_page_mkwrite(vmf);
+ if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
+ folio_put(folio);
+ vma_end_read(vmf->vma);
+ return VM_FAULT_RETRY;
+ }
+
+ tmp = do_page_mkwrite(vmf, folio);
if (unlikely(!tmp || (tmp &
(VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
- put_page(vmf->page);
+ folio_put(folio);
return tmp;
}
tmp = finish_mkwrite_fault(vmf);
if (unlikely(tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) {
- unlock_page(vmf->page);
- put_page(vmf->page);
+ folio_unlock(folio);
+ folio_put(folio);
return tmp;
}
} else {
wp_page_reuse(vmf);
- lock_page(vmf->page);
+ folio_lock(folio);
}
ret |= fault_dirty_shared_page(vmf);
- put_page(vmf->page);
+ folio_put(folio);
return ret;
}
vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte);
+ if (vmf->page)
+ folio = page_folio(vmf->page);
+
/*
* Shared mapping: we are guaranteed to have VM_WRITE and
* FAULT_FLAG_WRITE set at this point.
*/
if (!vmf->page)
return wp_pfn_shared(vmf);
- return wp_page_shared(vmf);
+ return wp_page_shared(vmf, folio);
}
- if (vmf->page)
- folio = page_folio(vmf->page);
-
/*
* Private mapping: create an exclusive anonymous page copy if reuse
* is impossible. We might miss VM_WRITE for FOLL_FORCE handling.
return 0;
}
copy:
+ if ((vmf->flags & FAULT_FLAG_VMA_LOCK) && !vma->anon_vma) {
+ pte_unmap_unlock(vmf->pte, vmf->ptl);
+ vma_end_read(vmf->vma);
+ return VM_FAULT_RETRY;
+ }
+
/*
* Ok, we need to copy. Oh, well..
*/
VM_BUG_ON(!folio_test_locked(folio));
first_index = folio->index;
- last_index = folio->index + folio_nr_pages(folio) - 1;
+ last_index = folio_next_index(folio) - 1;
details.even_cows = false;
details.single_folio = folio;
struct folio *folio = page_folio(vmf->page);
struct vm_area_struct *vma = vmf->vma;
struct mmu_notifier_range range;
+ vm_fault_t ret;
/*
* We need a reference to lock the folio because we don't hold
if (!folio_try_get(folio))
return 0;
- if (!folio_lock_or_retry(folio, vma->vm_mm, vmf->flags)) {
+ ret = folio_lock_or_retry(folio, vmf);
+ if (ret) {
folio_put(folio);
- return VM_FAULT_RETRY;
+ return ret;
}
mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0,
vma->vm_mm, vmf->address & PAGE_MASK,
* none pte. Otherwise it means the pte could have changed, so retry.
*
* This should also cover the case where e.g. the pte changed
- * quickly from a PTE_MARKER_UFFD_WP into PTE_MARKER_SWAPIN_ERROR.
+ * quickly from a PTE_MARKER_UFFD_WP into PTE_MARKER_POISONED.
* So is_pte_marker() check is not enough to safely drop the pte.
*/
if (pte_same(vmf->orig_pte, ptep_get(vmf->pte)))
return VM_FAULT_SIGBUS;
/* Higher priority than uffd-wp when data corrupted */
- if (marker & PTE_MARKER_SWAPIN_ERROR)
- return VM_FAULT_SIGBUS;
+ if (marker & PTE_MARKER_POISONED)
+ return VM_FAULT_HWPOISON;
if (pte_marker_entry_uffd_wp(entry))
return pte_marker_handle_uffd_wp(vmf);
bool exclusive = false;
swp_entry_t entry;
pte_t pte;
- int locked;
vm_fault_t ret = 0;
void *shadow = NULL;
if (!pte_unmap_same(vmf))
goto out;
- if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
- ret = VM_FAULT_RETRY;
- goto out;
- }
-
entry = pte_to_swp_entry(vmf->orig_pte);
if (unlikely(non_swap_entry(entry))) {
if (is_migration_entry(entry)) {
vmf->page = pfn_swap_entry_to_page(entry);
ret = remove_device_exclusive_entry(vmf);
} else if (is_device_private_entry(entry)) {
+ if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
+ /*
+ * migrate_to_ram is not yet ready to operate
+ * under VMA lock.
+ */
+ vma_end_read(vma);
+ ret = VM_FAULT_RETRY;
+ goto out;
+ }
+
vmf->page = pfn_swap_entry_to_page(entry);
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
vmf->address, &vmf->ptl);
folio_add_lru(folio);
/* To provide entry to swap_readpage() */
- folio_set_swap_entry(folio, entry);
+ folio->swap = entry;
swap_readpage(page, true, NULL);
folio->private = NULL;
}
goto out_release;
}
- locked = folio_lock_or_retry(folio, vma->vm_mm, vmf->flags);
-
- if (!locked) {
- ret |= VM_FAULT_RETRY;
+ ret |= folio_lock_or_retry(folio, vmf);
+ if (ret & VM_FAULT_RETRY)
goto out_release;
- }
if (swapcache) {
/*
* changed.
*/
if (unlikely(!folio_test_swapcache(folio) ||
- page_private(page) != entry.val))
+ page_swap_entry(page).val != entry.val))
goto out_page;
/*
}
/* No need to invalidate - it was non-present before */
- update_mmu_cache(vma, vmf->address, vmf->pte);
+ update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1);
unlock:
if (vmf->pte)
pte_unmap_unlock(vmf->pte, vmf->ptl);
entry = mk_pte(&folio->page, vma->vm_page_prot);
entry = pte_sw_mkyoung(entry);
if (vma->vm_flags & VM_WRITE)
- entry = pte_mkwrite(pte_mkdirty(entry));
+ entry = pte_mkwrite(pte_mkdirty(entry), vma);
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
&vmf->ptl);
set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);
/* No need to invalidate - it was non-present before */
- update_mmu_cache(vma, vmf->address, vmf->pte);
+ update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1);
unlock:
if (vmf->pte)
pte_unmap_unlock(vmf->pte, vmf->ptl);
bool write = vmf->flags & FAULT_FLAG_WRITE;
unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
pmd_t entry;
- int i;
vm_fault_t ret = VM_FAULT_FALLBACK;
if (!transhuge_vma_suitable(vma, haddr))
if (unlikely(!pmd_none(*vmf->pmd)))
goto out;
- for (i = 0; i < HPAGE_PMD_NR; i++)
- flush_icache_page(vma, page + i);
+ flush_icache_pages(vma, page, HPAGE_PMD_NR);
entry = mk_huge_pmd(page, vma->vm_page_prot);
if (write)
}
#endif
-void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
+/**
+ * set_pte_range - Set a range of PTEs to point to pages in a folio.
+ * @vmf: Fault decription.
+ * @folio: The folio that contains @page.
+ * @page: The first page to create a PTE for.
+ * @nr: The number of PTEs to create.
+ * @addr: The first address to create a PTE for.
+ */
+void set_pte_range(struct vm_fault *vmf, struct folio *folio,
+ struct page *page, unsigned int nr, unsigned long addr)
{
struct vm_area_struct *vma = vmf->vma;
bool uffd_wp = vmf_orig_pte_uffd_wp(vmf);
bool write = vmf->flags & FAULT_FLAG_WRITE;
- bool prefault = vmf->address != addr;
+ bool prefault = in_range(vmf->address, addr, nr * PAGE_SIZE);
pte_t entry;
- flush_icache_page(vma, page);
+ flush_icache_pages(vma, page, nr);
entry = mk_pte(page, vma->vm_page_prot);
if (prefault && arch_wants_old_prefaulted_pte())
entry = pte_mkuffd_wp(entry);
/* copy-on-write page */
if (write && !(vma->vm_flags & VM_SHARED)) {
- inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
- page_add_new_anon_rmap(page, vma, addr);
- lru_cache_add_inactive_or_unevictable(page, vma);
+ add_mm_counter(vma->vm_mm, MM_ANONPAGES, nr);
+ VM_BUG_ON_FOLIO(nr != 1, folio);
+ folio_add_new_anon_rmap(folio, vma, addr);
+ folio_add_lru_vma(folio, vma);
} else {
- inc_mm_counter(vma->vm_mm, mm_counter_file(page));
- page_add_file_rmap(page, vma, false);
+ add_mm_counter(vma->vm_mm, mm_counter_file(page), nr);
+ folio_add_file_rmap_range(folio, page, nr, vma, false);
}
- set_pte_at(vma->vm_mm, addr, vmf->pte, entry);
+ set_ptes(vma->vm_mm, addr, vmf->pte, entry, nr);
+
+ /* no need to invalidate: a not-present page won't be cached */
+ update_mmu_cache_range(vmf, vma, addr, vmf->pte, nr);
}
static bool vmf_pte_changed(struct vm_fault *vmf)
/* Re-check under ptl */
if (likely(!vmf_pte_changed(vmf))) {
- do_set_pte(vmf, page, vmf->address);
-
- /* no need to invalidate: a not-present page won't be cached */
- update_mmu_cache(vma, vmf->address, vmf->pte);
+ struct folio *folio = page_folio(page);
+ set_pte_range(vmf, folio, page, 1, vmf->address);
ret = 0;
} else {
update_mmu_tlb(vma, vmf->address, vmf->pte);
static vm_fault_t do_read_fault(struct vm_fault *vmf)
{
vm_fault_t ret = 0;
+ struct folio *folio;
/*
* Let's call ->map_pages() first and use ->fault() as fallback
return ret;
}
+ if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
+ vma_end_read(vmf->vma);
+ return VM_FAULT_RETRY;
+ }
+
ret = __do_fault(vmf);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret;
ret |= finish_fault(vmf);
- unlock_page(vmf->page);
+ folio = page_folio(vmf->page);
+ folio_unlock(folio);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
- put_page(vmf->page);
+ folio_put(folio);
return ret;
}
struct vm_area_struct *vma = vmf->vma;
vm_fault_t ret;
+ if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
+ vma_end_read(vma);
+ return VM_FAULT_RETRY;
+ }
+
if (unlikely(anon_vma_prepare(vma)))
return VM_FAULT_OOM;
{
struct vm_area_struct *vma = vmf->vma;
vm_fault_t ret, tmp;
+ struct folio *folio;
+
+ if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
+ vma_end_read(vma);
+ return VM_FAULT_RETRY;
+ }
ret = __do_fault(vmf);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret;
+ folio = page_folio(vmf->page);
+
/*
* Check if the backing address space wants to know that the page is
* about to become writable
*/
if (vma->vm_ops->page_mkwrite) {
- unlock_page(vmf->page);
- tmp = do_page_mkwrite(vmf);
+ folio_unlock(folio);
+ tmp = do_page_mkwrite(vmf, folio);
if (unlikely(!tmp ||
(tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
- put_page(vmf->page);
+ folio_put(folio);
return tmp;
}
}
ret |= finish_fault(vmf);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE |
VM_FAULT_RETRY))) {
- unlock_page(vmf->page);
- put_page(vmf->page);
+ folio_unlock(folio);
+ folio_put(folio);
return ret;
}
pte = pte_modify(old_pte, vma->vm_page_prot);
pte = pte_mkyoung(pte);
if (writable)
- pte = pte_mkwrite(pte);
+ pte = pte_mkwrite(pte, vma);
ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte);
- update_mmu_cache(vma, vmf->address, vmf->pte);
+ update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1);
pte_unmap_unlock(vmf->pte, vmf->ptl);
goto out;
}
static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
{
- if (vma_is_anonymous(vmf->vma))
+ struct vm_area_struct *vma = vmf->vma;
+ if (vma_is_anonymous(vma))
return do_huge_pmd_anonymous_page(vmf);
- if (vmf->vma->vm_ops->huge_fault)
- return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD);
+ if (vma->vm_ops->huge_fault)
+ return vma->vm_ops->huge_fault(vmf, PMD_ORDER);
return VM_FAULT_FALLBACK;
}
/* `inline' is required to avoid gcc 4.1.2 build error */
static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
vm_fault_t ret;
- if (vma_is_anonymous(vmf->vma)) {
+ if (vma_is_anonymous(vma)) {
if (likely(!unshare) &&
- userfaultfd_huge_pmd_wp(vmf->vma, vmf->orig_pmd))
+ userfaultfd_huge_pmd_wp(vma, vmf->orig_pmd))
return handle_userfault(vmf, VM_UFFD_WP);
return do_huge_pmd_wp_page(vmf);
}
- if (vmf->vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
- if (vmf->vma->vm_ops->huge_fault) {
- ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD);
+ if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
+ if (vma->vm_ops->huge_fault) {
+ ret = vma->vm_ops->huge_fault(vmf, PMD_ORDER);
if (!(ret & VM_FAULT_FALLBACK))
return ret;
}
}
/* COW or write-notify handled on pte level: split pmd. */
- __split_huge_pmd(vmf->vma, vmf->pmd, vmf->address, false, NULL);
+ __split_huge_pmd(vma, vmf->pmd, vmf->address, false, NULL);
return VM_FAULT_FALLBACK;
}
{
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+ struct vm_area_struct *vma = vmf->vma;
/* No support for anonymous transparent PUD pages yet */
- if (vma_is_anonymous(vmf->vma))
+ if (vma_is_anonymous(vma))
return VM_FAULT_FALLBACK;
- if (vmf->vma->vm_ops->huge_fault)
- return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
+ if (vma->vm_ops->huge_fault)
+ return vma->vm_ops->huge_fault(vmf, PUD_ORDER);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
return VM_FAULT_FALLBACK;
}
{
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+ struct vm_area_struct *vma = vmf->vma;
vm_fault_t ret;
/* No support for anonymous transparent PUD pages yet */
- if (vma_is_anonymous(vmf->vma))
+ if (vma_is_anonymous(vma))
goto split;
- if (vmf->vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
- if (vmf->vma->vm_ops->huge_fault) {
- ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
+ if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
+ if (vma->vm_ops->huge_fault) {
+ ret = vma->vm_ops->huge_fault(vmf, PUD_ORDER);
if (!(ret & VM_FAULT_FALLBACK))
return ret;
}
}
split:
/* COW or write-notify not handled on PUD level: split pud.*/
- __split_huge_pud(vmf->vma, vmf->pud, vmf->address);
+ __split_huge_pud(vma, vmf->pud, vmf->address);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
return VM_FAULT_FALLBACK;
}
entry = pte_mkyoung(entry);
if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry,
vmf->flags & FAULT_FLAG_WRITE)) {
- update_mmu_cache(vmf->vma, vmf->address, vmf->pte);
+ update_mmu_cache_range(vmf, vmf->vma, vmf->address,
+ vmf->pte, 1);
} else {
/* Skip spurious TLB flush for retried page fault */
if (vmf->flags & FAULT_FLAG_TRIED)
}
/*
- * By the time we get here, we already hold the mm semaphore
- *
- * The mmap_lock may have been released depending on flags and our
- * return value. See filemap_fault() and __folio_lock_or_retry().
+ * On entry, we hold either the VMA lock or the mmap_lock
+ * (FAULT_FLAG_VMA_LOCK tells you which). If VM_FAULT_RETRY is set in
+ * the result, the mmap_lock is not held on exit. See filemap_fault()
+ * and __folio_lock_or_retry().
*/
static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
unsigned long address, unsigned int flags)
/**
* mm_account_fault - Do page fault accounting
- *
+ * @mm: mm from which memcg should be extracted. It can be NULL.
* @regs: the pt_regs struct pointer. When set to NULL, will skip accounting
* of perf event counters, but we'll still do the per-task accounting to
* the task who triggered this page fault.
!is_cow_mapping(vma->vm_flags)))
return VM_FAULT_SIGSEGV;
}
+#ifdef CONFIG_PER_VMA_LOCK
+ /*
+ * Per-VMA locks can't be used with FAULT_FLAG_RETRY_NOWAIT because of
+ * the assumption that lock is dropped on VM_FAULT_RETRY.
+ */
+ if (WARN_ON_ONCE((*flags &
+ (FAULT_FLAG_VMA_LOCK | FAULT_FLAG_RETRY_NOWAIT)) ==
+ (FAULT_FLAG_VMA_LOCK | FAULT_FLAG_RETRY_NOWAIT)))
+ return VM_FAULT_SIGSEGV;
+#endif
+
return 0;
}
static inline bool get_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs)
{
- /* Even if this succeeds, make it clear we *might* have slept */
- if (likely(mmap_read_trylock(mm))) {
- might_sleep();
+ if (likely(mmap_read_trylock(mm)))
return true;
- }
if (regs && !user_mode(regs)) {
unsigned long ip = instruction_pointer(regs);
if (!vma)
goto inval;
- /* Only anonymous and tcp vmas are supported for now */
- if (!vma_is_anonymous(vma) && !vma_is_tcp(vma))
- goto inval;
-
if (!vma_start_read(vma))
goto inval;
* concurrent mremap() with MREMAP_DONTUNMAP could dissociate the VMA
* from its anon_vma.
*/
- if (unlikely(!vma->anon_vma && !vma_is_tcp(vma)))
- goto inval_end_read;
-
- /*
- * Due to the possibility of userfault handler dropping mmap_lock, avoid
- * it for now and fall back to page fault handling under mmap_lock.
- */
- if (userfaultfd_armed(vma))
+ if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma))
goto inval_end_read;
/* Check since vm_start/vm_end might change before we lock the VMA */
SLAB_PANIC, NULL);
}
-bool ptlock_alloc(struct page *page)
+bool ptlock_alloc(struct ptdesc *ptdesc)
{
spinlock_t *ptl;
ptl = kmem_cache_alloc(page_ptl_cachep, GFP_KERNEL);
if (!ptl)
return false;
- page->ptl = ptl;
+ ptdesc->ptl = ptl;
return true;
}
-void ptlock_free(struct page *page)
+void ptlock_free(struct ptdesc *ptdesc)
{
- kmem_cache_free(page_ptl_cachep, page->ptl);
+ kmem_cache_free(page_ptl_cachep, ptdesc->ptl);
}
#endif