There's an established convention in the kernel that we treat PTEs as
containing swap entries (and the unfortunately named non-swap swap
entries) should they be neither empty (i.e. pte_none() evaluating true)
nor present (i.e. pte_present() evaluating true).
However, there is some inconsistency in how this is applied, as we also
have the is_swap_pte() helper which explicitly performs this check:
/* check whether a pte points to a swap entry */
static inline int is_swap_pte(pte_t pte)
{
return !pte_none(pte) && !pte_present(pte);
}
As this represents a predicate, and it's logical to assume that in order
to establish that a PTE entry can correctly be manipulated as a
swap/non-swap entry, this predicate seems as if it must first be checked.
But we instead, we far more often utilise the established convention of
checking pte_none() / pte_present() before operating on entries as if they
were swap/non-swap.
This patch works towards correcting this inconsistency by removing all
uses of is_swap_pte() where we are already in a position where we perform
pte_none()/pte_present() checks anyway or otherwise it is clearly logical
to do so.
We also take advantage of the fact that pte_swp_uffd_wp() is only set on
swap entries.
Additionally, update comments referencing to is_swap_pte() and
non_swap_entry().
No functional change intended.
Link: https://lkml.kernel.org/r/17fd6d7f46a846517fd455fadd640af47fcd7c55.1762812360.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Byungchul Park <byungchul@sk.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Chris Li <chrisl@kernel.org>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Claudio Imbrenda <imbrenda@linux.ibm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Gregory Price <gourry@gourry.net>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: "Huang, Ying" <ying.huang@linux.alibaba.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Janosch Frank <frankja@linux.ibm.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Kairui Song <kasong@tencent.com>
Cc: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Mathew Brost <matthew.brost@intel.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Nico Pache <npache@redhat.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Rakie Kim <rakie.kim@sk.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: SeongJae Park <sj@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Wei Xu <weixugc@google.com>
Cc: xu xin <xu.xin16@zte.com.cn>
Cc: Yuanchu Xie <yuanchu@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
young = pte_young(ptent);
dirty = pte_dirty(ptent);
present = true;
- } else if (is_swap_pte(ptent)) {
+ } else if (pte_none(ptent)) {
+ smaps_pte_hole_lookup(addr, walk);
+ } else {
swp_entry_t swpent = pte_to_swp_entry(ptent);
if (!non_swap_entry(swpent)) {
present = true;
page = pfn_swap_entry_to_page(swpent);
}
- } else {
- smaps_pte_hole_lookup(addr, walk);
- return;
}
if (!page)
*/
pte_t ptent = ptep_get(pte);
+ if (pte_none(ptent))
+ return;
+
if (pte_present(ptent)) {
pte_t old_pte;
ptent = pte_wrprotect(old_pte);
ptent = pte_clear_soft_dirty(ptent);
ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
- } else if (is_swap_pte(ptent)) {
+ } else {
ptent = pte_swp_clear_soft_dirty(ptent);
set_pte_at(vma->vm_mm, addr, pte, ptent);
}
struct page *page = NULL;
struct folio *folio;
+ if (pte_none(pte))
+ goto out;
+
if (pte_present(pte)) {
if (pm->show_pfn)
frame = pte_pfn(pte);
flags |= PM_SOFT_DIRTY;
if (pte_uffd_wp(pte))
flags |= PM_UFFD_WP;
- } else if (is_swap_pte(pte)) {
+ } else {
swp_entry_t entry;
+
if (pte_swp_soft_dirty(pte))
flags |= PM_SOFT_DIRTY;
if (pte_swp_uffd_wp(pte))
entry = pte_to_swp_entry(pte);
if (pm->show_pfn) {
pgoff_t offset;
+
/*
* For PFN swap offsets, keeping the offset field
* to be PFN only to be compatible with old smaps.
__folio_page_mapped_exclusively(folio, page))
flags |= PM_MMAP_EXCLUSIVE;
}
+
+out:
if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY;
struct vm_area_struct *vma,
unsigned long addr, pte_t pte)
{
- unsigned long categories = 0;
+ unsigned long categories;
+
+ if (pte_none(pte))
+ return 0;
if (pte_present(pte)) {
struct page *page;
- categories |= PAGE_IS_PRESENT;
+ categories = PAGE_IS_PRESENT;
+
if (!pte_uffd_wp(pte))
categories |= PAGE_IS_WRITTEN;
categories |= PAGE_IS_PFNZERO;
if (pte_soft_dirty(pte))
categories |= PAGE_IS_SOFT_DIRTY;
- } else if (is_swap_pte(pte)) {
+ } else {
softleaf_t entry;
- categories |= PAGE_IS_SWAPPED;
+ categories = PAGE_IS_SWAPPED;
+
if (!pte_swp_uffd_wp_any(pte))
categories |= PAGE_IS_WRITTEN;
old_pte = ptep_modify_prot_start(vma, addr, pte);
ptent = pte_mkuffd_wp(old_pte);
ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
- } else if (is_swap_pte(ptent)) {
- ptent = pte_swp_mkuffd_wp(ptent);
- set_pte_at(vma->vm_mm, addr, pte, ptent);
- } else {
+ } else if (pte_none(ptent)) {
set_pte_at(vma->vm_mm, addr, pte,
make_pte_marker(PTE_MARKER_UFFD_WP));
+ } else {
+ ptent = pte_swp_mkuffd_wp(ptent);
+ set_pte_at(vma->vm_mm, addr, pte, ptent);
}
}
{
unsigned long categories = PAGE_IS_HUGE;
+ if (pte_none(pte))
+ return categories;
+
/*
* According to pagemap_hugetlb_range(), file-backed HugeTLB
* page cannot be swapped. So PAGE_IS_FILE is not checked for
*/
if (pte_present(pte)) {
categories |= PAGE_IS_PRESENT;
+
if (!huge_pte_uffd_wp(pte))
categories |= PAGE_IS_WRITTEN;
if (!PageAnon(pte_page(pte)))
categories |= PAGE_IS_PFNZERO;
if (pte_soft_dirty(pte))
categories |= PAGE_IS_SOFT_DIRTY;
- } else if (is_swap_pte(pte)) {
+ } else {
categories |= PAGE_IS_SWAPPED;
+
if (!pte_swp_uffd_wp_any(pte))
categories |= PAGE_IS_WRITTEN;
if (pte_swp_soft_dirty(pte))
static inline bool pte_swp_uffd_wp_any(pte_t pte)
{
#ifdef CONFIG_PTE_MARKER_UFFD_WP
- if (!is_swap_pte(pte))
+ if (pte_present(pte))
return false;
-
if (pte_swp_uffd_wp(pte))
return true;
pte = huge_ptep_get_and_clear(mm, old_addr, src_pte, sz);
- if (need_clear_uffd_wp && pte_is_uffd_wp_marker(pte))
+ if (need_clear_uffd_wp && pte_is_uffd_wp_marker(pte)) {
huge_pte_clear(mm, new_addr, dst_pte, sz);
- else {
+ } else {
if (need_clear_uffd_wp) {
if (pte_present(pte))
pte = huge_pte_clear_uffd_wp(pte);
- else if (is_swap_pte(pte))
+ else
pte = pte_swp_clear_uffd_wp(pte);
}
set_huge_pte_at(mm, new_addr, dst_pte, pte, sz);
/**
* pte_move_swp_offset - Move the swap entry offset field of a swap pte
* forward or backward by delta
- * @pte: The initial pte state; is_swap_pte(pte) must be true and
- * non_swap_entry() must be false.
+ * @pte: The initial pte state; must be a swap entry
* @delta: The direction and the offset we are moving; forward if delta
* is positive; backward if delta is negative
*
/**
* pte_next_swp_offset - Increment the swap entry offset field of a swap pte.
- * @pte: The initial pte state; is_swap_pte(pte) must be true and
- * non_swap_entry() must be false.
+ * @pte: The initial pte state; must be a swap entry.
*
* Increments the swap offset, while maintaining all other fields, including
* swap type, and any swp pte bits. The resulting pte is returned.
}
vmf.orig_pte = ptep_get_lockless(pte);
- if (!is_swap_pte(vmf.orig_pte))
+ if (pte_none(vmf.orig_pte) ||
+ pte_present(vmf.orig_pte))
continue;
vmf.pte = pte;
for (addr = start_addr, _pte = pte; _pte < pte + HPAGE_PMD_NR;
_pte++, addr += PAGE_SIZE) {
pte_t pteval = ptep_get(_pte);
- if (is_swap_pte(pteval)) {
+ if (pte_none_or_zero(pteval)) {
+ ++none_or_zero;
+ if (!userfaultfd_armed(vma) &&
+ (!cc->is_khugepaged ||
+ none_or_zero <= khugepaged_max_ptes_none)) {
+ continue;
+ } else {
+ result = SCAN_EXCEED_NONE_PTE;
+ count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
+ goto out_unmap;
+ }
+ }
+ if (!pte_present(pteval)) {
++unmapped;
if (!cc->is_khugepaged ||
unmapped <= khugepaged_max_ptes_swap) {
goto out_unmap;
}
}
- if (pte_none_or_zero(pteval)) {
- ++none_or_zero;
- if (!userfaultfd_armed(vma) &&
- (!cc->is_khugepaged ||
- none_or_zero <= khugepaged_max_ptes_none)) {
- continue;
- } else {
- result = SCAN_EXCEED_NONE_PTE;
- count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
- goto out_unmap;
- }
- }
if (pte_uffd_wp(pteval)) {
/*
* Don't collapse the page if any of the small
pte = ptep_get(ptep);
pte_unmap(ptep);
- if (!is_swap_pte(pte))
+ if (pte_none(pte) || pte_present(pte))
goto out;
entry = pte_to_swp_entry(pte);
prot_commit_flush_ptes(vma, addr, pte, oldpte, ptent,
nr_ptes, /* idx = */ 0, /* set_write = */ false, tlb);
pages += nr_ptes;
- } else if (is_swap_pte(oldpte)) {
+ } else if (pte_none(oldpte)) {
+ /*
+ * Nobody plays with any none ptes besides
+ * userfaultfd when applying the protections.
+ */
+ if (likely(!uffd_wp))
+ continue;
+
+ if (userfaultfd_wp_use_markers(vma)) {
+ /*
+ * For file-backed mem, we need to be able to
+ * wr-protect a none pte, because even if the
+ * pte is none, the page/swap cache could
+ * exist. Doing that by install a marker.
+ */
+ set_pte_at(vma->vm_mm, addr, pte,
+ make_pte_marker(PTE_MARKER_UFFD_WP));
+ pages++;
+ }
+ } else {
swp_entry_t entry = pte_to_swp_entry(oldpte);
pte_t newpte;
set_pte_at(vma->vm_mm, addr, pte, newpte);
pages++;
}
- } else {
- /* It must be an none page, or what else?.. */
- WARN_ON_ONCE(!pte_none(oldpte));
-
- /*
- * Nobody plays with any none ptes besides
- * userfaultfd when applying the protections.
- */
- if (likely(!uffd_wp))
- continue;
-
- if (userfaultfd_wp_use_markers(vma)) {
- /*
- * For file-backed mem, we need to be able to
- * wr-protect a none pte, because even if the
- * pte is none, the page/swap cache could
- * exist. Doing that by install a marker.
- */
- set_pte_at(vma->vm_mm, addr, pte,
- make_pte_marker(PTE_MARKER_UFFD_WP));
- pages++;
- }
}
} while (pte += nr_ptes, addr += nr_ptes * PAGE_SIZE, addr != end);
arch_leave_lazy_mmu_mode();
static pte_t move_soft_dirty_pte(pte_t pte)
{
+ if (pte_none(pte))
+ return pte;
+
/*
* Set soft dirty bit so we can notice
* in userspace the ptes were moved.
#ifdef CONFIG_MEM_SOFT_DIRTY
if (pte_present(pte))
pte = pte_mksoft_dirty(pte);
- else if (is_swap_pte(pte))
+ else
pte = pte_swp_mksoft_dirty(pte);
#endif
return pte;
if (need_clear_uffd_wp) {
if (pte_present(pte))
pte = pte_clear_uffd_wp(pte);
- else if (is_swap_pte(pte))
+ else
pte = pte_swp_clear_uffd_wp(pte);
}
set_ptes(mm, new_addr, new_ptep, pte, nr_ptes);
is_writable_migration_entry(entry);
}
-static inline void page_table_check_pte_flags(pte_t pte)
+static void page_table_check_pte_flags(pte_t pte)
{
- if (pte_present(pte) && pte_uffd_wp(pte))
- WARN_ON_ONCE(pte_write(pte));
- else if (is_swap_pte(pte) && pte_swp_uffd_wp(pte))
- WARN_ON_ONCE(swap_cached_writable(pte_to_swp_entry(pte)));
+ if (pte_present(pte)) {
+ WARN_ON_ONCE(pte_uffd_wp(pte) && pte_write(pte));
+ } else if (pte_swp_uffd_wp(pte)) {
+ const swp_entry_t entry = pte_to_swp_entry(pte);
+
+ WARN_ON_ONCE(swap_cached_writable(entry));
+ }
}
void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte,
static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
spinlock_t **ptlp)
{
+ bool is_migration;
pte_t ptent;
if (pvmw->flags & PVMW_SYNC) {
return !!pvmw->pte;
}
+ is_migration = pvmw->flags & PVMW_MIGRATION;
again:
/*
* It is important to return the ptl corresponding to pte,
ptent = ptep_get(pvmw->pte);
- if (pvmw->flags & PVMW_MIGRATION) {
- if (!is_swap_pte(ptent))
+ if (pte_none(ptent)) {
+ return false;
+ } else if (pte_present(ptent)) {
+ if (is_migration)
return false;
- } else if (is_swap_pte(ptent)) {
+ } else if (!is_migration) {
swp_entry_t entry;
+
/*
* Handle un-addressable ZONE_DEVICE memory.
*
if (!is_device_private_entry(entry) &&
!is_device_exclusive_entry(entry))
return false;
- } else if (!pte_present(ptent)) {
- return false;
}
spin_lock(*ptlp);
if (unlikely(!pmd_same(*pmdvalp, pmdp_get_lockless(pvmw->pmd)))) {
return false;
pfn = softleaf_to_pfn(entry);
- } else if (is_swap_pte(ptent)) {
- swp_entry_t entry;
+ } else if (pte_present(ptent)) {
+ pfn = pte_pfn(ptent);
+ } else {
+ const softleaf_t entry = softleaf_from_pte(ptent);
/* Handle un-addressable ZONE_DEVICE memory */
- entry = pte_to_swp_entry(ptent);
- if (!is_device_private_entry(entry) &&
- !is_device_exclusive_entry(entry))
- return false;
-
- pfn = swp_offset_pfn(entry);
- } else {
- if (!pte_present(ptent))
+ if (!softleaf_is_device_private(entry) &&
+ !softleaf_is_device_exclusive(entry))
return false;
- pfn = pte_pfn(ptent);
+ pfn = softleaf_to_pfn(entry);
}
if ((pfn + pte_nr - 1) < pvmw->pfn)