}
static int mprotect_folio_pte_batch(struct folio *folio, pte_t *ptep,
- pte_t pte, int max_nr_ptes)
+ pte_t pte, int max_nr_ptes, fpb_t flags)
{
/* No underlying folio, so cannot batch */
if (!folio)
if (!folio_test_large(folio))
return 1;
- return folio_pte_batch(folio, ptep, pte, max_nr_ptes);
+ return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr_ptes, flags);
}
static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr,
return ret;
}
+/* Set nr_ptes number of ptes, starting from idx */
+static void prot_commit_flush_ptes(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep, pte_t oldpte, pte_t ptent, int nr_ptes,
+ int idx, bool set_write, struct mmu_gather *tlb)
+{
+ /*
+ * Advance the position in the batch by idx; note that if idx > 0,
+ * then the nr_ptes passed here is <= batch size - idx.
+ */
+ addr += idx * PAGE_SIZE;
+ ptep += idx;
+ oldpte = pte_advance_pfn(oldpte, idx);
+ ptent = pte_advance_pfn(ptent, idx);
+
+ if (set_write)
+ ptent = pte_mkwrite(ptent, vma);
+
+ modify_prot_commit_ptes(vma, addr, ptep, oldpte, ptent, nr_ptes);
+ if (pte_needs_flush(oldpte, ptent))
+ tlb_flush_pte_range(tlb, addr, nr_ptes * PAGE_SIZE);
+}
+
+/*
+ * Get max length of consecutive ptes pointing to PageAnonExclusive() pages or
+ * !PageAnonExclusive() pages, starting from start_idx. Caller must enforce
+ * that the ptes point to consecutive pages of the same anon large folio.
+ */
+static int page_anon_exclusive_sub_batch(int start_idx, int max_len,
+ struct page *first_page, bool expected_anon_exclusive)
+{
+ int idx;
+
+ for (idx = start_idx + 1; idx < start_idx + max_len; ++idx) {
+ if (expected_anon_exclusive != PageAnonExclusive(first_page + idx))
+ break;
+ }
+ return idx - start_idx;
+}
+
+/*
+ * This function is a result of trying our very best to retain the
+ * "avoid the write-fault handler" optimization. In can_change_pte_writable(),
+ * if the vma is a private vma, and we cannot determine whether to change
+ * the pte to writable just from the vma and the pte, we then need to look
+ * at the actual page pointed to by the pte. Unfortunately, if we have a
+ * batch of ptes pointing to consecutive pages of the same anon large folio,
+ * the anon-exclusivity (or the negation) of the first page does not guarantee
+ * the anon-exclusivity (or the negation) of the other pages corresponding to
+ * the pte batch; hence in this case it is incorrect to decide to change or
+ * not change the ptes to writable just by using information from the first
+ * pte of the batch. Therefore, we must individually check all pages and
+ * retrieve sub-batches.
+ */
+static void commit_anon_folio_batch(struct vm_area_struct *vma,
+ struct folio *folio, unsigned long addr, pte_t *ptep,
+ pte_t oldpte, pte_t ptent, int nr_ptes, struct mmu_gather *tlb)
+{
+ struct page *first_page = folio_page(folio, 0);
+ bool expected_anon_exclusive;
+ int sub_batch_idx = 0;
+ int len;
+
+ while (nr_ptes) {
+ expected_anon_exclusive = PageAnonExclusive(first_page + sub_batch_idx);
+ len = page_anon_exclusive_sub_batch(sub_batch_idx, nr_ptes,
+ first_page, expected_anon_exclusive);
+ prot_commit_flush_ptes(vma, addr, ptep, oldpte, ptent, len,
+ sub_batch_idx, expected_anon_exclusive, tlb);
+ sub_batch_idx += len;
+ nr_ptes -= len;
+ }
+}
+
+static void set_write_prot_commit_flush_ptes(struct vm_area_struct *vma,
+ struct folio *folio, unsigned long addr, pte_t *ptep,
+ pte_t oldpte, pte_t ptent, int nr_ptes, struct mmu_gather *tlb)
+{
+ bool set_write;
+
+ if (vma->vm_flags & VM_SHARED) {
+ set_write = can_change_shared_pte_writable(vma, ptent);
+ prot_commit_flush_ptes(vma, addr, ptep, oldpte, ptent, nr_ptes,
+ /* idx = */ 0, set_write, tlb);
+ return;
+ }
+
+ set_write = maybe_change_pte_writable(vma, ptent) &&
+ (folio && folio_test_anon(folio));
+ if (!set_write) {
+ prot_commit_flush_ptes(vma, addr, ptep, oldpte, ptent, nr_ptes,
+ /* idx = */ 0, set_write, tlb);
+ return;
+ }
+ commit_anon_folio_batch(vma, folio, addr, ptep, oldpte, ptent, nr_ptes, tlb);
+}
+
static long change_pte_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
unsigned long end, pgprot_t newprot, unsigned long cp_flags)
nr_ptes = 1;
oldpte = ptep_get(pte);
if (pte_present(oldpte)) {
+ const fpb_t flags = FPB_RESPECT_SOFT_DIRTY | FPB_RESPECT_WRITE;
int max_nr_ptes = (end - addr) >> PAGE_SHIFT;
- struct folio *folio;
+ struct folio *folio = NULL;
pte_t ptent;
/*
/* determine batch to skip */
nr_ptes = mprotect_folio_pte_batch(folio,
- pte, oldpte, max_nr_ptes);
+ pte, oldpte, max_nr_ptes, /* flags = */ 0);
continue;
}
}
+ if (!folio)
+ folio = vm_normal_folio(vma, addr, oldpte);
+
+ nr_ptes = mprotect_folio_pte_batch(folio, pte, oldpte, max_nr_ptes, flags);
+
oldpte = modify_prot_start_ptes(vma, addr, pte, nr_ptes);
ptent = pte_modify(oldpte, newprot);
* COW or special handling is required.
*/
if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) &&
- !pte_write(ptent) &&
- can_change_pte_writable(vma, addr, ptent))
- ptent = pte_mkwrite(ptent, vma);
-
- modify_prot_commit_ptes(vma, addr, pte, oldpte, ptent, nr_ptes);
- if (pte_needs_flush(oldpte, ptent))
- tlb_flush_pte_range(tlb, addr, PAGE_SIZE);
- pages++;
+ !pte_write(ptent))
+ set_write_prot_commit_flush_ptes(vma, folio,
+ addr, pte, oldpte, ptent, nr_ptes, tlb);
+ else
+ prot_commit_flush_ptes(vma, addr, pte, oldpte, ptent,
+ nr_ptes, /* idx = */ 0, /* set_write = */ false, tlb);
+ pages += nr_ptes;
} else if (is_swap_pte(oldpte)) {
swp_entry_t entry = pte_to_swp_entry(oldpte);
pte_t newpte;