#include <linux/kfence.h>
#include <linux/pkeys.h>
#include <linux/mm_inline.h>
+#include <linux/pagewalk.h>
+#include <linux/stop_machine.h>
#include <asm/barrier.h>
#include <asm/cputype.h>
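+/* Sentinel PA returned by the page-table allocators when allocation fails. */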
#define INVALID_PHYS_ADDR (-1ULL)
-static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm,
+static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm, gfp_t gfp,
enum pgtable_type pgtable_type)
{
/* Page is zeroed by init_clear_pgtable() so don't duplicate effort. */
- struct ptdesc *ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL & ~__GFP_ZERO, 0);
+ struct ptdesc *ptdesc = pagetable_alloc(gfp & ~__GFP_ZERO, 0);
phys_addr_t pa;
if (!ptdesc)
		return INVALID_PHYS_ADDR;
}
static phys_addr_t
-try_pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type)
+try_pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type, gfp_t gfp)
{
- return __pgd_pgtable_alloc(&init_mm, pgtable_type);
+ return __pgd_pgtable_alloc(&init_mm, gfp, pgtable_type);
}
static phys_addr_t __maybe_unused
pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type)
{
phys_addr_t pa;
- pa = __pgd_pgtable_alloc(&init_mm, pgtable_type);
+ pa = __pgd_pgtable_alloc(&init_mm, GFP_PGTABLE_KERNEL, pgtable_type);
BUG_ON(pa == INVALID_PHYS_ADDR);
return pa;
}

static phys_addr_t
pgd_pgtable_alloc_special_mm(enum pgtable_type pgtable_type)
{
phys_addr_t pa;
- pa = __pgd_pgtable_alloc(NULL, pgtable_type);
+ pa = __pgd_pgtable_alloc(NULL, GFP_PGTABLE_KERNEL, pgtable_type);
BUG_ON(pa == INVALID_PHYS_ADDR);
return pa;
}
__set_pte(ptep, pte_mknoncont(__ptep_get(ptep)));
}
-static int split_pmd(pmd_t *pmdp, pmd_t pmd)
+static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, bool to_cont)
{
pmdval_t tableprot = PMD_TYPE_TABLE | PMD_TABLE_UXN | PMD_TABLE_AF;
unsigned long pfn = pmd_pfn(pmd);
	pgprot_t prot = pmd_pgprot(pmd);
	phys_addr_t pte_phys;
pte_t *ptep;
int i;
- pte_phys = try_pgd_pgtable_alloc_init_mm(TABLE_PTE);
+ pte_phys = try_pgd_pgtable_alloc_init_mm(TABLE_PTE, gfp);
if (pte_phys == INVALID_PHYS_ADDR)
return -ENOMEM;
ptep = (pte_t *)phys_to_virt(pte_phys);
tableprot |= PMD_TABLE_PXN;
prot = __pgprot((pgprot_val(prot) & ~PTE_TYPE_MASK) | PTE_TYPE_PAGE);
- prot = __pgprot(pgprot_val(prot) | PTE_CONT);
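+	/*
+	 * Build the new ptes without the contiguous hint by default; callers
+	 * only ask for PTE_CONT when the whole range keeps uniform attributes
+	 * (the repaint-to-pte path passes to_cont == false).
+	 */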
+ prot = __pgprot(pgprot_val(prot) & ~PTE_CONT);
+ if (to_cont)
+ prot = __pgprot(pgprot_val(prot) | PTE_CONT);
for (i = 0; i < PTRS_PER_PTE; i++, ptep++, pfn++)
__set_pte(ptep, pfn_pte(pfn, prot));
set_pmd(pmdp, pmd_mknoncont(pmdp_get(pmdp)));
}
-static int split_pud(pud_t *pudp, pud_t pud)
+static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp, bool to_cont)
{
pudval_t tableprot = PUD_TYPE_TABLE | PUD_TABLE_UXN | PUD_TABLE_AF;
unsigned int step = PMD_SIZE >> PAGE_SHIFT;
	unsigned long pfn = pud_pfn(pud);
	pgprot_t prot = pud_pgprot(pud);
	phys_addr_t pmd_phys;
pmd_t *pmdp;
int i;
- pmd_phys = try_pgd_pgtable_alloc_init_mm(TABLE_PMD);
+ pmd_phys = try_pgd_pgtable_alloc_init_mm(TABLE_PMD, gfp);
if (pmd_phys == INVALID_PHYS_ADDR)
return -ENOMEM;
pmdp = (pmd_t *)phys_to_virt(pmd_phys);
tableprot |= PUD_TABLE_PXN;
prot = __pgprot((pgprot_val(prot) & ~PMD_TYPE_MASK) | PMD_TYPE_SECT);
- prot = __pgprot(pgprot_val(prot) | PTE_CONT);
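+	/* As in split_pmd(): only keep the contiguous hint when requested. */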
+ prot = __pgprot(pgprot_val(prot) & ~PTE_CONT);
+ if (to_cont)
+ prot = __pgprot(pgprot_val(prot) | PTE_CONT);
for (i = 0; i < PTRS_PER_PMD; i++, pmdp++, pfn += step)
set_pmd(pmdp, pfn_pmd(pfn, prot));
if (!pud_present(pud))
goto out;
if (pud_leaf(pud)) {
- ret = split_pud(pudp, pud);
+ ret = split_pud(pudp, pud, GFP_PGTABLE_KERNEL, true);
if (ret)
goto out;
}
*/
if (ALIGN_DOWN(addr, PMD_SIZE) == addr)
goto out;
- ret = split_pmd(pmdp, pmd);
+ ret = split_pmd(pmdp, pmd, GFP_PGTABLE_KERNEL, true);
if (ret)
goto out;
}
return ret;
}
+static int __init split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr,
+ unsigned long next,
+ struct mm_walk *walk)
+{
+ pud_t pud = pudp_get(pudp);
+ int ret = 0;
+
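+	/*
+	 * This walker runs under stop_machine(), so any page table allocation
+	 * must not sleep; hence GFP_ATOMIC.
+	 */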
+ if (pud_leaf(pud))
+ ret = split_pud(pudp, pud, GFP_ATOMIC, false);
+
+ return ret;
+}
+
+static int __init split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr,
+ unsigned long next,
+ struct mm_walk *walk)
+{
+ pmd_t pmd = pmdp_get(pmdp);
+ int ret = 0;
+
+ if (pmd_leaf(pmd)) {
+ if (pmd_cont(pmd))
+ split_contpmd(pmdp);
+ ret = split_pmd(pmdp, pmd, GFP_ATOMIC, false);
+
+ /*
+ * We have split the pmd directly to ptes so there is no need to
+ * visit each pte to check if they are contpte.
+ */
+ walk->action = ACTION_CONTINUE;
+ }
+
+ return ret;
+}
+
+static int __init split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr,
+ unsigned long next,
+ struct mm_walk *walk)
+{
+ pte_t pte = __ptep_get(ptep);
+
+ if (pte_cont(pte))
+ split_contpte(ptep);
+
+ return 0;
+}
+
+static const struct mm_walk_ops split_to_ptes_ops __initconst = {
+ .pud_entry = split_to_ptes_pud_entry,
+ .pmd_entry = split_to_ptes_pmd_entry,
+ .pte_entry = split_to_ptes_pte_entry,
+};
+
+static bool linear_map_requires_bbml2 __initdata;
+
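+/*
+ * Rendezvous flag shared with the idmap-based asm code. It is used both for
+ * the KPTI G-to-nG conversion and for parking secondaries while the linear
+ * map is repainted, and is mapped read-write into the ID map (see
+ * create_idmap()).
+ */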
+u32 idmap_kpti_bbml2_flag;
+
+void __init init_idmap_kpti_bbml2_flag(void)
+{
+ WRITE_ONCE(idmap_kpti_bbml2_flag, 1);
+ /* Must be visible to other CPUs before stop_machine() is called. */
+ smp_mb();
+}
+
+static int __init linear_map_split_to_ptes(void *__unused)
+{
+ /*
+ * Repainting the linear map must be done by CPU0 (the boot CPU) because
+ * that's the only CPU that we know supports BBML2. The other CPUs will
+ * be held in a waiting area with the idmap active.
+ */
+ if (!smp_processor_id()) {
+ unsigned long lstart = _PAGE_OFFSET(vabits_actual);
+ unsigned long lend = PAGE_END;
+ unsigned long kstart = (unsigned long)lm_alias(_stext);
+ unsigned long kend = (unsigned long)lm_alias(__init_begin);
+ int ret;
+
+ /*
+ * Wait for all secondary CPUs to be put into the waiting area.
+ */
+ smp_cond_load_acquire(&idmap_kpti_bbml2_flag, VAL == num_online_cpus());
+
+ /*
+ * Walk all of the linear map [lstart, lend), except the kernel
+ * linear map alias [kstart, kend), and split all mappings to
+		 * PTE. The kernel alias remains static throughout runtime so it
+		 * can continue to be safely mapped with large mappings.
+ */
+ ret = walk_kernel_page_table_range_lockless(lstart, kstart,
+ &split_to_ptes_ops, NULL, NULL);
+ if (!ret)
+ ret = walk_kernel_page_table_range_lockless(kend, lend,
+ &split_to_ptes_ops, NULL, NULL);
+ if (ret)
+ panic("Failed to split linear map\n");
+ flush_tlb_kernel_range(lstart, lend);
+
+		/*
+		 * Relies on the dsb in flush_tlb_kernel_range() to ensure this
+		 * store cannot be reordered before the page table split
+		 * operations above.
+		 */
+ WRITE_ONCE(idmap_kpti_bbml2_flag, 0);
+ } else {
+ typedef void (wait_split_fn)(void);
+ extern wait_split_fn wait_linear_map_split_to_ptes;
+ wait_split_fn *wait_fn;
+
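+		/*
+		 * The wait routine must be entered via its physical alias
+		 * since it runs with the idmap installed.
+		 */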
+ wait_fn = (void *)__pa_symbol(wait_linear_map_split_to_ptes);
+
+ /*
+		 * At least one secondary CPU doesn't support BBML2, so it cannot
+		 * tolerate the size of the live mappings changing. So have the
+ * secondary CPUs wait for the boot CPU to make the changes
+ * with the idmap active and init_mm inactive.
+ */
+ cpu_install_idmap();
+ wait_fn();
+ cpu_uninstall_idmap();
+ }
+
+ return 0;
+}
+
+void __init linear_map_maybe_split_to_ptes(void)
+{
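+	/*
+	 * If the linear map was created with block mappings on the assumption
+	 * that BBML2 would be available, but at least one online CPU turned
+	 * out not to support it, repaint the linear map down to ptes.
+	 */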
+ if (linear_map_requires_bbml2 && !system_supports_bbml2_noabort()) {
+ init_idmap_kpti_bbml2_flag();
+ stop_machine(linear_map_split_to_ptes, NULL, cpu_online_mask);
+ }
+}
+
/*
* This function can only be used to modify existing table entries,
* without allocating new levels of table. Note that this permits the
early_kfence_pool = arm64_kfence_alloc_pool();
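+	/*
+	 * If the linear map is going to use block mappings but the direct map
+	 * must stay changeable at page granularity, later splits rely on
+	 * BBML2; remember that so we can repaint should a non-BBML2 CPU
+	 * appear.
+	 */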
+ linear_map_requires_bbml2 = !force_pte_mapping() && can_set_direct_map();
+
if (force_pte_mapping())
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
u64 va_offset);
static u8 idmap_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init,
- kpti_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init;
+ kpti_bbml2_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init;
static void __init create_idmap(void)
{
IDMAP_ROOT_LEVEL, (pte_t *)idmap_pg_dir, false,
__phys_to_virt(ptep) - ptep);
- if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) && !arm64_use_ng_mappings) {
- extern u32 __idmap_kpti_flag;
- phys_addr_t pa = __pa_symbol(&__idmap_kpti_flag);
+ if (linear_map_requires_bbml2 ||
+ (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) && !arm64_use_ng_mappings)) {
+ phys_addr_t pa = __pa_symbol(&idmap_kpti_bbml2_flag);
/*
* The KPTI G-to-nG conversion code needs a read-write mapping
- * of its synchronization flag in the ID map.
+ * of its synchronization flag in the ID map. This is also used
+ * when splitting the linear map to ptes if a secondary CPU
+ * doesn't support bbml2.
*/
- ptep = __pa_symbol(kpti_ptes);
+ ptep = __pa_symbol(kpti_bbml2_ptes);
__pi_map_range(&ptep, pa, pa + sizeof(u32), pa, PAGE_KERNEL,
IDMAP_ROOT_LEVEL, (pte_t *)idmap_pg_dir, false,
__phys_to_virt(ptep) - ptep);