#include <linux/mm.h>
#include <linux/gfp.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/fixmap.h>
#include <asm/mtrr.h>

#define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | __GFP_ZERO)

#ifdef CONFIG_HIGHPTE
#define PGALLOC_USER_GFP __GFP_HIGHMEM
#else
#define PGALLOC_USER_GFP 0
#endif

gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;

pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
	return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
}

pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
	struct page *pte;

	pte = alloc_pages(__userpte_alloc_gfp, 0);
	if (!pte)
		return NULL;
	if (!pgtable_page_ctor(pte)) {
		__free_page(pte);
		return NULL;
	}
	return pte;
}

static int __init setup_userpte(char *arg)
{
	if (!arg)
		return -EINVAL;

	/*
	 * "userpte=nohigh" disables allocation of user pagetables in
	 * high memory.
	 */
	if (strcmp(arg, "nohigh") == 0)
		__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
	else
		return -EINVAL;
	return 0;
}
early_param("userpte", setup_userpte);
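
/*
 * Illustrative usage (a sketch, not code from this file): on a
 * CONFIG_HIGHPTE kernel, booting with
 *
 *	userpte=nohigh
 *
 * on the kernel command line clears __GFP_HIGHMEM from
 * __userpte_alloc_gfp, forcing user page tables into lowmem.
 */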

void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
	pgtable_page_dtor(pte);
	paravirt_release_pte(page_to_pfn(pte));
	tlb_remove_page(tlb, pte);
}

#if CONFIG_PGTABLE_LEVELS > 2
void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
{
	struct page *page = virt_to_page(pmd);
	paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
	/*
	 * NOTE! For PAE, any changes to the top page-directory-pointer-table
	 * entries need a full cr3 reload to flush.
	 */
#ifdef CONFIG_X86_PAE
	tlb->need_flush_all = 1;
#endif
	pgtable_pmd_page_dtor(page);
	tlb_remove_page(tlb, page);
}

#if CONFIG_PGTABLE_LEVELS > 3
void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
{
	paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
	tlb_remove_page(tlb, virt_to_page(pud));
}
#endif	/* CONFIG_PGTABLE_LEVELS > 3 */
#endif	/* CONFIG_PGTABLE_LEVELS > 2 */

static inline void pgd_list_add(pgd_t *pgd)
{
	struct page *page = virt_to_page(pgd);

	list_add(&page->lru, &pgd_list);
}

static inline void pgd_list_del(pgd_t *pgd)
{
	struct page *page = virt_to_page(pgd);

	list_del(&page->lru);
}

#define UNSHARED_PTRS_PER_PGD				\
	(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)

static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
{
	BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm));
	virt_to_page(pgd)->index = (pgoff_t)mm;
}

struct mm_struct *pgd_page_get_mm(struct page *page)
{
	return (struct mm_struct *)page->index;
}

static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
{
	/* If the pgd points to a shared pagetable level (either the
	   ptes in non-PAE, or shared PMD in PAE), then just copy the
	   references from swapper_pg_dir. */
	if (CONFIG_PGTABLE_LEVELS == 2 ||
	    (CONFIG_PGTABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
	    CONFIG_PGTABLE_LEVELS == 4) {
		clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
				swapper_pg_dir + KERNEL_PGD_BOUNDARY,
				KERNEL_PGD_PTRS);
	}

	/* list required to sync kernel mapping updates */
	if (!SHARED_KERNEL_PMD) {
		pgd_set_mm(pgd, mm);
		pgd_list_add(pgd);
	}
}

static void pgd_dtor(pgd_t *pgd)
{
	if (SHARED_KERNEL_PMD)
		return;

	spin_lock(&pgd_lock);
	pgd_list_del(pgd);
	spin_unlock(&pgd_lock);
}

/*
 * List of all pgd's needed for non-PAE so it can invalidate entries
 * in both cached and uncached pgd's; not needed for PAE since the
 * kernel pmd is shared. If PAE were not to share the pmd a similar
 * tactic would be needed. This is essentially codepath-based locking
 * against pageattr.c; it is the unique case in which a valid change
 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
 * vmalloc faults work because attached pagetables are never freed.
 * -- nyc
 */

#ifdef CONFIG_X86_PAE
/*
 * In PAE mode, we need to do a cr3 reload (=tlb flush) when
 * updating the top-level pagetable entries to guarantee the
 * processor notices the update. Since this is expensive, and
 * all 4 top-level entries are used almost immediately in a
 * new process's life, we just pre-populate them here.
 *
 * Also, if we're in a paravirt environment where the kernel pmd is
 * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
 * and initialize the kernel pmds here.
 */
#define PREALLOCATED_PMDS	UNSHARED_PTRS_PER_PGD

void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
{
	paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);

	/* Note: almost everything apart from _PAGE_PRESENT is
	   reserved at the pmd (PDPT) level. */
	set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));

	/*
	 * According to Intel App note "TLBs, Paging-Structure Caches,
	 * and Their Invalidation", April 2007, document 317080-001,
	 * section 8.1: in PAE mode we explicitly have to flush the
	 * TLB via cr3 if the top-level pgd is changed...
	 */
	flush_tlb_mm(mm);
}
#else  /* !CONFIG_X86_PAE */

/* No need to prepopulate any pagetable entries in non-PAE modes. */
#define PREALLOCATED_PMDS	0

#endif	/* CONFIG_X86_PAE */

static void free_pmds(struct mm_struct *mm, pmd_t *pmds[])
{
	int i;

	for (i = 0; i < PREALLOCATED_PMDS; i++)
		if (pmds[i]) {
			pgtable_pmd_page_dtor(virt_to_page(pmds[i]));
			free_page((unsigned long)pmds[i]);
			mm_dec_nr_pmds(mm);
		}
}

static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[])
{
	int i;
	bool failed = false;
	gfp_t gfp = PGALLOC_GFP;

	if (mm == &init_mm)
		gfp &= ~__GFP_ACCOUNT;

	for (i = 0; i < PREALLOCATED_PMDS; i++) {
		pmd_t *pmd = (pmd_t *)__get_free_page(gfp);
		if (!pmd)
			failed = true;
		if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) {
			free_page((unsigned long)pmd);
			pmd = NULL;
			failed = true;
		}
		if (pmd)
			mm_inc_nr_pmds(mm);
		pmds[i] = pmd;
	}

	if (failed) {
		free_pmds(mm, pmds);
		return -ENOMEM;
	}

	return 0;
}

/*
 * Mop up any pmd pages which may still be attached to the pgd.
 * Normally they will be freed by munmap/exit_mmap, but any pmd we
 * preallocate which never got a corresponding vma will need to be
 * freed manually.
 */
static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
{
	int i;

	for (i = 0; i < PREALLOCATED_PMDS; i++) {
		pgd_t pgd = pgdp[i];

		if (pgd_val(pgd) != 0) {
			pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);

			pgdp[i] = native_make_pgd(0);

			paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
			pmd_free(mm, pmd);
			mm_dec_nr_pmds(mm);
		}
	}
}

static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
{
	pud_t *pud;
	int i;

	if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */
		return;

	pud = pud_offset(pgd, 0);

	for (i = 0; i < PREALLOCATED_PMDS; i++, pud++) {
		pmd_t *pmd = pmds[i];

		if (i >= KERNEL_PGD_BOUNDARY)
			memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
			       sizeof(pmd_t) * PTRS_PER_PMD);

		pud_populate(mm, pud, pmd);
	}
}

/*
 * Xen paravirt assumes that the pgd table occupies one page, and the
 * 64-bit kernel makes the same assumption.
 *
 * But a kernel with PAE paging that is not running as a Xen domain
 * only needs 32 bytes for the pgd instead of one page.
 */
#ifdef CONFIG_X86_PAE

#include <linux/slab.h>

#define PGD_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
#define PGD_ALIGN	32
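
/*
 * The arithmetic above: with PAE, PTRS_PER_PGD is 4 and sizeof(pgd_t)
 * is 8, so PGD_SIZE is 4 * 8 = 32 bytes. PGD_ALIGN is 32 as well
 * because the PAE page-directory-pointer table must be 32-byte
 * aligned (CR3 bits 4:0 are ignored in this paging mode).
 */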

static struct kmem_cache *pgd_cache;

static int __init pgd_cache_init(void)
{
	/*
	 * When a PAE kernel runs as a Xen domain, it does not use a
	 * shared kernel pmd, and that requires a whole page for the pgd.
	 */
	if (!SHARED_KERNEL_PMD)
		return 0;

	/*
	 * When a PAE kernel is not running as a Xen domain, it uses a
	 * shared kernel pmd, which does not require a whole page for the
	 * pgd: 32 bytes suffice. So at boot time we create a 32-byte
	 * slab cache for pgd table allocations.
	 */
	pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
				      SLAB_PANIC, NULL);
	if (!pgd_cache)
		return -ENOMEM;

	return 0;
}
core_initcall(pgd_cache_init);

static inline pgd_t *_pgd_alloc(void)
{
	/*
	 * Without SHARED_KERNEL_PMD, the PAE kernel is running as a Xen
	 * domain and we allocate one page for the pgd.
	 */
	if (!SHARED_KERNEL_PMD)
		return (pgd_t *)__get_free_page(PGALLOC_GFP);

	/*
	 * Otherwise the PAE kernel is not running as a Xen domain, and
	 * we can allocate a 32-byte slab object for the pgd to save
	 * memory.
	 */
	return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
}

static inline void _pgd_free(pgd_t *pgd)
{
	if (!SHARED_KERNEL_PMD)
		free_page((unsigned long)pgd);
	else
		kmem_cache_free(pgd_cache, pgd);
}
#else
static inline pgd_t *_pgd_alloc(void)
{
	return (pgd_t *)__get_free_page(PGALLOC_GFP);
}

static inline void _pgd_free(pgd_t *pgd)
{
	free_page((unsigned long)pgd);
}
#endif /* CONFIG_X86_PAE */

pgd_t *pgd_alloc(struct mm_struct *mm)
{
	pgd_t *pgd;
	pmd_t *pmds[PREALLOCATED_PMDS];

	pgd = _pgd_alloc();

	if (pgd == NULL)
		goto out;

	mm->pgd = pgd;

	if (preallocate_pmds(mm, pmds) != 0)
		goto out_free_pgd;

	if (paravirt_pgd_alloc(mm) != 0)
		goto out_free_pmds;

	/*
	 * Make sure that pre-populating the pmds is atomic with
	 * respect to anything walking the pgd_list, so that they
	 * never see a partially populated pgd.
	 */
	spin_lock(&pgd_lock);

	pgd_ctor(mm, pgd);
	pgd_prepopulate_pmd(mm, pgd, pmds);

	spin_unlock(&pgd_lock);

	return pgd;

out_free_pmds:
	free_pmds(mm, pmds);
out_free_pgd:
	_pgd_free(pgd);
out:
	return NULL;
}

void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
	pgd_mop_up_pmds(mm, pgd);
	pgd_dtor(pgd);
	paravirt_pgd_free(mm, pgd);
	_pgd_free(pgd);
}
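
/*
 * Lifecycle sketch (illustrative; the callers live in the core fork
 * code, not in this file):
 *
 *	mm->pgd = pgd_alloc(mm);	// mm_init() during fork/exec
 *	...
 *	pgd_free(mm, mm->pgd);		// final mmdrop() at teardown
 */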

/*
 * Used to set accessed or dirty bits in the page table entries
 * on other architectures. On x86, the accessed and dirty bits
 * are tracked by hardware. However, do_wp_page calls this function
 * to also make the pte writeable at the same time the dirty bit is
 * set. In that case we do actually need to write the PTE.
 */
int ptep_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pte_t *ptep,
			  pte_t entry, int dirty)
{
	int changed = !pte_same(*ptep, entry);

	if (changed && dirty) {
		*ptep = entry;
		pte_update(vma->vm_mm, address, ptep);
	}

	return changed;
}
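
/*
 * Sketch of the write-protect fault path that relies on the return
 * value (modelled on the core mm code; details vary by kernel version):
 *
 *	entry = pte_mkyoung(pte_mkdirty(pte_mkwrite(orig_pte)));
 *	if (ptep_set_access_flags(vma, address, ptep, entry, 1))
 *		update_mmu_cache(vma, address, ptep);
 */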

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pmdp_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pmd_t *pmdp,
			  pmd_t entry, int dirty)
{
	int changed = !pmd_same(*pmdp, entry);

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);

	if (changed && dirty) {
		*pmdp = entry;
		/*
		 * We had a write-protection fault here and changed the pmd
		 * to be more permissive. No need to flush the TLB for that,
		 * #PF is architecturally guaranteed to do that and in the
		 * worst-case we'll generate a spurious fault.
		 */
	}

	return changed;
}

int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
			  pud_t *pudp, pud_t entry, int dirty)
{
	int changed = !pud_same(*pudp, entry);

	VM_BUG_ON(address & ~HPAGE_PUD_MASK);

	if (changed && dirty) {
		*pudp = entry;
		/*
		 * We had a write-protection fault here and changed the pud
		 * to be more permissive. No need to flush the TLB for that,
		 * #PF is architecturally guaranteed to do that and in the
		 * worst-case we'll generate a spurious fault.
		 */
	}

	return changed;
}
#endif

int ptep_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long addr, pte_t *ptep)
{
	int ret = 0;

	if (pte_young(*ptep))
		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
					 (unsigned long *) &ptep->pte);

	if (ret)
		pte_update(vma->vm_mm, addr, ptep);

	return ret;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pmdp_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long addr, pmd_t *pmdp)
{
	int ret = 0;

	if (pmd_young(*pmdp))
		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
					 (unsigned long *)pmdp);

	return ret;
}
int pudp_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long addr, pud_t *pudp)
{
	int ret = 0;

	if (pud_young(*pudp))
		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
					 (unsigned long *)pudp);

	return ret;
}
#endif

int ptep_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pte_t *ptep)
{
	/*
	 * On x86 CPUs, clearing the accessed bit without a TLB flush
	 * doesn't cause data corruption. [ It could cause incorrect
	 * page aging and the (mistaken) reclaim of hot pages, but the
	 * chance of that should be relatively low. ]
	 *
	 * So as a performance optimization don't flush the TLB when
	 * clearing the accessed bit, it will eventually be flushed by
	 * a context switch or a VM operation anyway. [ In the rare
	 * event of it not getting flushed for a long time the delay
	 * shouldn't really matter because there's no real memory
	 * pressure for swapout to react to. ]
	 */
	return ptep_test_and_clear_young(vma, address, ptep);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pmdp_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pmd_t *pmdp)
{
	int young;

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);

	young = pmdp_test_and_clear_young(vma, address, pmdp);
	if (young)
		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);

	return young;
}
#endif

/**
 * reserve_top_address - reserves a hole in the top of kernel address space
 * @reserve: size of hole to reserve
 *
 * Can be used to relocate the fixmap area and poke a hole in the top
 * of kernel address space to make room for a hypervisor.
 */
void __init reserve_top_address(unsigned long reserve)
{
#ifdef CONFIG_X86_32
	BUG_ON(fixmaps_set > 0);
	__FIXADDR_TOP = round_down(-reserve, 1 << PMD_SHIFT) - PAGE_SIZE;
	printk(KERN_INFO "Reserving virtual address space above 0x%08lx (rounded to 0x%08lx)\n",
	       -reserve, __FIXADDR_TOP + PAGE_SIZE);
#endif
}

int fixmaps_set;

void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
{
	unsigned long address = __fix_to_virt(idx);

	if (idx >= __end_of_fixed_addresses) {
		BUG();
		return;
	}
	set_pte_vaddr(address, pte);
	fixmaps_set++;
}

void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys,
		       pgprot_t flags)
{
	__native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags));
}

#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
/**
 * pud_set_huge - setup kernel PUD mapping
 *
 * MTRRs can override PAT memory types with 4KiB granularity. Therefore, this
 * function sets up a huge page only if any of the following conditions are met:
 *
 * - MTRRs are disabled, or
 *
 * - MTRRs are enabled and the range is completely covered by a single MTRR, or
 *
 * - MTRRs are enabled and the corresponding MTRR memory type is WB, which
 *   has no effect on the requested PAT memory type.
 *
 * Callers should try to decrease page size (1GB -> 2MB -> 4K) if the bigger
 * page mapping attempt fails.
 *
 * Returns 1 on success and 0 on failure.
 */
int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
	u8 mtrr, uniform;

	mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform);
	if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
	    (mtrr != MTRR_TYPE_WRBACK))
		return 0;

	prot = pgprot_4k_2_large(prot);

	set_pte((pte_t *)pud, pfn_pte(
		(u64)addr >> PAGE_SHIFT,
		__pgprot(pgprot_val(prot) | _PAGE_PSE)));

	return 1;
}

/**
 * pmd_set_huge - setup kernel PMD mapping
 *
 * See text over pud_set_huge() above.
 *
 * Returns 1 on success and 0 on failure.
 */
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
	u8 mtrr, uniform;

	mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform);
	if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
	    (mtrr != MTRR_TYPE_WRBACK)) {
		pr_warn_once("%s: Cannot satisfy [mem %#010llx-%#010llx] with a huge-page mapping due to MTRR override.\n",
			     __func__, addr, addr + PMD_SIZE);
		return 0;
	}

	prot = pgprot_4k_2_large(prot);

	set_pte((pte_t *)pmd, pfn_pte(
		(u64)addr >> PAGE_SHIFT,
		__pgprot(pgprot_val(prot) | _PAGE_PSE)));

	return 1;
}
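
/*
 * Caller-side sketch of the "1GB -> 2MB -> 4K" fallback described in the
 * pud_set_huge() comment (illustrative pseudo-code; map_with_4k_ptes()
 * is a hypothetical helper, the real logic lives in the ioremap code):
 *
 *	if (pud_set_huge(pud, phys, prot))
 *		return 0;			// mapped with one 1GiB entry
 *	if (pmd_set_huge(pmd, phys, prot))
 *		return 0;			// mapped with 2MiB entries
 *	return map_with_4k_ptes(addr, phys, prot);
 */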

/**
 * pud_clear_huge - clear kernel PUD mapping when it is set
 *
 * Returns 1 on success and 0 on failure (no PUD map is found).
 */
int pud_clear_huge(pud_t *pud)
{
	if (pud_large(*pud)) {
		pud_clear(pud);
		return 1;
	}

	return 0;
}

/**
 * pmd_clear_huge - clear kernel PMD mapping when it is set
 *
 * Returns 1 on success and 0 on failure (no PMD map is found).
 */
int pmd_clear_huge(pmd_t *pmd)
{
	if (pmd_large(*pmd)) {
		pmd_clear(pmd);
		return 1;
	}

	return 0;
}
#endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */