/*
 * linux/arch/x86_64/mm/init.c
 *
 * Copyright (C) 1995 Linus Torvalds
 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 * Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/memory_hotplug.h>
#include <linux/nmi.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/numa.h>

#ifndef Dprintk
#define Dprintk(x...)
#endif

const struct dma_mapping_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);

static unsigned long dma_reserve __initdata;

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously in
 * physical space, so we can cache the place of the first one and move
 * around without checking the pgd every time.
 */

void show_mem(void)
{
        long i, total = 0, reserved = 0;
        long shared = 0, cached = 0;
        pg_data_t *pgdat;
        struct page *page;

        printk(KERN_INFO "Mem-info:\n");
        show_free_areas();
        printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));

        for_each_online_pgdat(pgdat) {
                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
                        /* this loop can take a while with 256 GB and 4k pages
                           so update the NMI watchdog */
                        if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
                                touch_nmi_watchdog();
                        }
                        if (!pfn_valid(pgdat->node_start_pfn + i))
                                continue;
                        page = pfn_to_page(pgdat->node_start_pfn + i);
                        total++;
                        if (PageReserved(page))
                                reserved++;
                        else if (PageSwapCache(page))
                                cached++;
                        else if (page_count(page))
                                shared += page_count(page) - 1;
                }
        }
        printk(KERN_INFO "%ld pages of RAM\n", total);
        printk(KERN_INFO "%ld reserved pages\n", reserved);
        printk(KERN_INFO "%ld pages shared\n", shared);
        printk(KERN_INFO "%ld pages swap cached\n", cached);
}

int after_bootmem;

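/*
 * Allocate a zeroed page for building kernel page tables: from the page
 * allocator once bootmem has been torn down, from bootmem before that.
 */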
static __init void *spp_getpage(void)
{
        void *ptr;
        if (after_bootmem)
                ptr = (void *) get_zeroed_page(GFP_ATOMIC);
        else
                ptr = alloc_bootmem_pages(PAGE_SIZE);
        if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
                panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");

        Dprintk("spp_getpage %p\n", ptr);
        return ptr;
}

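/*
 * Install a single kernel PTE for vaddr -> phys, walking the pgd/pud/pmd
 * hierarchy and allocating any missing intermediate tables with
 * spp_getpage().  Used to set up fixmap entries.
 */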
static __init void set_pte_phys(unsigned long vaddr,
                                unsigned long phys, pgprot_t prot)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte, new_pte;

        Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);

        pgd = pgd_offset_k(vaddr);
        if (pgd_none(*pgd)) {
                printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
                return;
        }
        pud = pud_offset(pgd, vaddr);
        if (pud_none(*pud)) {
                pmd = (pmd_t *) spp_getpage();
                set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
                if (pmd != pmd_offset(pud, 0)) {
                        printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud, 0));
                        return;
                }
        }
        pmd = pmd_offset(pud, vaddr);
        if (pmd_none(*pmd)) {
                pte = (pte_t *) spp_getpage();
                set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
                if (pte != pte_offset_kernel(pmd, 0)) {
                        printk("PAGETABLE BUG #02!\n");
                        return;
                }
        }
        new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);

        pte = pte_offset_kernel(pmd, vaddr);
        if (!pte_none(*pte) &&
            pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
                pte_ERROR(*pte);
        set_pte(pte, new_pte);

        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}

/* NOTE: this is meant to be run only at boot */
void __init
__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
        unsigned long address = __fix_to_virt(idx);

        if (idx >= __end_of_fixed_addresses) {
                printk("Invalid __set_fixmap\n");
                return;
        }
        set_pte_phys(address, phys, prot);
}

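/*
 * Physical page-frame range handed out by alloc_low_page() for the early
 * direct-mapping page tables; find_early_table_space() picks the start.
 */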
static unsigned long __initdata table_start;
static unsigned long __meminitdata table_end;

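/*
 * Grab the next page of the early page-table pool and return it zeroed and
 * temporarily mapped.  Once bootmem is up this simply uses the page
 * allocator instead.
 */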
static __meminit void *alloc_low_page(unsigned long *phys)
{
        unsigned long pfn = table_end++;
        void *adr;

        if (after_bootmem) {
                adr = (void *)get_zeroed_page(GFP_ATOMIC);
                *phys = __pa(adr);
                return adr;
        }

        if (pfn >= end_pfn)
                panic("alloc_low_page: ran out of memory");

        adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
        memset(adr, 0, PAGE_SIZE);
        *phys = pfn * PAGE_SIZE;
        return adr;
}

static __meminit void unmap_low_page(void *adr)
{
        if (after_bootmem)
                return;

        early_iounmap(adr, PAGE_SIZE);
}

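/*
 * Temporarily map a physical range with 2MB pages, using free slots in the
 * kernel text mapping (level2_kernel_pgt).  Only meant for early boot,
 * before the regular ioremap() machinery is available.
 */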
/* Must run before zap_low_mappings */
__meminit void *early_ioremap(unsigned long addr, unsigned long size)
{
        unsigned long vaddr;
        pmd_t *pmd, *last_pmd;
        int i, pmds;

        pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
        vaddr = __START_KERNEL_map;
        pmd = level2_kernel_pgt;
        last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
        for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
                for (i = 0; i < pmds; i++) {
                        if (pmd_present(pmd[i]))
                                goto next;
                }
                vaddr += addr & ~PMD_MASK;
                addr &= PMD_MASK;
                for (i = 0; i < pmds; i++, addr += PMD_SIZE)
                        set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
                __flush_tlb_all();
                return (void *)vaddr;
        next:
                ;
        }
        printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
        return NULL;
}

/* To avoid virtual aliases later */
__meminit void early_iounmap(void *addr, unsigned long size)
{
        unsigned long vaddr;
        pmd_t *pmd;
        int i, pmds;

        vaddr = (unsigned long)addr;
        pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
        pmd = level2_kernel_pgt + pmd_index(vaddr);
        for (i = 0; i < pmds; i++)
                pmd_clear(pmd + i);
        __flush_tlb_all();
}

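/*
 * Fill a PMD page with 2MB kernel mappings for [address, end).  Entries
 * that are already populated are left alone; past 'end' the remaining
 * entries are cleared, except when running after bootmem (memory hotplug).
 */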
static void __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
{
        int i = pmd_index(address);

        for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
                unsigned long entry;
                pmd_t *pmd = pmd_page + pmd_index(address);

                if (address >= end) {
                        if (!after_bootmem)
                                for (; i < PTRS_PER_PMD; i++, pmd++)
                                        set_pmd(pmd, __pmd(0));
                        break;
                }

                if (pmd_val(*pmd))
                        continue;

                entry = __PAGE_KERNEL_LARGE|_PAGE_GLOBAL|address;
                entry &= __supported_pte_mask;
                set_pmd(pmd, __pmd(entry));
        }
}

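/*
 * A PUD entry is already present: extend the existing PMD page for the new
 * range, under init_mm.page_table_lock.
 */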
static void __meminit
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
{
        pmd_t *pmd = pmd_offset(pud, 0);
        spin_lock(&init_mm.page_table_lock);
        phys_pmd_init(pmd, address, end);
        spin_unlock(&init_mm.page_table_lock);
        __flush_tlb_all();
}

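/*
 * Walk the PUD entries covering [addr, end): allocate a PMD page for each
 * slot that needs one and populate it with 2MB mappings, skipping ranges
 * the e820 map says are not backed by memory.
 */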
static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
{
        int i = pud_index(addr);

        for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
                unsigned long pmd_phys;
                pud_t *pud = pud_page + pud_index(addr);
                pmd_t *pmd;

                if (addr >= end)
                        break;

                if (!after_bootmem && !e820_any_mapped(addr, addr+PUD_SIZE, 0)) {
                        set_pud(pud, __pud(0));
                        continue;
                }

                if (pud_val(*pud)) {
                        phys_pmd_update(pud, addr, end);
                        continue;
                }

                pmd = alloc_low_page(&pmd_phys);
                spin_lock(&init_mm.page_table_lock);
                set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
                phys_pmd_init(pmd, addr, end);
                spin_unlock(&init_mm.page_table_lock);
                unmap_low_page(pmd);
        }
        __flush_tlb_all();
}

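/*
 * Work out the worst-case amount of memory the direct-mapping page tables
 * can need for physical memory up to 'end', and find a free physical range
 * for them in the e820 map.
 */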
static void __init find_early_table_space(unsigned long end)
{
        unsigned long puds, pmds, tables, start;

        puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
        pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
        tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
                 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);

        /* RED-PEN putting page tables only on node 0 could
           cause a hotspot and fill up ZONE_DMA. The page tables
           need roughly 0.5KB per GB. */
        start = 0x8000;
        table_start = find_e820_area(start, end, tables);
        if (table_start == -1UL)
                panic("Cannot find space for the kernel page tables");

        table_start >>= PAGE_SHIFT;
        table_end = table_start;

        early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
                     end, table_start << PAGE_SHIFT,
                     (table_start << PAGE_SHIFT) + tables);
}

/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
   This runs before bootmem is initialized and gets pages directly from the
   physical memory. To access them they are temporarily mapped. */
void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
{
        unsigned long next;

        Dprintk("init_memory_mapping\n");

        /*
         * Find space for the kernel direct mapping tables.
         * Later we should allocate these tables in the local node of the memory
         * mapped. Unfortunately this is done currently before the nodes are
         * discovered.
         */
        if (!after_bootmem)
                find_early_table_space(end);

        start = (unsigned long)__va(start);
        end = (unsigned long)__va(end);

        for (; start < end; start = next) {
                unsigned long pud_phys;
                pgd_t *pgd = pgd_offset_k(start);
                pud_t *pud;

                if (after_bootmem)
                        pud = pud_offset(pgd, start & PGDIR_MASK);
                else
                        pud = alloc_low_page(&pud_phys);

                next = start + PGDIR_SIZE;
                if (next > end)
                        next = end;
                phys_pud_init(pud, __pa(start), __pa(next));
                if (!after_bootmem)
                        set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
                unmap_low_page(pud);
        }

        if (!after_bootmem)
                mmu_cr4_features = read_cr4();
        __flush_tlb_all();

        reserve_early(table_start << PAGE_SHIFT, table_end << PAGE_SHIFT);
}

#ifndef CONFIG_NUMA
void __init paging_init(void)
{
        unsigned long max_zone_pfns[MAX_NR_ZONES];
        memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
        max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
        max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
        max_zone_pfns[ZONE_NORMAL] = end_pfn;

        memory_present(0, 0, end_pfn);
        sparse_init();
        free_area_init_nodes(max_zone_pfns);
}
#endif

/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
   from the CPU leading to inconsistent cache lines. address and size
   must be aligned to 2MB boundaries.
   Does nothing when the mapping doesn't exist. */
void __init clear_kernel_mapping(unsigned long address, unsigned long size)
{
        unsigned long end = address + size;

        BUG_ON(address & ~LARGE_PAGE_MASK);
        BUG_ON(size & ~LARGE_PAGE_MASK);

        for (; address < end; address += LARGE_PAGE_SIZE) {
                pgd_t *pgd = pgd_offset_k(address);
                pud_t *pud;
                pmd_t *pmd;
                if (pgd_none(*pgd))
                        continue;
                pud = pud_offset(pgd, address);
                if (pud_none(*pud))
                        continue;
                pmd = pmd_offset(pud, address);
                if (!pmd || pmd_none(*pmd))
                        continue;
                if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
                        /* Could handle this, but it should not happen currently. */
                        printk(KERN_ERR
                               "clear_kernel_mapping: mapping has been split. will leak memory\n");
                        pmd_ERROR(*pmd);
                }
                set_pmd(pmd, __pmd(0));
        }
        __flush_tlb_all();
}

/*
 * Memory hotplug specific functions
 */
void online_page(struct page *page)
{
        ClearPageReserved(page);
        init_page_count(page);
        __free_page(page);
        totalram_pages++;
        num_physpages++;
}

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Memory is always added to the NORMAL zone. This means you will never get
 * additional DMA/DMA32 memory.
 */
int arch_add_memory(int nid, u64 start, u64 size)
{
        struct pglist_data *pgdat = NODE_DATA(nid);
        struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
        int ret;

        init_memory_mapping(start, (start + size - 1));

        ret = __add_pages(zone, start_pfn, nr_pages);
        if (ret)
                goto error;

        return ret;
error:
        printk("%s: Problem encountered in __add_pages!\n", __func__);
        return ret;
}
EXPORT_SYMBOL_GPL(arch_add_memory);

#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
int memory_add_physaddr_to_nid(u64 start)
{
        return 0;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif

#endif /* CONFIG_MEMORY_HOTPLUG */

static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
                         kcore_vsyscall;

void __init mem_init(void)
{
        long codesize, reservedpages, datasize, initsize;

        pci_iommu_alloc();

        /* clear_bss() already cleared the empty_zero_page */

        /* temporary debugging - double check it's true: */
        {
                int i;

                for (i = 0; i < 1024; i++)
                        WARN_ON_ONCE(empty_zero_page[i]);
        }

        reservedpages = 0;

        /* this will put all low memory onto the freelists */
#ifdef CONFIG_NUMA
        totalram_pages = numa_free_all_bootmem();
#else
        totalram_pages = free_all_bootmem();
#endif
        reservedpages = end_pfn - totalram_pages -
                        absent_pages_in_range(0, end_pfn);

        after_bootmem = 1;

        codesize = (unsigned long) &_etext - (unsigned long) &_text;
        datasize = (unsigned long) &_edata - (unsigned long) &_etext;
        initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;

        /* Register memory areas for /proc/kcore */
        kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
        kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
                   VMALLOC_END-VMALLOC_START);
        kclist_add(&kcore_kernel, &_stext, _end - _stext);
        kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
        kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
                   VSYSCALL_END - VSYSCALL_START);

        printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
               (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
               end_pfn << (PAGE_SHIFT-10),
               codesize >> 10,
               reservedpages << (PAGE_SHIFT-10),
               datasize >> 10,
               initsize >> 10);
}

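/*
 * Free and poison the pages in [begin, end) so that a stale reference to
 * freed init memory is noticed rather than silently reusing old contents.
 */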
void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
        unsigned long addr;

        if (begin >= end)
                return;

        printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
        for (addr = begin; addr < end; addr += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(addr));
                init_page_count(virt_to_page(addr));
                memset((void *)(addr & ~(PAGE_SIZE-1)),
                       POISON_FREE_INITMEM, PAGE_SIZE);
                free_page(addr);
                totalram_pages++;
        }
#ifdef CONFIG_DEBUG_RODATA
        /*
         * This will make the __init pages not present and
         * not executable, so that any attempt to use a
         * __init function from now on will fault immediately
         * rather than spuriously later when memory gets reused.
         *
         * We only do this for DEBUG_RODATA to not break up the
         * 2MB kernel mapping just for this debug feature.
         */
        if (begin >= __START_KERNEL_map) {
                set_memory_rw(begin, (end - begin)/PAGE_SIZE);
                set_memory_np(begin, (end - begin)/PAGE_SIZE);
                set_memory_nx(begin, (end - begin)/PAGE_SIZE);
                rodata_test();
        }
#endif
}

void free_initmem(void)
{
        free_init_pages("unused kernel memory",
                        (unsigned long)(&__init_begin),
                        (unsigned long)(&__init_end));
}

#ifdef CONFIG_DEBUG_RODATA
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);

void mark_rodata_ro(void)
{
        unsigned long start = (unsigned long)_stext, end;

#ifdef CONFIG_HOTPLUG_CPU
        /* It must still be possible to apply SMP alternatives. */
        if (num_possible_cpus() > 1)
                start = (unsigned long)_etext;
#endif

#ifdef CONFIG_KPROBES
        start = (unsigned long)__start_rodata;
#endif

        end = (unsigned long)__end_rodata;
        start = (start + PAGE_SIZE - 1) & PAGE_MASK;
        end &= PAGE_MASK;
        if (end <= start)
                return;

        set_memory_ro(start, (end - start) >> PAGE_SHIFT);

        printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
               (end - start) >> 10);

#ifdef CONFIG_CPA_DEBUG
        printk("Testing CPA: undo %lx-%lx\n", start, end);
        set_memory_rw(start, (end-start) >> PAGE_SHIFT);

        printk("Testing CPA: again\n");
        set_memory_ro(start, (end-start) >> PAGE_SHIFT);
#endif
}
#endif

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
        free_init_pages("initrd memory", start, end);
}
#endif

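/*
 * Reserve a physical range with bootmem, on the right node under NUMA, and
 * track how many reserved pages fall below the DMA limit via
 * set_dma_reserve().
 */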
void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
#ifdef CONFIG_NUMA
        int nid = phys_to_nid(phys);
#endif
        unsigned long pfn = phys >> PAGE_SHIFT;
        if (pfn >= end_pfn) {
                /* This can happen with kdump kernels when accessing firmware
                   tables. */
                if (pfn < end_pfn_map)
                        return;
                printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
                       phys, len);
                return;
        }

        /* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_NUMA
        reserve_bootmem_node(NODE_DATA(nid), phys, len);
#else
        reserve_bootmem(phys, len);
#endif
        if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
                dma_reserve += len / PAGE_SIZE;
                set_dma_reserve(dma_reserve);
        }
}

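/*
 * Walk the kernel page tables to check whether addr is covered by a
 * present mapping; 2MB PMD mappings are handled as well.
 */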
int kern_addr_valid(unsigned long addr)
{
        unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        if (above != 0 && above != -1UL)
                return 0;

        pgd = pgd_offset_k(addr);
        if (pgd_none(*pgd))
                return 0;

        pud = pud_offset(pgd, addr);
        if (pud_none(*pud))
                return 0;

        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd))
                return 0;
        if (pmd_large(*pmd))
                return pfn_valid(pmd_pfn(*pmd));

        pte = pte_offset_kernel(pmd, addr);
        if (pte_none(*pte))
                return 0;
        return pfn_valid(pte_pfn(*pte));
}

/* A pseudo VMA to allow ptrace access for the vsyscall page. This only
   covers the 64bit vsyscall page now. 32bit has a real VMA now and does
   not need special handling anymore. */

static struct vm_area_struct gate_vma = {
        .vm_start = VSYSCALL_START,
        .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
        .vm_page_prot = PAGE_READONLY_EXEC,
        .vm_flags = VM_READ | VM_EXEC
};

struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef CONFIG_IA32_EMULATION
        if (test_tsk_thread_flag(tsk, TIF_IA32))
                return NULL;
#endif
        return &gate_vma;
}

int in_gate_area(struct task_struct *task, unsigned long addr)
{
        struct vm_area_struct *vma = get_gate_vma(task);
        if (!vma)
                return 0;
        return (addr >= vma->vm_start) && (addr < vma->vm_end);
}

/* Use this when you have no reliable task/vma, typically from interrupt
 * context. It is less reliable than using the task's vma and may give
 * false positives.
 */
int in_gate_area_no_task(unsigned long addr)
{
        return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}

const char *arch_vma_name(struct vm_area_struct *vma)
{
        if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
                return "[vdso]";
        if (vma == &gate_vma)
                return "[vsyscall]";
        return NULL;
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
 */
int __meminit vmemmap_populate(struct page *start_page,
                               unsigned long size, int node)
{
        unsigned long addr = (unsigned long)start_page;
        unsigned long end = (unsigned long)(start_page + size);
        unsigned long next;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;

        for (; addr < end; addr = next) {
                next = pmd_addr_end(addr, end);

                pgd = vmemmap_pgd_populate(addr, node);
                if (!pgd)
                        return -ENOMEM;
                pud = vmemmap_pud_populate(pgd, addr, node);
                if (!pud)
                        return -ENOMEM;

                pmd = pmd_offset(pud, addr);
                if (pmd_none(*pmd)) {
                        pte_t entry;
                        void *p = vmemmap_alloc_block(PMD_SIZE, node);
                        if (!p)
                                return -ENOMEM;

                        entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL_LARGE);
                        set_pmd(pmd, __pmd(pte_val(entry)));

                        printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
                               addr, addr + PMD_SIZE - 1, p, node);
                } else
                        vmemmap_verify((pte_t *)pmd, node, addr, next);
        }

        return 0;
}
#endif