1 From: jbeulich@novell.com
2 Subject: xen: improve balloon driver's interaction with mm's accounting
4 Patch-mainline: obsolete
6 --- sle11-2009-08-26.orig/arch/x86/kernel/e820-xen.c 2009-08-31 11:47:11.000000000 +0200
7 +++ sle11-2009-08-26/arch/x86/kernel/e820-xen.c 2009-06-25 14:20:32.000000000 +0200
8 @@ -1221,6 +1221,11 @@ int __init e820_find_active_region(const
10 u64 align = PAGE_SIZE;
13 + if (last_pfn > xen_start_info->nr_pages)
14 + last_pfn = xen_start_info->nr_pages;
17 *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT;
18 *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT;
20 @@ -1255,6 +1260,10 @@ void __init e820_register_active_regions
22 &ei_startpfn, &ei_endpfn))
23 add_active_range(nid, ei_startpfn, ei_endpfn);
26 + add_active_range(nid, last_pfn, last_pfn);
31 --- sle11-2009-08-26.orig/arch/x86/mm/init_32-xen.c 2009-08-31 11:47:11.000000000 +0200
32 +++ sle11-2009-08-26/arch/x86/mm/init_32-xen.c 2009-08-26 12:11:50.000000000 +0200
33 @@ -399,8 +399,7 @@ static void __init add_one_highpage_init
35 ClearPageReserved(page);
36 init_page_count(page);
37 - if (pfn < xen_start_info->nr_pages)
43 @@ -450,8 +449,16 @@ void __init add_highpages_with_active_re
45 static void __init set_highmem_pages_init(void)
49 add_highpages_with_active_regions(0, highstart_pfn, highend_pfn);
51 + /* XEN: init high-mem pages outside initial allocation. */
52 + for (pfn = xen_start_info->nr_pages; pfn < highend_pfn; pfn++) {
53 + ClearPageReserved(pfn_to_page(pfn));
54 + init_page_count(pfn_to_page(pfn));
57 totalram_pages += totalhigh_pages;
59 #endif /* !CONFIG_NUMA */
60 @@ -1006,11 +1013,10 @@ void __init mem_init(void)
62 /* this will put all low memory onto the freelists */
63 totalram_pages += free_all_bootmem();
64 - /* XEN: init and count low-mem pages outside initial allocation. */
65 + /* XEN: init low-mem pages outside initial allocation. */
66 for (pfn = xen_start_info->nr_pages; pfn < max_low_pfn; pfn++) {
67 ClearPageReserved(pfn_to_page(pfn));
68 init_page_count(pfn_to_page(pfn));
73 --- sle11-2009-08-26.orig/arch/x86/mm/init_64-xen.c 2009-08-31 11:47:11.000000000 +0200
74 +++ sle11-2009-08-26/arch/x86/mm/init_64-xen.c 2009-08-26 12:11:53.000000000 +0200
75 @@ -1085,11 +1085,10 @@ void __init mem_init(void)
77 totalram_pages = free_all_bootmem();
79 - /* XEN: init and count pages outside initial allocation. */
80 + /* XEN: init pages outside initial allocation. */
81 for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
82 ClearPageReserved(pfn_to_page(pfn));
83 init_page_count(pfn_to_page(pfn));
86 reservedpages = max_pfn - totalram_pages -
87 absent_pages_in_range(0, max_pfn);
88 --- sle11-2009-08-26.orig/drivers/xen/balloon/balloon.c 2009-06-29 15:44:49.000000000 +0200
89 +++ sle11-2009-08-26/drivers/xen/balloon/balloon.c 2009-08-31 11:47:38.000000000 +0200
91 #include <linux/sched.h>
92 #include <linux/errno.h>
94 +#include <linux/swap.h>
95 #include <linux/mman.h>
96 #include <linux/smp_lock.h>
97 #include <linux/pagemap.h>
98 @@ -81,11 +82,7 @@ struct balloon_stats balloon_stats;
99 /* We increase/decrease in batches which fit in a page */
100 static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
102 -/* VM /proc information for memory */
103 -extern unsigned long totalram_pages;
105 #ifdef CONFIG_HIGHMEM
106 -extern unsigned long totalhigh_pages;
107 #define inc_totalhigh_pages() (totalhigh_pages++)
108 #define dec_totalhigh_pages() (totalhigh_pages--)
110 @@ -121,29 +118,44 @@ static struct timer_list balloon_timer;
111 printk(KERN_WARNING "xen_mem: " fmt, ##args)
113 /* balloon_append: add the given page to the balloon. */
114 -static void balloon_append(struct page *page)
115 +static void balloon_append(struct page *page, int account)
119 /* Lowmem is re-populated first, so highmem pages go at list tail. */
120 if (PageHighMem(page)) {
121 list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
123 - dec_totalhigh_pages();
125 + dec_totalhigh_pages();
127 list_add(PAGE_TO_LIST(page), &ballooned_pages);
131 + pfn = page_to_pfn(page);
133 + SetPageReserved(page);
134 + set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
135 + page_zone(page)->present_pages--;
137 + BUG_ON(!PageReserved(page));
138 + WARN_ON_ONCE(phys_to_machine_mapping_valid(pfn));
142 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
143 -static struct page *balloon_retrieve(void)
144 +static struct page *balloon_retrieve(int *was_empty)
149 if (list_empty(&ballooned_pages))
152 page = LIST_TO_PAGE(ballooned_pages.next);
154 + BUG_ON(!PageReserved(page));
156 if (PageHighMem(page)) {
158 @@ -151,6 +163,9 @@ static struct page *balloon_retrieve(voi
162 + zone = page_zone(page);
163 + *was_empty |= !populated_zone(zone);
164 + zone->present_pages++;
168 @@ -236,6 +251,7 @@ static int increase_reservation(unsigned
169 unsigned long pfn, i, flags;
172 + int need_zonelists_rebuild = 0;
173 struct xen_memory_reservation reservation = {
176 @@ -261,7 +277,7 @@ static int increase_reservation(unsigned
179 for (i = 0; i < rc; i++) {
180 - page = balloon_retrieve();
181 + page = balloon_retrieve(&need_zonelists_rebuild);
182 BUG_ON(page == NULL);
184 pfn = page_to_pfn(page);
185 @@ -294,6 +310,14 @@ static int increase_reservation(unsigned
187 balloon_unlock(flags);
190 + setup_per_zone_pages_min();
191 + if (need_zonelists_rebuild)
192 + build_all_zonelists();
194 + vm_total_pages = nr_free_pagecache_pages();
197 return rc < 0 ? rc : rc != nr_pages;
200 @@ -352,8 +376,7 @@ static int decrease_reservation(unsigned
201 /* No more mappings: invalidate P2M and add to balloon. */
202 for (i = 0; i < nr_pages; i++) {
203 pfn = mfn_to_pfn(frame_list[i]);
204 - set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
205 - balloon_append(pfn_to_page(pfn));
206 + balloon_append(pfn_to_page(pfn), 1);
209 set_xen_guest_handle(reservation.extent_start, frame_list);
210 @@ -541,8 +564,11 @@ static int __init balloon_init(void)
211 /* Initialise the balloon with excess memory space. */
212 for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
213 page = pfn_to_page(pfn);
214 - if (!PageReserved(page))
215 - balloon_append(page);
216 + if (!PageReserved(page)) {
217 + SetPageReserved(page);
218 + set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
219 + balloon_append(page, 0);
224 @@ -577,7 +603,7 @@ void balloon_update_driver_allowance(lon
225 static int dealloc_pte_fn(
226 pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
228 - unsigned long mfn = pte_mfn(*pte);
229 + unsigned long pfn, mfn = pte_mfn(*pte);
231 struct xen_memory_reservation reservation = {
233 @@ -586,7 +612,9 @@ static int dealloc_pte_fn(
235 set_xen_guest_handle(reservation.extent_start, &mfn);
236 set_pte_at(&init_mm, addr, pte, __pte_ma(0));
237 - set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
238 + pfn = __pa(addr) >> PAGE_SHIFT;
239 + set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
240 + SetPageReserved(pfn_to_page(pfn));
241 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
244 @@ -644,6 +672,9 @@ struct page **alloc_empty_pages_and_page
247 totalram_pages = --bs.current_pages;
248 + if (PageHighMem(page))
249 + dec_totalhigh_pages();
250 + page_zone(page)->present_pages--;
252 balloon_unlock(flags);
254 @@ -658,7 +689,7 @@ struct page **alloc_empty_pages_and_page
258 - balloon_append(pagevec[i]);
259 + balloon_append(pagevec[i], 0);
260 balloon_unlock(flags);
263 @@ -676,7 +707,7 @@ void free_empty_pages_and_pagevec(struct
265 for (i = 0; i < nr_pages; i++) {
266 BUG_ON(page_count(pagevec[i]) != 1);
267 - balloon_append(pagevec[i]);
268 + balloon_append(pagevec[i], 0);
270 balloon_unlock(flags);
272 @@ -690,7 +721,7 @@ void balloon_release_driver_page(struct
276 - balloon_append(page);
277 + balloon_append(page, 1);
279 balloon_unlock(flags);
281 --- sle11-2009-08-26.orig/mm/page_alloc.c 2009-08-31 11:47:11.000000000 +0200
282 +++ sle11-2009-08-26/mm/page_alloc.c 2009-07-31 15:14:57.000000000 +0200
283 @@ -4343,6 +4343,23 @@ static void __setup_per_zone_pages_min(v
284 spin_unlock_irqrestore(&zone->lock, flags);
288 + for_each_zone(zone) {
291 + if (!populated_zone(zone))
293 + for_each_online_cpu(cpu) {
294 + unsigned long high;
296 + high = percpu_pagelist_fraction
297 + ? zone->present_pages / percpu_pagelist_fraction
298 + : 5 * zone_batchsize(zone);
299 + setup_pagelist_highmark(zone_pcp(zone, cpu), high);
304 /* update totalreserve_pages */
305 calculate_totalreserve_pages();