From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: mm: emergency pool
Patch-mainline: No
References: FATE#303834

Provide a means to reserve a specific number of pages.

The emergency pool is kept separate from the min watermark because
ALLOC_HARDER and ALLOC_HIGH modify the watermark in a relative way and
thus do not guarantee a strict minimum.
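
To illustrate (a user-space sketch, not part of the patch; the numbers
are made up): with pages_min = 1024, the two relative adjustments leave
only 384 pages, whereas a pages_emerg term added outside the scaling
holds exactly.

	/* sketch: relative watermark scaling vs. a strict emergency floor */
	#include <stdio.h>

	int main(void)
	{
		unsigned long pages_min = 1024, pages_emerg = 256;
		unsigned long min = pages_min;

		min -= min / 2;	/* ALLOC_HIGH halves the watermark     */
		min -= min / 4;	/* ALLOC_HARDER shaves another quarter */

		printf("effective watermark: %lu\n", min);	/* 384 */
		/* pages_emerg is added back unscaled, so it is strict: */
		printf("with emergency pool: %lu\n", min + pages_emerg);
		return 0;
	}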

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Neil Brown <neilb@suse.de>
Acked-by: Suresh Jayaraman <sjayaraman@suse.de>

---
 include/linux/mmzone.h |    6 ++-
 mm/page_alloc.c        |   86 ++++++++++++++++++++++++++++++++++++++++++-------
 mm/vmstat.c            |    6 +--
 3 files changed, 83 insertions(+), 15 deletions(-)

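A quick worked example of how the reserve is spread over the zones
(illustrative numbers only, assuming 4 KiB pages, i.e. PAGE_SHIFT == 12):
__setup_per_zone_pages_min() below distributes pages_emerg across the
lowmem zones in proportion to their size, mirroring the existing
pages_min logic; highmem zones get no emergency pages.

	var_free_kbytes = 1024  =>  pages_emerg = 1024 >> (12 - 10) = 256 pages

	zone      present_pages   tmp_emerg = 256 * present / lowmem_pages
	DMA                4096   256 *   4096 / 262144 =   4
	Normal           258048   256 * 258048 / 262144 = 252
	HighMem              --   zone->pages_emerg = 0
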
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -206,7 +206,10 @@ enum zone_type {
 
 struct zone {
 	/* Fields commonly accessed by the page allocator */
-	unsigned long		pages_min, pages_low, pages_high;
+	unsigned long		pages_high;	/* we stop kswapd */
+	unsigned long		pages_low;	/* we wake up kswapd */
+	unsigned long		pages_min;	/* we enter direct reclaim */
+	unsigned long		pages_emerg;	/* emergency pool */
 	/*
 	 * We don't know if the memory that we're going to allocate will be freeable
 	 * or/and it will be released eventually, so to avoid totally wasting several
@@ -674,6 +677,7 @@ int sysctl_min_unmapped_ratio_sysctl_han
 			struct file *, void __user *, size_t *, loff_t *);
 int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
 			struct file *, void __user *, size_t *, loff_t *);
+int adjust_memalloc_reserve(int pages);
 
 extern int numa_zonelist_order_handler(struct ctl_table *, int,
 			struct file *, void __user *, size_t *, loff_t *);
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -122,6 +122,8 @@ static char * const zone_names[MAX_NR_ZO
 
 static DEFINE_SPINLOCK(min_free_lock);
 int min_free_kbytes = 1024;
+static DEFINE_MUTEX(var_free_mutex);
+int var_free_kbytes;
 
 unsigned long __meminitdata nr_kernel_pages;
 unsigned long __meminitdata nr_all_pages;
@@ -1241,7 +1243,7 @@ int zone_watermark_ok(struct zone *z, in
 	if (alloc_flags & ALLOC_HARDER)
 		min -= min / 4;
 
-	if (free_pages <= min + z->lowmem_reserve[classzone_idx])
+	if (free_pages <= min+z->lowmem_reserve[classzone_idx]+z->pages_emerg)
 		return 0;
 	for (o = 0; o < order; o++) {
 		/* At the next order, this order's pages become unavailable */
@@ -1508,7 +1510,7 @@ __alloc_pages_internal(gfp_t gfp_mask, u
 	struct reclaim_state reclaim_state;
 	struct task_struct *p = current;
 	int do_retry;
-	int alloc_flags;
+	int alloc_flags = 0;
 	unsigned long did_some_progress;
 	unsigned long pages_reclaimed = 0;
 
@@ -1679,9 +1681,9 @@ nopage:
 			printk(KERN_INFO "everything is working fine. Allocations from irqs cannot be\n");
 			printk(KERN_INFO "perfectly reliable and the kernel is designed to handle that.\n");
 		}
-		printk(KERN_INFO "%s: page allocation failure."
-			" order:%d, mode:0x%x\n",
-			p->comm, order, gfp_mask);
+		printk(KERN_WARNING "%s: page allocation failure."
+			" order:%d, mode:0x%x, alloc_flags:0x%x, pflags:0x%x\n",
+			p->comm, order, gfp_mask, alloc_flags, p->flags);
 		dump_stack();
 		show_mem();
 	}
@@ -1945,9 +1947,9 @@ void show_free_areas(void)
 			"\n",
 			zone->name,
 			K(zone_page_state(zone, NR_FREE_PAGES)),
-			K(zone->pages_min),
-			K(zone->pages_low),
-			K(zone->pages_high),
+			K(zone->pages_emerg + zone->pages_min),
+			K(zone->pages_emerg + zone->pages_low),
+			K(zone->pages_emerg + zone->pages_high),
 			K(zone_page_state(zone, NR_ACTIVE)),
 			K(zone_page_state(zone, NR_INACTIVE)),
 			K(zone->present_pages),
@@ -4211,7 +4213,7 @@ static void calculate_totalreserve_pages
 		}
 
 		/* we treat pages_high as reserved pages. */
-		max += zone->pages_high;
+		max += zone->pages_high + zone->pages_emerg;
 
 		if (max > zone->present_pages)
 			max = zone->present_pages;
@@ -4268,7 +4270,8 @@ static void setup_per_zone_lowmem_reserv
  */
 static void __setup_per_zone_pages_min(void)
 {
-	unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
+	unsigned pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
+	unsigned pages_emerg = var_free_kbytes >> (PAGE_SHIFT - 10);
 	unsigned long lowmem_pages = 0;
 	struct zone *zone;
 	unsigned long flags;
@@ -4280,11 +4283,13 @@ static void __setup_per_zone_pages_min(v
 	}
 
 	for_each_zone(zone) {
-		u64 tmp;
+		u64 tmp, tmp_emerg;
 
 		spin_lock_irqsave(&zone->lock, flags);
 		tmp = (u64)pages_min * zone->present_pages;
 		do_div(tmp, lowmem_pages);
+		tmp_emerg = (u64)pages_emerg * zone->present_pages;
+		do_div(tmp_emerg, lowmem_pages);
 		if (is_highmem(zone)) {
 			/*
 			 * __GFP_HIGH and PF_MEMALLOC allocations usually don't
@@ -4303,12 +4308,14 @@ static void __setup_per_zone_pages_min(v
 			if (min_pages > 128)
 				min_pages = 128;
 			zone->pages_min = min_pages;
+			zone->pages_emerg = 0;
 		} else {
 			/*
 			 * If it's a lowmem zone, reserve a number of pages
 			 * proportionate to the zone's size.
 			 */
 			zone->pages_min = tmp;
+			zone->pages_emerg = tmp_emerg;
 		}
 
 		zone->pages_low   = zone->pages_min + (tmp >> 2);
@@ -4330,6 +4337,63 @@ void setup_per_zone_pages_min(void)
 	spin_unlock_irqrestore(&min_free_lock, flags);
 }
 
+static void __adjust_memalloc_reserve(int pages)
+{
+	var_free_kbytes += pages << (PAGE_SHIFT - 10);
+	BUG_ON(var_free_kbytes < 0);
+	setup_per_zone_pages_min();
+}
+
+static int test_reserve_limits(void)
+{
+	struct zone *zone;
+	int node;
+
+	for_each_zone(zone)
+		wakeup_kswapd(zone, 0);
+
+	for_each_online_node(node) {
+		struct page *page = alloc_pages_node(node, GFP_KERNEL, 0);
+		if (!page)
+			return -ENOMEM;
+
+		__free_page(page);
+	}
+
+	return 0;
+}
+
+/**
+ * adjust_memalloc_reserve - adjust the memalloc reserve
+ * @pages: number of pages to add
+ *
+ * Adds a number of pages to the memalloc reserve; if the number is
+ * positive, it kicks reclaim into action to satisfy the raised
+ * watermarks.
+ *
+ * Returns -ENOMEM if it fails to satisfy the watermarks.
+ */
+int adjust_memalloc_reserve(int pages)
+{
+	int err = 0;
+
+	mutex_lock(&var_free_mutex);
+	__adjust_memalloc_reserve(pages);
+	if (pages > 0) {
+		err = test_reserve_limits();
+		if (err) {
+			__adjust_memalloc_reserve(-pages);
+			goto unlock;
+		}
+	}
+	printk(KERN_DEBUG "Emergency reserve: %d kB\n", var_free_kbytes);
+
+unlock:
+	mutex_unlock(&var_free_mutex);
+	return err;
+}
+EXPORT_SYMBOL_GPL(adjust_memalloc_reserve);
+
 /*
  * Initialise min_free_kbytes.
  *
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -692,9 +692,9 @@ static void zoneinfo_show_print(struct s
 		   "\n        spanned  %lu"
 		   "\n        present  %lu",
 		   zone_page_state(zone, NR_FREE_PAGES),
-		   zone->pages_min,
-		   zone->pages_low,
-		   zone->pages_high,
+		   zone->pages_emerg + zone->pages_min,
+		   zone->pages_emerg + zone->pages_low,
+		   zone->pages_emerg + zone->pages_high,
 		   zone->pages_scanned,
 		   zone->nr_scan_active, zone->nr_scan_inactive,
 		   zone->spanned_pages,
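
A minimal usage sketch of the exported interface (the my_subsys_* caller
is hypothetical; in the SoN series the reserve is grown by the
swap-over-network patches that follow):

	#include <linux/mmzone.h>	/* adjust_memalloc_reserve() */

	#define MY_RESERVE_PAGES 64	/* illustrative size only */

	static int my_subsys_start(void)
	{
		/* Grows var_free_kbytes, rebuilds the per-zone watermarks
		 * and kicks reclaim; returns -ENOMEM if the raised
		 * watermarks cannot be met, rolling the reserve back. */
		return adjust_memalloc_reserve(MY_RESERVE_PAGES);
	}

	static void my_subsys_stop(void)
	{
		/* A negative argument shrinks the reserve again. */
		adjust_memalloc_reserve(-MY_RESERVE_PAGES);
	}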