From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: mm: emergency pool
Patch-mainline: No
References: FATE#303834

Provide a means to reserve a specific number of pages.

The emergency pool is kept separate from the min watermark because ALLOC_HARDER
and ALLOC_HIGH modify the watermark in a relative way and thus do not ensure
a strict minimum.

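Concretely, with this patch the test in zone_watermark_ok() becomes (a sketch,
ignoring the per-order loop that follows it):

	if (free_pages <= min + z->lowmem_reserve[classzone_idx] + z->pages_emerg)
		return 0;

ALLOC_HIGH and ALLOC_HARDER only scale 'min' down, while pages_emerg is added
outside that scaling, so it acts as a strict floor rather than a relative one.
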
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Neil Brown <neilb@suse.de>
Acked-by: Suresh Jayaraman <sjayaraman@suse.de>

---
 include/linux/mmzone.h |    6 ++-
 mm/page_alloc.c        |   86 ++++++++++++++++++++++++++++++++++++++++++-------
 mm/vmstat.c            |    6 +--
 3 files changed, 83 insertions(+), 15 deletions(-)

--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -206,7 +206,10 @@ enum zone_type {
 
 struct zone {
 	/* Fields commonly accessed by the page allocator */
-	unsigned long	pages_min, pages_low, pages_high;
+	unsigned long	pages_high;	/* we stop kswapd */
+	unsigned long	pages_low;	/* we wake up kswapd */
+	unsigned long	pages_min;	/* we enter direct reclaim */
+	unsigned long	pages_emerg;	/* emergency pool */
 	/*
 	 * We don't know if the memory that we're going to allocate will be freeable
 	 * or/and it will be released eventually, so to avoid totally wasting several
@@ -674,6 +677,7 @@ int sysctl_min_unmapped_ratio_sysctl_han
 			struct file *, void __user *, size_t *, loff_t *);
 int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
 			struct file *, void __user *, size_t *, loff_t *);
+int adjust_memalloc_reserve(int pages);
 
 extern int numa_zonelist_order_handler(struct ctl_table *, int,
 			struct file *, void __user *, size_t *, loff_t *);
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -122,6 +122,8 @@ static char * const zone_names[MAX_NR_ZO
 
 static DEFINE_SPINLOCK(min_free_lock);
 int min_free_kbytes = 1024;
+static DEFINE_MUTEX(var_free_mutex);
+int var_free_kbytes;
 
 unsigned long __meminitdata nr_kernel_pages;
 unsigned long __meminitdata nr_all_pages;
@@ -1241,7 +1243,7 @@ int zone_watermark_ok(struct zone *z, in
 	if (alloc_flags & ALLOC_HARDER)
 		min -= min / 4;
 
-	if (free_pages <= min + z->lowmem_reserve[classzone_idx])
+	if (free_pages <= min+z->lowmem_reserve[classzone_idx]+z->pages_emerg)
 		return 0;
 	for (o = 0; o < order; o++) {
 		/* At the next order, this order's pages become unavailable */
@@ -1508,7 +1510,7 @@ __alloc_pages_internal(gfp_t gfp_mask, u
 	struct reclaim_state reclaim_state;
 	struct task_struct *p = current;
 	int do_retry;
-	int alloc_flags;
+	int alloc_flags = 0;
 	unsigned long did_some_progress;
 	unsigned long pages_reclaimed = 0;
 
@@ -1679,9 +1681,9 @@ nopage:
 			printk(KERN_INFO "everything is working fine. Allocations from irqs cannot be\n");
 			printk(KERN_INFO "perfectly reliable and the kernel is designed to handle that.\n");
 		}
-		printk(KERN_INFO "%s: page allocation failure."
-			" order:%d, mode:0x%x\n",
-			p->comm, order, gfp_mask);
+		printk(KERN_WARNING "%s: page allocation failure."
+			" order:%d, mode:0x%x, alloc_flags:0x%x, pflags:0x%x\n",
+			p->comm, order, gfp_mask, alloc_flags, p->flags);
 		dump_stack();
 		show_mem();
 	}
@@ -1945,9 +1947,9 @@ void show_free_areas(void)
 			"\n",
 			zone->name,
 			K(zone_page_state(zone, NR_FREE_PAGES)),
-			K(zone->pages_min),
-			K(zone->pages_low),
-			K(zone->pages_high),
+			K(zone->pages_emerg + zone->pages_min),
+			K(zone->pages_emerg + zone->pages_low),
+			K(zone->pages_emerg + zone->pages_high),
 			K(zone_page_state(zone, NR_ACTIVE)),
 			K(zone_page_state(zone, NR_INACTIVE)),
 			K(zone->present_pages),
@@ -4211,7 +4213,7 @@ static void calculate_totalreserve_pages
 			}
 
 			/* we treat pages_high as reserved pages. */
-			max += zone->pages_high;
+			max += zone->pages_high + zone->pages_emerg;
 
 			if (max > zone->present_pages)
 				max = zone->present_pages;
@@ -4268,7 +4270,8 @@ static void setup_per_zone_lowmem_reserv
  */
 static void __setup_per_zone_pages_min(void)
 {
-	unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
+	unsigned pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
+	unsigned pages_emerg = var_free_kbytes >> (PAGE_SHIFT - 10);
 	unsigned long lowmem_pages = 0;
 	struct zone *zone;
 	unsigned long flags;
@@ -4280,11 +4283,13 @@ static void __setup_per_zone_pages_min(v
 	}
 
 	for_each_zone(zone) {
-		u64 tmp;
+		u64 tmp, tmp_emerg;
 
 		spin_lock_irqsave(&zone->lock, flags);
 		tmp = (u64)pages_min * zone->present_pages;
 		do_div(tmp, lowmem_pages);
+		tmp_emerg = (u64)pages_emerg * zone->present_pages;
+		do_div(tmp_emerg, lowmem_pages);
 		if (is_highmem(zone)) {
 			/*
 			 * __GFP_HIGH and PF_MEMALLOC allocations usually don't
@@ -4303,12 +4308,14 @@ static void __setup_per_zone_pages_min(v
 			if (min_pages > 128)
 				min_pages = 128;
 			zone->pages_min = min_pages;
+			zone->pages_emerg = 0;
 		} else {
 			/*
 			 * If it's a lowmem zone, reserve a number of pages
 			 * proportionate to the zone's size.
 			 */
 			zone->pages_min = tmp;
+			zone->pages_emerg = tmp_emerg;
 		}
 
 		zone->pages_low = zone->pages_min + (tmp >> 2);
@@ -4330,6 +4337,63 @@ void setup_per_zone_pages_min(void)
 	spin_unlock_irqrestore(&min_free_lock, flags);
 }
 
+static void __adjust_memalloc_reserve(int pages)
+{
+	var_free_kbytes += pages << (PAGE_SHIFT - 10);
+	BUG_ON(var_free_kbytes < 0);
+	setup_per_zone_pages_min();
+}
+
+static int test_reserve_limits(void)
+{
+	struct zone *zone;
+	int node;
+
+	for_each_zone(zone)
+		wakeup_kswapd(zone, 0);
+
+	for_each_online_node(node) {
+		struct page *page = alloc_pages_node(node, GFP_KERNEL, 0);
+		if (!page)
+			return -ENOMEM;
+
+		__free_page(page);
+	}
+
+	return 0;
+}
+
+/**
+ * adjust_memalloc_reserve - adjust the memalloc reserve
+ * @pages: number of pages to add
+ *
+ * It adds a number of pages to the memalloc reserve; if
+ * the number was positive it kicks reclaim into action to
+ * satisfy the higher watermarks.
+ *
+ * returns -ENOMEM when it failed to satisfy the watermarks.
+ */
+int adjust_memalloc_reserve(int pages)
+{
+	int err = 0;
+
+	mutex_lock(&var_free_mutex);
+	__adjust_memalloc_reserve(pages);
+	if (pages > 0) {
+		err = test_reserve_limits();
+		if (err) {
+			__adjust_memalloc_reserve(-pages);
+			goto unlock;
+		}
+	}
+	printk(KERN_DEBUG "Emergency reserve: %d\n", var_free_kbytes);
+
+unlock:
+	mutex_unlock(&var_free_mutex);
+	return err;
+}
+EXPORT_SYMBOL_GPL(adjust_memalloc_reserve);
+
 /*
  * Initialise min_free_kbytes.
  *
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -692,9 +692,9 @@ static void zoneinfo_show_print(struct s
 		   "\n        spanned  %lu"
 		   "\n        present  %lu",
 		   zone_page_state(zone, NR_FREE_PAGES),
-		   zone->pages_min,
-		   zone->pages_low,
-		   zone->pages_high,
+		   zone->pages_emerg + zone->pages_min,
+		   zone->pages_emerg + zone->pages_low,
+		   zone->pages_emerg + zone->pages_high,
 		   zone->pages_scanned,
 		   zone->nr_scan_active, zone->nr_scan_inactive,
 		   zone->spanned_pages,
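
For reference, a minimal sketch of how a user of the new interface might size
the pool (presumably the later SoN patches in this series are the intended
consumers; the driver names and the reserve size below are hypothetical, only
adjust_memalloc_reserve() comes from this patch):

	#include <linux/module.h>
	#include <linux/mmzone.h>

	#define MY_RESERVE_PAGES	256	/* hypothetical reserve size */

	static int __init my_driver_init(void)
	{
		int err;

		/*
		 * Grow the emergency pool; a positive delta wakes kswapd and
		 * returns -ENOMEM if the higher watermarks cannot be met.
		 */
		err = adjust_memalloc_reserve(MY_RESERVE_PAGES);
		if (err)
			return err;

		return 0;
	}

	static void __exit my_driver_exit(void)
	{
		/* Shrink the pool again when the reserve is no longer needed. */
		adjust_memalloc_reserve(-MY_RESERVE_PAGES);
	}

Updates to the pool are serialized by var_free_mutex, and a grow that fails the
test_reserve_limits() check is rolled back, so var_free_kbytes is left
unchanged on error.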