From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: mm: sl[au]b: add knowledge of reserve pages
Patch-mainline: No
References: FATE#303834

Restrict objects from reserve slabs (ALLOC_NO_WATERMARKS) to allocation
contexts that are entitled to it. This is done to ensure reserve pages don't
leak out and get consumed.

The basic pattern used for all allocators is the following: for each active
slab page we store whether it came from an emergency allocation. When we find
that it did, make sure the current allocation context would have been able to
allocate a page from the emergency reserves as well. In that case allow the
allocation. If not, force a new slab allocation. When that succeeds, the
memory pressure has lifted enough to allow this context to get an object;
otherwise fail the allocation.

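As a rough sketch of that shared check (not part of the patch itself: the
helper name slab_reserve_usable() is invented here, and it assumes that
page->reserve is set by the page allocator in an earlier patch of this series
and that gfp_to_alloc_flags()/ALLOC_NO_WATERMARKS are made visible through
mm/internal.h):

static inline int slab_reserve_usable(gfp_t gfpflags, int page_was_reserve)
{
	/* The slab page did not come from the reserves: anyone may use it. */
	if (!page_was_reserve)
		return 1;

	/*
	 * The slab came from the emergency reserves.  Only contexts that are
	 * themselves entitled to ignore the watermarks may take objects from
	 * it; everybody else must grow a fresh slab, which re-tests the
	 * watermarks and either succeeds or fails the allocation.
	 */
	return !!(gfp_to_alloc_flags(gfpflags) & ALLOC_NO_WATERMARKS);
}

SLUB keeps the per-slab flag in kmem_cache_cpu->reserve, SLAB keeps it in the
per-cpu array_cache, and SLOB uses a single global (slob_reserve), as the
hunks below show.
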
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Neil Brown <neilb@suse.de>
Acked-by: Suresh Jayaraman <sjayaraman@suse.de>

---
 include/linux/slub_def.h |    1 
 mm/slab.c                |   60 +++++++++++++++++++++++++++++++++++++++--------
 mm/slob.c                |   16 +++++++++++-
 mm/slub.c                |   42 +++++++++++++++++++++++++++-----
 4 files changed, 102 insertions(+), 17 deletions(-)

Index: linux-2.6.26/mm/slub.c
===================================================================
--- linux-2.6.26.orig/mm/slub.c
+++ linux-2.6.26/mm/slub.c
@@ -23,6 +23,7 @@
 #include <linux/kallsyms.h>
 #include <linux/memory.h>
 #include <linux/math64.h>
+#include "internal.h"

 /*
  * Lock order:
@@ -1106,7 +1107,8 @@ static void setup_object(struct kmem_cac
 		s->ctor(object);
 }

-static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
+static
+struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node, int *reserve)
 {
 	struct page *page;
 	void *start;
@@ -1120,6 +1122,8 @@ static struct page *new_slab(struct kmem
 	if (!page)
 		goto out;

+	*reserve = page->reserve;
+
 	inc_slabs_node(s, page_to_nid(page), page->objects);
 	page->slab = s;
 	page->flags |= 1 << PG_slab;
@@ -1503,10 +1507,20 @@ static void *__slab_alloc(struct kmem_ca
 {
 	void **object;
 	struct page *new;
+	int reserve;

 	/* We handle __GFP_ZERO in the caller */
 	gfpflags &= ~__GFP_ZERO;

+	if (unlikely(c->reserve)) {
+		/*
+		 * If the current slab is a reserve slab and the current
+		 * allocation context does not allow access to the reserves we
+		 * must force an allocation to test the current levels.
+		 */
+		if (!(gfp_to_alloc_flags(gfpflags) & ALLOC_NO_WATERMARKS))
+			goto grow_slab;
+	}
 	if (!c->page)
 		goto new_slab;

@@ -1520,8 +1534,8 @@ load_freelist:
 	object = c->page->freelist;
 	if (unlikely(!object))
 		goto another_slab;
-	if (unlikely(SLABDEBUG && PageSlubDebug(c->page)))
-		goto debug;
+	if (unlikely(PageSlubDebug(c->page) || c->reserve))
+		goto slow_path;

 	c->freelist = object[c->offset];
 	c->page->inuse = c->page->objects;
@@ -1543,16 +1557,18 @@ new_slab:
 		goto load_freelist;
 	}

+grow_slab:
 	if (gfpflags & __GFP_WAIT)
 		local_irq_enable();

-	new = new_slab(s, gfpflags, node);
+	new = new_slab(s, gfpflags, node, &reserve);

 	if (gfpflags & __GFP_WAIT)
 		local_irq_disable();

 	if (new) {
 		c = get_cpu_slab(s, smp_processor_id());
+		c->reserve = reserve;
 		stat(c, ALLOC_SLAB);
 		if (c->page)
 			flush_slab(s, c);
@@ -1562,10 +1578,21 @@ new_slab:
 		goto load_freelist;
 	}
 	return NULL;
-debug:
-	if (!alloc_debug_processing(s, c->page, object, addr))
+
+slow_path:
+	if (PageSlubDebug(c->page) &&
+	    !alloc_debug_processing(s, c->page, object, addr))
 		goto another_slab;

+	/*
+	 * Avoid the slub fast path in slab_alloc() by not setting
+	 * c->freelist and the fast path in slab_free() by making
+	 * node_match() fail by setting c->node to -1.
+	 *
+	 * We use this for debug and reserve checks which need
+	 * to be done for each allocation.
+	 */
+
 	c->page->inuse++;
 	c->page->freelist = object[c->offset];
 	c->node = -1;
@@ -2078,10 +2105,11 @@ static struct kmem_cache_node *early_kme
 	struct page *page;
 	struct kmem_cache_node *n;
 	unsigned long flags;
+	int reserve;

 	BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));

-	page = new_slab(kmalloc_caches, gfpflags, node);
+	page = new_slab(kmalloc_caches, gfpflags, node, &reserve);

 	BUG_ON(!page);
 	if (page_to_nid(page) != node) {
Index: linux-2.6.26/include/linux/slub_def.h
===================================================================
--- linux-2.6.26.orig/include/linux/slub_def.h
+++ linux-2.6.26/include/linux/slub_def.h
@@ -38,6 +38,7 @@ struct kmem_cache_cpu {
 	int node;		/* The node of the page (or -1 for debug) */
 	unsigned int offset;	/* Freepointer offset (in word units) */
 	unsigned int objsize;	/* Size of an object (from kmem_cache) */
+	int reserve;		/* Did the current page come from the reserve */
 #ifdef CONFIG_SLUB_STATS
 	unsigned stat[NR_SLUB_STAT_ITEMS];
 #endif
Index: linux-2.6.26/mm/slab.c
===================================================================
--- linux-2.6.26.orig/mm/slab.c
+++ linux-2.6.26/mm/slab.c
@@ -116,6 +116,8 @@
 #include <asm/tlbflush.h>
 #include <asm/page.h>

+#include "internal.h"
+
 /*
  * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
  *		  0 for faster, smaller code (especially in the critical paths).
@@ -264,7 +266,8 @@ struct array_cache {
 	unsigned int avail;
 	unsigned int limit;
 	unsigned int batchcount;
-	unsigned int touched;
+	unsigned int touched:1,
+		     reserve:1;
 	spinlock_t lock;
 	void *entry[];	/*
			 * Must have this definition in here for the proper
@@ -760,6 +763,27 @@ static inline struct array_cache *cpu_ca
 	return cachep->array[smp_processor_id()];
 }

+/*
+ * If the last page came from the reserves, and the current allocation context
+ * does not have access to them, force an allocation to test the watermarks.
+ */
+static inline int slab_force_alloc(struct kmem_cache *cachep, gfp_t flags)
+{
+	if (unlikely(cpu_cache_get(cachep)->reserve) &&
+	    !(gfp_to_alloc_flags(flags) & ALLOC_NO_WATERMARKS))
+		return 1;
+
+	return 0;
+}
+
+static inline void slab_set_reserve(struct kmem_cache *cachep, int reserve)
+{
+	struct array_cache *ac = cpu_cache_get(cachep);
+
+	if (unlikely(ac->reserve != reserve))
+		ac->reserve = reserve;
+}
+
 static inline struct kmem_cache *__find_general_cachep(size_t size,
							gfp_t gfpflags)
 {
@@ -959,6 +983,7 @@ static struct array_cache *alloc_arrayca
 		nc->limit = entries;
 		nc->batchcount = batchcount;
 		nc->touched = 0;
+		nc->reserve = 0;
 		spin_lock_init(&nc->lock);
 	}
 	return nc;
@@ -1661,7 +1686,8 @@ __initcall(cpucache_init);
  * did not request dmaable memory, we might get it, but that
  * would be relatively rare and ignorable.
  */
-static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
+static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid,
+		int *reserve)
 {
 	struct page *page;
 	int nr_pages;
@@ -1683,6 +1709,7 @@ static void *kmem_getpages(struct kmem_c
 	if (!page)
 		return NULL;

+	*reserve = page->reserve;
 	nr_pages = (1 << cachep->gfporder);
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
 		add_zone_page_state(page_zone(page),
@@ -2103,6 +2130,7 @@ static int __init_refok setup_cpu_cache(
 	cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
 	cpu_cache_get(cachep)->batchcount = 1;
 	cpu_cache_get(cachep)->touched = 0;
+	cpu_cache_get(cachep)->reserve = 0;
 	cachep->batchcount = 1;
 	cachep->limit = BOOT_CPUCACHE_ENTRIES;
 	return 0;
@@ -2757,6 +2785,7 @@ static int cache_grow(struct kmem_cache
 	size_t offset;
 	gfp_t local_flags;
 	struct kmem_list3 *l3;
+	int reserve;

 	/*
	 * Be lazy and only check for valid flags here, keeping it out of the
@@ -2795,7 +2824,7 @@ static int cache_grow(struct kmem_cache
	 * 'nodeid'.
	 */
 	if (!objp)
-		objp = kmem_getpages(cachep, local_flags, nodeid);
+		objp = kmem_getpages(cachep, local_flags, nodeid, &reserve);
 	if (!objp)
 		goto failed;

@@ -2812,6 +2841,7 @@ static int cache_grow(struct kmem_cache
 	if (local_flags & __GFP_WAIT)
 		local_irq_disable();
 	check_irq_off();
+	slab_set_reserve(cachep, reserve);
 	spin_lock(&l3->list_lock);

 	/* Make slab active. */
@@ -2946,7 +2976,8 @@ bad:
 #define check_slabp(x,y) do { } while(0)
 #endif

-static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
+static void *cache_alloc_refill(struct kmem_cache *cachep,
+		gfp_t flags, int must_refill)
 {
 	int batchcount;
 	struct kmem_list3 *l3;
@@ -2956,6 +2987,8 @@ static void *cache_alloc_refill(struct k
 retry:
 	check_irq_off();
 	node = numa_node_id();
+	if (unlikely(must_refill))
+		goto force_grow;
 	ac = cpu_cache_get(cachep);
 	batchcount = ac->batchcount;
 	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
@@ -3023,11 +3056,14 @@ alloc_done:

 	if (unlikely(!ac->avail)) {
 		int x;
+force_grow:
 		x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);

 		/* cache_grow can reenable interrupts, then ac could change. */
 		ac = cpu_cache_get(cachep);
-		if (!x && ac->avail == 0)	/* no objects in sight? abort */
+
+		/* no objects in sight? abort */
+		if (!x && (ac->avail == 0 || must_refill))
 			return NULL;

 		if (!ac->avail)		/* objects refilled by interrupt? */
@@ -3182,17 +3218,18 @@ static inline void *____cache_alloc(stru
 {
 	void *objp;
 	struct array_cache *ac;
+	int must_refill = slab_force_alloc(cachep, flags);

 	check_irq_off();

 	ac = cpu_cache_get(cachep);
-	if (likely(ac->avail)) {
+	if (likely(ac->avail && !must_refill)) {
 		STATS_INC_ALLOCHIT(cachep);
 		ac->touched = 1;
 		objp = ac->entry[--ac->avail];
 	} else {
 		STATS_INC_ALLOCMISS(cachep);
-		objp = cache_alloc_refill(cachep, flags);
+		objp = cache_alloc_refill(cachep, flags, must_refill);
 	}
 	return objp;
 }
@@ -3236,7 +3273,7 @@ static void *fallback_alloc(struct kmem_
 	struct zone *zone;
 	enum zone_type high_zoneidx = gfp_zone(flags);
 	void *obj = NULL;
-	int nid;
+	int nid, reserve;

 	if (flags & __GFP_THISNODE)
 		return NULL;
@@ -3272,10 +3309,11 @@ retry:
 		if (local_flags & __GFP_WAIT)
 			local_irq_enable();
 		kmem_flagcheck(cache, flags);
-		obj = kmem_getpages(cache, local_flags, -1);
+		obj = kmem_getpages(cache, local_flags, -1, &reserve);
 		if (local_flags & __GFP_WAIT)
 			local_irq_disable();
 		if (obj) {
+			slab_set_reserve(cache, reserve);
 			/*
			 * Insert into the appropriate per node queues
			 */
@@ -3314,6 +3352,9 @@ static void *____cache_alloc_node(struct
 	l3 = cachep->nodelists[nodeid];
 	BUG_ON(!l3);

+	if (unlikely(slab_force_alloc(cachep, flags)))
+		goto force_grow;
+
 retry:
 	check_irq_off();
 	spin_lock(&l3->list_lock);
@@ -3351,6 +3392,7 @@ retry:

 must_grow:
 	spin_unlock(&l3->list_lock);
+force_grow:
 	x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
 	if (x)
 		goto retry;
Index: linux-2.6.26/mm/slob.c
===================================================================
--- linux-2.6.26.orig/mm/slob.c
+++ linux-2.6.26/mm/slob.c
@@ -66,6 +66,7 @@
 #include <linux/rcupdate.h>
 #include <linux/list.h>
 #include <asm/atomic.h>
+#include "internal.h"

 /*
  * slob_block has a field 'units', which indicates size of block if +ve,
@@ -183,6 +184,11 @@ struct slob_rcu {
 static DEFINE_SPINLOCK(slob_lock);

 /*
+ * tracks the reserve state for the allocator.
+ */
+static int slob_reserve;
+
+/*
  * Encode the given size and next info into a free slob block s.
  */
 static void set_slob(slob_t *s, slobidx_t size, slob_t *next)
@@ -232,7 +238,7 @@ static int slob_last(slob_t *s)

 static void *slob_new_page(gfp_t gfp, int order, int node)
 {
-	void *page;
+	struct page *page;

 #ifdef CONFIG_NUMA
 	if (node != -1)
@@ -244,6 +250,8 @@ static void *slob_new_page(gfp_t gfp, in
 	if (!page)
 		return NULL;

+	slob_reserve = page->reserve;
+
 	return page_address(page);
 }

@@ -309,6 +317,11 @@ static void *slob_alloc(size_t size, gfp
 	slob_t *b = NULL;
 	unsigned long flags;

+	if (unlikely(slob_reserve)) {
+		if (!(gfp_to_alloc_flags(gfp) & ALLOC_NO_WATERMARKS))
+			goto grow;
+	}
+
 	if (size < SLOB_BREAK1)
 		slob_list = &free_slob_small;
 	else if (size < SLOB_BREAK2)
@@ -347,6 +360,7 @@ static void *slob_alloc(size_t size, gfp
 	}
 	spin_unlock_irqrestore(&slob_lock, flags);

+grow:
 	/* Not enough space: must allocate a new page */
 	if (!b) {
 		b = slob_new_page(gfp & ~__GFP_ZERO, 0, node);