1From: Peter Zijlstra <a.p.zijlstra@chello.nl>
2Subject: mm: memory reserve management
3Patch-mainline: No
4References: FATE#303834
5
6Generic reserve management code.
7
8It provides methods to reserve and charge. On top of this, generic alloc/free-style
9reserve pools can be built, which could fully replace mempool_t
10functionality.
11
12It should also allow for a Banker's algorithm replacement of __GFP_NOFAIL.
13
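For orientation, a minimal usage sketch (not part of the patch; the reserve
name, sizes and calling context are illustrative) showing the intended
pattern: size a reserve up front, allocate through the *_reserve wrappers,
and hand the returned emergency flag back on free:

    #include <linux/reserve.h>

    static struct mem_reserve foo_reserve;

    static int foo_init(void)
    {
        /* hang a 16 KB kmalloc reserve off the global root */
        mem_reserve_init(&foo_reserve, "foo emergency pool", &mem_reserve_root);
        return mem_reserve_kmalloc_set(&foo_reserve, 16 * 1024);
    }

    static void foo_use(void)
    {
        int emerg = 0;
        /* regular kmalloc first; the reserve is only charged when that
         * fails and the context is entitled to emergency memory */
        void *buf = kmalloc_reserve(128, GFP_ATOMIC, -1, &foo_reserve, &emerg);

        if (!buf)
            return;
        /* ... */
        kfree_reserve(buf, &foo_reserve, emerg);
    }
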
14Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
15Acked-by: Neil Brown <neilb@suse.de>
16Acked-by: Suresh Jayaraman <sjayaraman@suse.de>
17
18---
19 include/linux/reserve.h | 198 ++++++++++++++
20 include/linux/slab.h | 20 -
21 mm/Makefile | 2
22 mm/reserve.c | 637 ++++++++++++++++++++++++++++++++++++++++++++++++
23 mm/slub.c | 2
24 5 files changed, 848 insertions(+), 11 deletions(-)
25
26Index: linux-2.6.27/include/linux/reserve.h
27===================================================================
28--- /dev/null
29+++ linux-2.6.27/include/linux/reserve.h
30@@ -0,0 +1,198 @@
31+/*
32+ * Memory reserve management.
33+ *
34+ * Copyright (C) 2007-2008 Red Hat, Inc.,
35+ * Peter Zijlstra <pzijlstr@redhat.com>
36+ *
37+ * This file contains the public data structure and API definitions.
38+ */
39+
40+#ifndef _LINUX_RESERVE_H
41+#define _LINUX_RESERVE_H
42+
43+#include <linux/list.h>
44+#include <linux/spinlock.h>
45+#include <linux/wait.h>
46+#include <linux/slab.h>
47+
48+struct mem_reserve {
49+ struct mem_reserve *parent;
50+ struct list_head children;
51+ struct list_head siblings;
52+
53+ const char *name;
54+
55+ long pages;
56+ long limit;
57+ long usage;
58+ spinlock_t lock; /* protects limit and usage */
59+
60+ wait_queue_head_t waitqueue;
61+};
62+
63+extern struct mem_reserve mem_reserve_root;
64+
65+void mem_reserve_init(struct mem_reserve *res, const char *name,
66+ struct mem_reserve *parent);
67+int mem_reserve_connect(struct mem_reserve *new_child,
68+ struct mem_reserve *node);
69+void mem_reserve_disconnect(struct mem_reserve *node);
70+
71+int mem_reserve_pages_set(struct mem_reserve *res, long pages);
72+int mem_reserve_pages_add(struct mem_reserve *res, long pages);
73+int mem_reserve_pages_charge(struct mem_reserve *res, long pages);
74+
75+int mem_reserve_kmalloc_set(struct mem_reserve *res, long bytes);
76+int mem_reserve_kmalloc_charge(struct mem_reserve *res, long bytes);
77+
78+struct kmem_cache;
79+
80+int mem_reserve_kmem_cache_set(struct mem_reserve *res,
81+ struct kmem_cache *s,
82+ int objects);
83+int mem_reserve_kmem_cache_charge(struct mem_reserve *res,
84+ struct kmem_cache *s, long objs);
85+
86+void *___kmalloc_reserve(size_t size, gfp_t flags, int node, void *ip,
87+ struct mem_reserve *res, int *emerg);
88+
89+static inline
90+void *__kmalloc_reserve(size_t size, gfp_t flags, int node, void *ip,
91+ struct mem_reserve *res, int *emerg)
92+{
93+ void *obj;
94+
95+ obj = __kmalloc_node_track_caller(size,
96+ flags | __GFP_NOMEMALLOC | __GFP_NOWARN, node, ip);
97+ if (!obj)
98+ obj = ___kmalloc_reserve(size, flags, node, ip, res, emerg);
99+
100+ return obj;
101+}
102+
103+/**
104+ * kmalloc_reserve() - kmalloc() and charge against @res for @emerg allocations
105+ * @size - size of the requested memory region
106+ * @gfp - allocation flags to use for this allocation
107+ * @node - preferred memory node for this allocation
108+ * @res - reserve to charge emergency allocations against
109+ * @emerg - bit 0 is set when the allocation was an emergency allocation
110+ *
111+ * Returns NULL on failure
112+ */
113+#define kmalloc_reserve(size, gfp, node, res, emerg) \
114+ __kmalloc_reserve(size, gfp, node, \
115+ __builtin_return_address(0), res, emerg)
116+
117+void __kfree_reserve(void *obj, struct mem_reserve *res, int emerg);
118+
119+/**
120+ * kfree_reserve() - kfree() and uncharge against @res for @emerg allocations
121+ * @obj - memory to free
122+ * @res - reserve to uncharge emergency allocations from
123+ * @emerg - was this an emergency allocation
124+ */
125+static inline
126+void kfree_reserve(void *obj, struct mem_reserve *res, int emerg)
127+{
128+ if (unlikely(obj && res && emerg))
129+ __kfree_reserve(obj, res, emerg);
130+ else
131+ kfree(obj);
132+}
133+
134+void *__kmem_cache_alloc_reserve(struct kmem_cache *s, gfp_t flags, int node,
135+ struct mem_reserve *res, int *emerg);
136+
137+/**
138+ * kmem_cache_alloc_reserve() - kmem_cache_alloc() and charge against @res
139+ * @s - kmem_cache to allocate from
140+ * @gfp - allocation flags to use for this allocation
141+ * @node - preferred memory node for this allocation
142+ * @res - reserve to charge emergency allocations against
143+ * @emerg - bit 0 is set when the allocation was an emergency allocation
144+ *
145+ * Returns NULL on failure
146+ */
147+static inline
148+void *kmem_cache_alloc_reserve(struct kmem_cache *s, gfp_t flags, int node,
149+ struct mem_reserve *res, int *emerg)
150+{
151+ void *obj;
152+
153+ obj = kmem_cache_alloc_node(s,
154+ flags | __GFP_NOMEMALLOC | __GFP_NOWARN, node);
155+ if (!obj)
156+ obj = __kmem_cache_alloc_reserve(s, flags, node, res, emerg);
157+
158+ return obj;
159+}
160+
161+void __kmem_cache_free_reserve(struct kmem_cache *s, void *obj,
162+ struct mem_reserve *res, int emerg);
163+
164+/**
165+ * kmem_cache_free_reserve() - kmem_cache_free() and uncharge against @res
166+ * @s - kmem_cache to free to
167+ * @obj - memory to free
168+ * @res - reserve to uncharge emergency allocations from
169+ * @emerg - was this an emergency allocation
170+ */
171+static inline
172+void kmem_cache_free_reserve(struct kmem_cache *s, void *obj,
173+ struct mem_reserve *res, int emerg)
174+{
175+ if (unlikely(obj && res && emerg))
176+ __kmem_cache_free_reserve(s, obj, res, emerg);
177+ else
178+ kmem_cache_free(s, obj);
179+}
180+
181+struct page *__alloc_pages_reserve(int node, gfp_t flags, int order,
182+ struct mem_reserve *res, int *emerg);
183+
184+/**
185+ * alloc_pages_reserve() - alloc_pages() and charge against @res
186+ * @node - preferred memory node for this allocation
187+ * @gfp - allocation flags to use for this allocation
188+ * @order - page order
189+ * @res - reserve to charge emergency allocations against
190+ * @emerg - bit 0 is set when the allocation was an emergency allocation
191+ *
192+ * Returns NULL on failure
193+ */
194+static inline
195+struct page *alloc_pages_reserve(int node, gfp_t flags, int order,
196+ struct mem_reserve *res, int *emerg)
197+{
198+ struct page *page;
199+
200+ page = alloc_pages_node(node,
201+ flags | __GFP_NOMEMALLOC | __GFP_NOWARN, order);
202+ if (!page)
203+ page = __alloc_pages_reserve(node, flags, order, res, emerg);
204+
205+ return page;
206+}
207+
208+void __free_pages_reserve(struct page *page, int order,
209+ struct mem_reserve *res, int emerg);
210+
211+/**
212+ * free_pages_reserve() - __free_pages() and uncharge against @res
213+ * @page - page to free
214+ * @order - page order
215+ * @res - reserve to uncharge emergency allocations from
216+ * @emerg - was this an emergency allocation
217+ */
218+static inline
219+void free_pages_reserve(struct page *page, int order,
220+ struct mem_reserve *res, int emerg)
221+{
222+ if (unlikely(page && res && emerg))
223+ __free_pages_reserve(page, order, res, emerg);
224+ else
225+ __free_pages(page, order);
226+}
227+
228+#endif /* _LINUX_RESERVE_H */
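
A sketch of the matching page-level pattern (again not part of the patch;
"rx_reserve" and the sizes are made up), using only the interfaces declared
above. Note that the emerg value produced by the allocation must be carried
to the corresponding free so the reserve is uncharged exactly once:

    #include <linux/reserve.h>

    static struct mem_reserve rx_reserve;

    static int rx_init(void)
    {
        mem_reserve_init(&rx_reserve, "RX page reserve", &mem_reserve_root);
        return mem_reserve_pages_set(&rx_reserve, 32);
    }

    static void rx_use(void)
    {
        int emerg = 0;
        struct page *page;

        /* order-0 page; __alloc_pages_reserve() is tried only when the
         * normal path fails and ALLOC_NO_WATERMARKS applies */
        page = alloc_pages_reserve(-1, GFP_ATOMIC, 0, &rx_reserve, &emerg);
        if (!page)
            return;
        /* ... */
        free_pages_reserve(page, 0, &rx_reserve, emerg);
    }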
229Index: linux-2.6.27/mm/Makefile
230===================================================================
231--- linux-2.6.27.orig/mm/Makefile
232+++ linux-2.6.27/mm/Makefile
233@@ -11,7 +11,7 @@ obj-y := bootmem.o filemap.o mempool.o
234 maccess.o page_alloc.o page-writeback.o pdflush.o \
235 readahead.o swap.o truncate.o vmscan.o \
236 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
237- page_isolation.o mm_init.o $(mmu-y)
238+ page_isolation.o mm_init.o reserve.o $(mmu-y)
239
240 obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o
241 obj-$(CONFIG_BOUNCE) += bounce.o
242Index: linux-2.6.27/mm/reserve.c
243===================================================================
244--- /dev/null
245+++ linux-2.6.27/mm/reserve.c
246@@ -0,0 +1,637 @@
247+/*
248+ * Memory reserve management.
249+ *
250+ * Copyright (C) 2007-2008, Red Hat, Inc.,
251+ * Peter Zijlstra <pzijlstr@redhat.com>
252+ *
253+ * Description:
254+ *
255+ * Manage a set of memory reserves.
256+ *
257+ * A memory reserve is a reserve for a specified number of objects of a specified
258+ * size. Since memory is managed in pages, this reserve demand is then
259+ * translated into a page unit.
260+ *
261+ * So each reserve has a specified object limit, an object usage count and a
262+ * number of pages required to back these objects.
263+ *
264+ * Usage is charged against a reserve; if the charge fails, the resource must
265+ * not be allocated/used.
266+ *
267+ * The reserves are managed in a tree, and the resource demands (pages and
268+ * limit) are propagated up the tree. Obviously the object limit will be
269+ * meaningless as soon as units start mixing, but the required page reserve
270+ * (being of one unit) is still valid at the root.
271+ *
272+ * It is the page demand of the root node that is used to set the global
273+ * reserve (adjust_memalloc_reserve() which sets zone->pages_emerg).
274+ *
275+ * As long as a subtree has the same usage unit, an aggregate node can be used
276+ * to charge against, instead of the leaf nodes. However, be consistent about
277+ * which node is charged, since resource usage is not propagated up the tree (for
278+ * performance reasons).
279+ */
280+
281+#include <linux/reserve.h>
282+#include <linux/mutex.h>
283+#include <linux/mmzone.h>
284+#include <linux/log2.h>
285+#include <linux/proc_fs.h>
286+#include <linux/seq_file.h>
287+#include <linux/module.h>
288+#include <linux/slab.h>
289+#include <linux/sched.h>
290+#include "internal.h"
291+
292+static DEFINE_MUTEX(mem_reserve_mutex);
293+
294+/**
295+ * @mem_reserve_root - the global reserve root
296+ *
297+ * The global reserve is empty and has no limit or unit; it merely
298+ * acts as an aggregation point for reserves and an interface to
299+ * adjust_memalloc_reserve().
300+ */
301+struct mem_reserve mem_reserve_root = {
302+ .children = LIST_HEAD_INIT(mem_reserve_root.children),
303+ .siblings = LIST_HEAD_INIT(mem_reserve_root.siblings),
304+ .name = "total reserve",
305+ .lock = __SPIN_LOCK_UNLOCKED(mem_reserve_root.lock),
306+ .waitqueue = __WAIT_QUEUE_HEAD_INITIALIZER(mem_reserve_root.waitqueue),
307+};
308+EXPORT_SYMBOL_GPL(mem_reserve_root);
309+
310+/**
311+ * mem_reserve_init() - initialize a memory reserve object
312+ * @res - the new reserve object
313+ * @name - a name for this reserve
314+ * @parent - when non NULL, the parent to connect to.
315+ */
316+void mem_reserve_init(struct mem_reserve *res, const char *name,
317+ struct mem_reserve *parent)
318+{
319+ memset(res, 0, sizeof(*res));
320+ INIT_LIST_HEAD(&res->children);
321+ INIT_LIST_HEAD(&res->siblings);
322+ res->name = name;
323+ spin_lock_init(&res->lock);
324+ init_waitqueue_head(&res->waitqueue);
325+
326+ if (parent)
327+ mem_reserve_connect(res, parent);
328+}
329+EXPORT_SYMBOL_GPL(mem_reserve_init);
330+
331+/*
332+ * propagate the pages and limit changes up the (sub)tree.
333+ */
334+static void __calc_reserve(struct mem_reserve *res, long pages, long limit)
335+{
336+ unsigned long flags;
337+
338+ for ( ; res; res = res->parent) {
339+ res->pages += pages;
340+
341+ if (limit) {
342+ spin_lock_irqsave(&res->lock, flags);
343+ res->limit += limit;
344+ spin_unlock_irqrestore(&res->lock, flags);
345+ }
346+ }
347+}
348+
349+/**
350+ * __mem_reserve_add() - primitive to change the size of a reserve
351+ * @res - reserve to change
352+ * @pages - page delta
353+ * @limit - usage limit delta
354+ *
355+ * Returns -ENOMEM when a size increase is not possible at the moment.
356+ */
357+static int __mem_reserve_add(struct mem_reserve *res, long pages, long limit)
358+{
359+ int ret = 0;
360+ long reserve;
361+
362+ /*
363+ * This looks more complex than it needs to be because we handle
364+ * the case where @res isn't actually connected to mem_reserve_root.
365+ *
366+ * So, by propagating the new pages up the (sub)tree and computing
367+ * the difference in mem_reserve_root.pages we find if this action
368+ * affects the actual reserve.
369+ *
370+ * The (partial) propagation also means that mem_reserve_connect()
371+ * need only look at the direct child, since each disconnected
372+ * sub-tree is fully up-to-date.
373+ */
374+ reserve = mem_reserve_root.pages;
375+ __calc_reserve(res, pages, 0);
376+ reserve = mem_reserve_root.pages - reserve;
377+
378+ if (reserve) {
379+ ret = adjust_memalloc_reserve(reserve);
380+ if (ret)
381+ __calc_reserve(res, -pages, 0);
382+ }
383+
384+ /*
385+ * Delay updating the limits until we've acquired the resources to
386+ * back it.
387+ */
388+ if (!ret)
389+ __calc_reserve(res, 0, limit);
390+
391+ return ret;
392+}
393+
394+/**
395+ * __mem_reserve_charge() - primitive to charge object usage of a reserve
396+ * @res - reserve to charge
397+ * @charge - size of the charge
398+ *
399+ * Returns non-zero on success, zero on failure.
400+ */
401+static
402+int __mem_reserve_charge(struct mem_reserve *res, long charge)
403+{
404+ unsigned long flags;
405+ int ret = 0;
406+
407+ spin_lock_irqsave(&res->lock, flags);
408+ if (charge < 0 || res->usage + charge < res->limit) {
409+ res->usage += charge;
410+ if (unlikely(res->usage < 0))
411+ res->usage = 0;
412+ ret = 1;
413+ }
414+ if (charge < 0)
415+ wake_up_all(&res->waitqueue);
416+ spin_unlock_irqrestore(&res->lock, flags);
417+
418+ return ret;
419+}
420+
421+/**
422+ * mem_reserve_connect() - connect a reserve to another in a child-parent relation
423+ * @new_child - the reserve node to connect (child)
424+ * @node - the reserve node to connect to (parent)
425+ *
426+ * Connecting a node results in an increase of the reserve by the number of
427+ * pages in @new_child->pages if @node has a connection to mem_reserve_root.
428+ *
429+ * Returns -ENOMEM when the new connection would increase the reserve (parent
430+ * is connected to mem_reserve_root) and there is no memory to do so.
431+ *
432+ * On error, the child is _NOT_ connected.
433+ */
434+int mem_reserve_connect(struct mem_reserve *new_child, struct mem_reserve *node)
435+{
436+ int ret;
437+
438+ WARN_ON(!new_child->name);
439+
440+ mutex_lock(&mem_reserve_mutex);
441+ if (new_child->parent) {
442+ ret = -EEXIST;
443+ goto unlock;
444+ }
445+ new_child->parent = node;
446+ list_add(&new_child->siblings, &node->children);
447+ ret = __mem_reserve_add(node, new_child->pages, new_child->limit);
448+ if (ret) {
449+ new_child->parent = NULL;
450+ list_del_init(&new_child->siblings);
451+ }
452+unlock:
453+ mutex_unlock(&mem_reserve_mutex);
454+
455+ return ret;
456+}
457+EXPORT_SYMBOL_GPL(mem_reserve_connect);
458+
459+/**
460+ * mem_reserve_disconnect() - sever a node's connection to the reserve tree
461+ * @node - the node to disconnect
462+ *
463+ * Disconnecting a node results in a reduction of the reserve by @node->pages
464+ * if node had a connection to mem_reserve_root.
465+ */
466+void mem_reserve_disconnect(struct mem_reserve *node)
467+{
468+ int ret;
469+
470+ BUG_ON(!node->parent);
471+
472+ mutex_lock(&mem_reserve_mutex);
473+ if (!node->parent) {
474+ ret = -ENOENT;
475+ goto unlock;
476+ }
477+ ret = __mem_reserve_add(node->parent, -node->pages, -node->limit);
478+ if (!ret) {
479+ node->parent = NULL;
480+ list_del_init(&node->siblings);
481+ }
482+unlock:
483+ mutex_unlock(&mem_reserve_mutex);
484+
485+ /*
486+ * We cannot fail to shrink the reserves, can we?
487+ */
488+ WARN_ON(ret);
489+}
490+EXPORT_SYMBOL_GPL(mem_reserve_disconnect);
491+
492+#ifdef CONFIG_PROC_FS
493+
494+/*
495+ * Simple output of the reserve tree in: /proc/reserve_info
496+ * Example:
497+ *
498+ * localhost ~ # cat /proc/reserve_info
499+ * 1:0 "total reserve" 6232K 0/278581
500+ * 2:1 "total network reserve" 6232K 0/278581
501+ * 3:2 "network TX reserve" 212K 0/53
502+ * 4:3 "protocol TX pages" 212K 0/53
503+ * 5:2 "network RX reserve" 6020K 0/278528
504+ * 6:5 "IPv4 route cache" 5508K 0/16384
505+ * 7:5 "SKB data reserve" 512K 0/262144
506+ * 8:7 "IPv4 fragment cache" 512K 0/262144
507+ */
508+
509+static void mem_reserve_show_item(struct seq_file *m, struct mem_reserve *res,
510+ unsigned int parent, unsigned int *id)
511+{
512+ struct mem_reserve *child;
513+ unsigned int my_id = ++*id;
514+
515+ seq_printf(m, "%d:%d \"%s\" %ldK %ld/%ld\n",
516+ my_id, parent, res->name,
517+ res->pages << (PAGE_SHIFT - 10),
518+ res->usage, res->limit);
519+
520+ list_for_each_entry(child, &res->children, siblings)
521+ mem_reserve_show_item(m, child, my_id, id);
522+}
523+
524+static int mem_reserve_show(struct seq_file *m, void *v)
525+{
526+ unsigned int ident = 0;
527+
528+ mutex_lock(&mem_reserve_mutex);
529+ mem_reserve_show_item(m, &mem_reserve_root, ident, &ident);
530+ mutex_unlock(&mem_reserve_mutex);
531+
532+ return 0;
533+}
534+
535+static int mem_reserve_open(struct inode *inode, struct file *file)
536+{
537+ return single_open(file, mem_reserve_show, NULL);
538+}
539+
540+static const struct file_operations mem_reserve_operations = {
541+ .open = mem_reserve_open,
542+ .read = seq_read,
543+ .llseek = seq_lseek,
544+ .release = single_release,
545+};
546+
547+static __init int mem_reserve_proc_init(void)
548+{
549+ proc_create("reserve_info", S_IRUSR, NULL, &mem_reserve_opterations);
550+ return 0;
551+}
552+
553+module_init(mem_reserve_proc_init);
554+
555+#endif
556+
557+/*
558+ * alloc_page helpers
559+ */
560+
561+/**
562+ * mem_reserve_pages_set() - set a reserve's size in pages
563+ * @res - reserve to set
564+ * @pages - size in pages to set it to
565+ *
566+ * Returns -ENOMEM when it fails to set the reserve. On failure the old size
567+ * is preserved.
568+ */
569+int mem_reserve_pages_set(struct mem_reserve *res, long pages)
570+{
571+ int ret;
572+
573+ mutex_lock(&mem_reserve_mutex);
574+ pages -= res->pages;
575+ ret = __mem_reserve_add(res, pages, pages * PAGE_SIZE);
576+ mutex_unlock(&mem_reserve_mutex);
577+
578+ return ret;
579+}
580+EXPORT_SYMBOL_GPL(mem_reserve_pages_set);
581+
582+/**
583+ * mem_reserve_pages_add() - change the size in a relative way
584+ * @res - reserve to change
585+ * @pages - number of pages to add (or subtract when negative)
586+ *
587+ * Similar to mem_reserve_pages_set, except that the argument is relative
588+ * instead of absolute.
589+ *
590+ * Returns -ENOMEM when it fails to increase.
591+ */
592+int mem_reserve_pages_add(struct mem_reserve *res, long pages)
593+{
594+ int ret;
595+
596+ mutex_lock(&mem_reserve_mutex);
597+ ret = __mem_reserve_add(res, pages, pages * PAGE_SIZE);
598+ mutex_unlock(&mem_reserve_mutex);
599+
600+ return ret;
601+}
602+
603+/**
604+ * mem_reserve_pages_charge() - charge page usage to a reserve
605+ * @res - reserve to charge
606+ * @pages - size to charge
607+ *
608+ * Returns non-zero on success.
609+ */
610+int mem_reserve_pages_charge(struct mem_reserve *res, long pages)
611+{
612+ return __mem_reserve_charge(res, pages * PAGE_SIZE);
613+}
614+EXPORT_SYMBOL_GPL(mem_reserve_pages_charge);
615+
616+/*
617+ * kmalloc helpers
618+ */
619+
620+/**
621+ * mem_reserve_kmalloc_set() - set this reserve to @bytes worth of kmalloc
622+ * @res - reserve to change
623+ * @bytes - size in bytes to reserve
624+ *
625+ * Returns -ENOMEM on failure.
626+ */
627+int mem_reserve_kmalloc_set(struct mem_reserve *res, long bytes)
628+{
629+ int ret;
630+ long pages;
631+
632+ mutex_lock(&mem_reserve_mutex);
633+ pages = kmalloc_estimate_bytes(GFP_ATOMIC, bytes);
634+ pages -= res->pages;
635+ bytes -= res->limit;
636+ ret = __mem_reserve_add(res, pages, bytes);
637+ mutex_unlock(&mem_reserve_mutex);
638+
639+ return ret;
640+}
641+EXPORT_SYMBOL_GPL(mem_reserve_kmalloc_set);
642+
643+/**
644+ * mem_reserve_kmalloc_charge() - charge bytes to a reserve
645+ * @res - reserve to charge
646+ * @bytes - bytes to charge
647+ *
648+ * Returns non-zero on success.
649+ */
650+int mem_reserve_kmalloc_charge(struct mem_reserve *res, long bytes)
651+{
652+ if (bytes < 0)
653+ bytes = -roundup_pow_of_two(-bytes);
654+ else
655+ bytes = roundup_pow_of_two(bytes);
656+
657+ return __mem_reserve_charge(res, bytes);
658+}
659+EXPORT_SYMBOL_GPL(mem_reserve_kmalloc_charge);
660+
661+/*
662+ * kmem_cache helpers
663+ */
664+
665+/**
666+ * mem_reserve_kmem_cache_set() - set reserve to @objects worth of kmem_cache_alloc of @s
667+ * @res - reserve to set
668+ * @s - kmem_cache to reserve from
669+ * @objects - number of objects to reserve
670+ *
671+ * Returns -ENOMEM on failure.
672+ */
673+int mem_reserve_kmem_cache_set(struct mem_reserve *res, struct kmem_cache *s,
674+ int objects)
675+{
676+ int ret;
677+ long pages, bytes;
678+
679+ mutex_lock(&mem_reserve_mutex);
680+ pages = kmem_alloc_estimate(s, GFP_ATOMIC, objects);
681+ pages -= res->pages;
682+ bytes = objects * kmem_cache_size(s) - res->limit;
683+ ret = __mem_reserve_add(res, pages, bytes);
684+ mutex_unlock(&mem_reserve_mutex);
685+
686+ return ret;
687+}
688+EXPORT_SYMBOL_GPL(mem_reserve_kmem_cache_set);
689+
690+/**
691+ * mem_reserve_kmem_cache_charge() - charge (or uncharge) usage of objs
692+ * @res - reserve to charge
693+ * @objs - objects to charge for
694+ *
695+ * Returns non-zero on success.
696+ */
697+int mem_reserve_kmem_cache_charge(struct mem_reserve *res, struct kmem_cache *s,
698+ long objs)
699+{
700+ return __mem_reserve_charge(res, objs * kmem_cache_size(s));
701+}
702+EXPORT_SYMBOL_GPL(mem_reserve_kmem_cache_charge);
703+
704+/*
705+ * Alloc wrappers.
706+ *
707+ * Actual usage is commented in linux/reserve.h where the interface functions
708+ * live. Furthermore, the code is 3 instances of the same paradigm, hence only
709+ * the first contains extensive comments.
710+ */
711+
712+/*
713+ * kmalloc/kfree
714+ */
715+
716+void *___kmalloc_reserve(size_t size, gfp_t flags, int node, void *ip,
717+ struct mem_reserve *res, int *emerg)
718+{
719+ void *obj;
720+ gfp_t gfp;
721+
722+ /*
723+ * Try a regular allocation; when that fails and we're not entitled
724+ * to the reserves, fail.
725+ */
726+ gfp = flags | __GFP_NOMEMALLOC | __GFP_NOWARN;
727+ obj = __kmalloc_node_track_caller(size, gfp, node, ip);
728+
729+ if (obj || !(gfp_to_alloc_flags(flags) & ALLOC_NO_WATERMARKS))
730+ goto out;
731+
732+ /*
733+ * If we were given a reserve to charge against, try that.
734+ */
735+ if (res && !mem_reserve_kmalloc_charge(res, size)) {
736+ /*
737+ * If we failed to charge and we're not allowed to wait for
738+ * it to succeed, bail.
739+ */
740+ if (!(flags & __GFP_WAIT))
741+ goto out;
742+
743+ /*
744+ * Wait for a successful charge against the reserve. All
745+ * uncharge operations against this reserve will wake us up.
746+ */
747+ wait_event(res->waitqueue,
748+ mem_reserve_kmalloc_charge(res, size));
749+
750+ /*
751+ * After waiting for it, again try a regular allocation.
752+ * Pressure could have lifted during our sleep. If this
753+ * succeeds, uncharge the reserve.
754+ */
755+ obj = __kmalloc_node_track_caller(size, gfp, node, ip);
756+ if (obj) {
757+ mem_reserve_kmalloc_charge(res, -size);
758+ goto out;
759+ }
760+ }
761+
762+ /*
763+ * Regular allocation failed, and we've successfully charged our
764+ * requested usage against the reserve. Do the emergency allocation.
765+ */
766+ obj = __kmalloc_node_track_caller(size, flags, node, ip);
767+ WARN_ON(!obj);
768+ if (emerg)
769+ *emerg = 1;
770+
771+out:
772+ return obj;
773+}
774+
775+void __kfree_reserve(void *obj, struct mem_reserve *res, int emerg)
776+{
777+ /*
778+ * ksize gives the full allocated size vs the requested size we used to
779+ * charge; however, since we round up to the nearest power of two, this
780+ * should all work nicely.
781+ */
782+ size_t size = ksize(obj);
783+
784+ kfree(obj);
785+ /*
786+ * Free before uncharging; this ensures memory is actually present when
787+ * a subsequent charge succeeds.
788+ */
789+ mem_reserve_kmalloc_charge(res, -size);
790+}
791+
792+/*
793+ * kmem_cache_alloc/kmem_cache_free
794+ */
795+
796+void *__kmem_cache_alloc_reserve(struct kmem_cache *s, gfp_t flags, int node,
797+ struct mem_reserve *res, int *emerg)
798+{
799+ void *obj;
800+ gfp_t gfp;
801+
802+ gfp = flags | __GFP_NOMEMALLOC | __GFP_NOWARN;
803+ obj = kmem_cache_alloc_node(s, gfp, node);
804+
805+ if (obj || !(gfp_to_alloc_flags(flags) & ALLOC_NO_WATERMARKS))
806+ goto out;
807+
808+ if (res && !mem_reserve_kmem_cache_charge(res, s, 1)) {
809+ if (!(flags & __GFP_WAIT))
810+ goto out;
811+
812+ wait_event(res->waitqueue,
813+ mem_reserve_kmem_cache_charge(res, s, 1));
814+
815+ obj = kmem_cache_alloc_node(s, gfp, node);
816+ if (obj) {
817+ mem_reserve_kmem_cache_charge(res, s, -1);
818+ goto out;
819+ }
820+ }
821+
822+ obj = kmem_cache_alloc_node(s, flags, node);
823+ WARN_ON(!obj);
824+ if (emerg)
825+ *emerg = 1;
826+
827+out:
828+ return obj;
829+}
830+
831+void __kmem_cache_free_reserve(struct kmem_cache *s, void *obj,
832+ struct mem_reserve *res, int emerg)
833+{
834+ kmem_cache_free(s, obj);
835+ mem_reserve_kmem_cache_charge(res, s, -1);
836+}
837+
838+/*
839+ * alloc_pages/free_pages
840+ */
841+
842+struct page *__alloc_pages_reserve(int node, gfp_t flags, int order,
843+ struct mem_reserve *res, int *emerg)
844+{
845+ struct page *page;
846+ gfp_t gfp;
847+ long pages = 1 << order;
848+
849+ gfp = flags | __GFP_NOMEMALLOC | __GFP_NOWARN;
850+ page = alloc_pages_node(node, gfp, order);
851+
852+ if (page || !(gfp_to_alloc_flags(flags) & ALLOC_NO_WATERMARKS))
853+ goto out;
854+
855+ if (res && !mem_reserve_pages_charge(res, pages)) {
856+ if (!(flags & __GFP_WAIT))
857+ goto out;
858+
859+ wait_event(res->waitqueue,
860+ mem_reserve_pages_charge(res, pages));
861+
862+ page = alloc_pages_node(node, gfp, order);
863+ if (page) {
864+ mem_reserve_pages_charge(res, -pages);
865+ goto out;
866+ }
867+ }
868+
869+ page = alloc_pages_node(node, flags, order);
870+ WARN_ON(!page);
871+ if (emerg)
872+ *emerg = 1;
873+
874+out:
875+ return page;
876+}
877+
878+void __free_pages_reserve(struct page *page, int order,
879+ struct mem_reserve *res, int emerg)
880+{
881+ __free_pages(page, order);
882+ mem_reserve_pages_charge(res, -(1 << order));
883+}
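
To make the tree/propagation model described at the top of this file more
concrete, a hedged sketch (not part of the patch; the names and sizes are
illustrative, loosely modelled on the /proc/reserve_info sample above):

    static struct mem_reserve skb_data, ipv4_frag, ipv6_frag;

    static int frag_reserves_init(void)
    {
        int ret;

        /* aggregate node and two leaves, all using the kmalloc unit */
        mem_reserve_init(&skb_data, "SKB data reserve", &mem_reserve_root);
        mem_reserve_init(&ipv4_frag, "IPv4 fragment cache", &skb_data);
        mem_reserve_init(&ipv6_frag, "IPv6 fragment cache", &skb_data);

        /* sizing a leaf propagates pages and limit up to mem_reserve_root,
         * which backs the total via adjust_memalloc_reserve() */
        ret = mem_reserve_kmalloc_set(&ipv4_frag, 256 * 1024);
        if (!ret)
            ret = mem_reserve_kmalloc_set(&ipv6_frag, 256 * 1024);
        return ret;
    }

    static int frag_charge(size_t bytes)
    {
        /* both leaves share a unit, so the aggregate can be charged --
         * but then always charge the aggregate, never the leaves,
         * because usage is not propagated up the tree */
        if (!mem_reserve_kmalloc_charge(&skb_data, bytes))
            return -ENOMEM;
        return 0;
    }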
884Index: linux-2.6.27/include/linux/slab.h
885===================================================================
886--- linux-2.6.27.orig/include/linux/slab.h
887+++ linux-2.6.27/include/linux/slab.h
888@@ -230,13 +230,14 @@ static inline void *kmem_cache_alloc_nod
889 */
890 #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB)
891 extern void *__kmalloc_track_caller(size_t, gfp_t, void*);
892-#define kmalloc_track_caller(size, flags) \
893- __kmalloc_track_caller(size, flags, __builtin_return_address(0))
894 #else
895-#define kmalloc_track_caller(size, flags) \
896+#define __kmalloc_track_caller(size, flags, ip) \
897 __kmalloc(size, flags)
898 #endif /* DEBUG_SLAB */
899
900+#define kmalloc_track_caller(size, flags) \
901+ __kmalloc_track_caller(size, flags, __builtin_return_address(0))
902+
903 #ifdef CONFIG_NUMA
904 /*
905 * kmalloc_node_track_caller is a special version of kmalloc_node that
906@@ -248,21 +249,22 @@ extern void *__kmalloc_track_caller(size
907 */
908 #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB)
909 extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, void *);
910-#define kmalloc_node_track_caller(size, flags, node) \
911- __kmalloc_node_track_caller(size, flags, node, \
912- __builtin_return_address(0))
913 #else
914-#define kmalloc_node_track_caller(size, flags, node) \
915+#define __kmalloc_node_track_caller(size, flags, node, ip) \
916 __kmalloc_node(size, flags, node)
917 #endif
918
919 #else /* CONFIG_NUMA */
920
921-#define kmalloc_node_track_caller(size, flags, node) \
922- kmalloc_track_caller(size, flags)
923+#define __kmalloc_node_track_caller(size, flags, node, ip) \
924+ __kmalloc_track_caller(size, flags, ip)
925
926 #endif /* DEBUG_SLAB */
927
928+#define kmalloc_node_track_caller(size, flags, node) \
929+ __kmalloc_node_track_caller(size, flags, node, \
930+ __builtin_return_address(0))
931+
932 /*
933 * Shortcuts
934 */
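
The point of this slab.h rework is that __kmalloc_track_caller() and
__kmalloc_node_track_caller() now exist in every configuration, which is what
__kmalloc_reserve() in linux/reserve.h relies on. For illustration, the
expansion chain in a !CONFIG_NUMA, !CONFIG_DEBUG_SLAB, !CONFIG_SLUB build:

    kmalloc_node_track_caller(size, flags, node)
        /* -> */ __kmalloc_node_track_caller(size, flags, node,
                                             __builtin_return_address(0))
        /* -> (no NUMA: node is dropped) */
        __kmalloc_track_caller(size, flags, __builtin_return_address(0))
        /* -> (no DEBUG_SLAB/SLUB: the caller address is dropped) */
        __kmalloc(size, flags)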
935Index: linux-2.6.27/mm/slub.c
936===================================================================
937--- linux-2.6.27.orig/mm/slub.c
938+++ linux-2.6.27/mm/slub.c
939@@ -2726,6 +2726,7 @@ void *__kmalloc(size_t size, gfp_t flags
940 }
941 EXPORT_SYMBOL(__kmalloc);
942
943+#ifdef CONFIG_NUMA
944 static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
945 {
946 struct page *page = alloc_pages_node(node, flags | __GFP_COMP,
947@@ -2737,7 +2738,6 @@ static void *kmalloc_large_node(size_t s
948 return NULL;
949 }
950
951-#ifdef CONFIG_NUMA
952 void *__kmalloc_node(size_t size, gfp_t flags, int node)
953 {
954 struct kmem_cache *s;