From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: mm: memory reserve management
References: FATE#303834
Generic reserve management code.

It provides methods to reserve and charge. Upon this, generic alloc/free style
reserve pools could be built, which could fully replace mempool_t.

It should also allow for a Banker's algorithm replacement of __GFP_NOFAIL.
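
A minimal usage sketch (illustrative only; the reserve, its name and the
sizes are made up, the API is the one introduced by this patch):

	static struct mem_reserve my_res;
	int emerg = 0;
	void *obj;

	/* back the reserve with 16 pages, hanging off the global root */
	mem_reserve_init(&my_res, "my reserve", &mem_reserve_root);
	if (mem_reserve_pages_set(&my_res, 16))
		return -ENOMEM;

	/* allocate; emerg tells us whether the reserve was charged */
	obj = kmalloc_reserve(128, GFP_ATOMIC, -1, &my_res, &emerg);
	...
	kfree_reserve(obj, &my_res, emerg);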
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Neil Brown <neilb@suse.de>
Acked-by: Suresh Jayaraman <sjayaraman@suse.de>
---
 include/linux/reserve.h |  198 ++++++++++++++
 include/linux/slab.h    |   20 -
 mm/reserve.c            |  637 ++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 848 insertions(+), 11 deletions(-)
Index: linux-2.6.27/include/linux/reserve.h
===================================================================
--- /dev/null
+++ linux-2.6.27/include/linux/reserve.h
@@ -0,0 +1,198 @@
+/*
+ * Memory reserve management.
+ *
+ * Copyright (C) 2007-2008 Red Hat, Inc.,
+ * Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * This file contains the public data structure and API definitions.
+ */
+
+#ifndef _LINUX_RESERVE_H
+#define _LINUX_RESERVE_H
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/slab.h>
+
+struct mem_reserve {
+	struct mem_reserve *parent;
+	struct list_head children;
+	struct list_head siblings;
+	const char *name;
+	long pages, limit, usage;
+	spinlock_t lock;	/* protects limit and usage */
+	wait_queue_head_t waitqueue;
+};
+
+extern struct mem_reserve mem_reserve_root;
+
+void mem_reserve_init(struct mem_reserve *res, const char *name,
+		      struct mem_reserve *parent);
+int mem_reserve_connect(struct mem_reserve *new_child,
+			struct mem_reserve *node);
+void mem_reserve_disconnect(struct mem_reserve *node);
+
+int mem_reserve_pages_set(struct mem_reserve *res, long pages);
+int mem_reserve_pages_add(struct mem_reserve *res, long pages);
+int mem_reserve_pages_charge(struct mem_reserve *res, long pages);
+
+int mem_reserve_kmalloc_set(struct mem_reserve *res, long bytes);
+int mem_reserve_kmalloc_charge(struct mem_reserve *res, long bytes);
+
+int mem_reserve_kmem_cache_set(struct mem_reserve *res,
+			       struct kmem_cache *s,
+			       int objects);
+int mem_reserve_kmem_cache_charge(struct mem_reserve *res,
+				  struct kmem_cache *s, long objs);
+
+void *___kmalloc_reserve(size_t size, gfp_t flags, int node, void *ip,
+			 struct mem_reserve *res, int *emerg);
+
+static inline
+void *__kmalloc_reserve(size_t size, gfp_t flags, int node, void *ip,
+			struct mem_reserve *res, int *emerg)
+{
+	void *obj;
+	obj = __kmalloc_node_track_caller(size,
+			flags | __GFP_NOMEMALLOC | __GFP_NOWARN, node, ip);
+	if (!obj)
+		obj = ___kmalloc_reserve(size, flags, node, ip, res, emerg);
+	return obj;
+}
+
+/**
+ * kmalloc_reserve() - kmalloc() and charge against @res for @emerg allocations
+ * @size - size of the requested memory region
+ * @gfp - allocation flags to use for this allocation
+ * @node - preferred memory node for this allocation
+ * @res - reserve to charge emergency allocations against
+ * @emerg - bit 0 is set when the allocation was an emergency allocation
+ *
+ * Returns NULL on failure
+ */
+#define kmalloc_reserve(size, gfp, node, res, emerg)		\
+	__kmalloc_reserve(size, gfp, node,			\
+			  __builtin_return_address(0), res, emerg)
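+
+/*
+ * Example (sketch; @my_res and the sizes are hypothetical):
+ *
+ *	int emerg = 0;
+ *	void *obj = kmalloc_reserve(512, GFP_ATOMIC, -1, &my_res, &emerg);
+ *	if (obj) {
+ *		...
+ *		kfree_reserve(obj, &my_res, emerg);
+ *	}
+ */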
+
+void __kfree_reserve(void *obj, struct mem_reserve *res, int emerg);
+
+/**
+ * kfree_reserve() - kfree() and uncharge against @res for @emerg allocations
+ * @obj - memory to free
+ * @res - reserve to uncharge emergency allocations from
+ * @emerg - was this an emergency allocation
+ */
+static inline
+void kfree_reserve(void *obj, struct mem_reserve *res, int emerg)
+{
+	if (unlikely(obj && res && emerg))
+		__kfree_reserve(obj, res, emerg);
+	else
+		kfree(obj);
+}
+
+void *__kmem_cache_alloc_reserve(struct kmem_cache *s, gfp_t flags, int node,
+				 struct mem_reserve *res, int *emerg);
+
+/**
+ * kmem_cache_alloc_reserve() - kmem_cache_alloc() and charge against @res
+ * @s - kmem_cache to allocate from
+ * @gfp - allocation flags to use for this allocation
+ * @node - preferred memory node for this allocation
+ * @res - reserve to charge emergency allocations against
+ * @emerg - bit 0 is set when the allocation was an emergency allocation
+ *
+ * Returns NULL on failure
+ */
+static inline
+void *kmem_cache_alloc_reserve(struct kmem_cache *s, gfp_t flags, int node,
+			       struct mem_reserve *res, int *emerg)
+{
+	void *obj;
+	obj = kmem_cache_alloc_node(s,
+			flags | __GFP_NOMEMALLOC | __GFP_NOWARN, node);
+	if (!obj)
+		obj = __kmem_cache_alloc_reserve(s, flags, node, res, emerg);
+	return obj;
+}
+
+void __kmem_cache_free_reserve(struct kmem_cache *s, void *obj,
+			       struct mem_reserve *res, int emerg);
+
+/**
+ * kmem_cache_free_reserve() - kmem_cache_free() and uncharge against @res
+ * @s - kmem_cache to free to
+ * @obj - memory to free
+ * @res - reserve to uncharge emergency allocations from
+ * @emerg - was this an emergency allocation
+ */
+static inline
+void kmem_cache_free_reserve(struct kmem_cache *s, void *obj,
+			     struct mem_reserve *res, int emerg)
+{
+	if (unlikely(obj && res && emerg))
+		__kmem_cache_free_reserve(s, obj, res, emerg);
+	else
+		kmem_cache_free(s, obj);
+}
+
+struct page *__alloc_pages_reserve(int node, gfp_t flags, int order,
+				   struct mem_reserve *res, int *emerg);
+
+/**
+ * alloc_pages_reserve() - alloc_pages() and charge against @res
+ * @node - preferred memory node for this allocation
+ * @gfp - allocation flags to use for this allocation
+ * @order - page order
+ * @res - reserve to charge emergency allocations against
+ * @emerg - bit 0 is set when the allocation was an emergency allocation
+ *
+ * Returns NULL on failure
+ */
+static inline
+struct page *alloc_pages_reserve(int node, gfp_t flags, int order,
+				 struct mem_reserve *res, int *emerg)
+{
+	struct page *page;
+	page = alloc_pages_node(node,
+			flags | __GFP_NOMEMALLOC | __GFP_NOWARN, order);
+	if (!page)
+		page = __alloc_pages_reserve(node, flags, order, res, emerg);
+	return page;
+}
+
+void __free_pages_reserve(struct page *page, int order,
+			  struct mem_reserve *res, int emerg);
+
+/**
+ * free_pages_reserve() - __free_pages() and uncharge against @res
+ * @page - page to free
+ * @order - page order
+ * @res - reserve to uncharge emergency allocations from
+ * @emerg - was this an emergency allocation
+ */
+static inline
+void free_pages_reserve(struct page *page, int order,
+			struct mem_reserve *res, int emerg)
+{
+	if (unlikely(page && res && emerg))
+		__free_pages_reserve(page, order, res, emerg);
+	else
+		__free_pages(page, order);
+}
+
+#endif /* _LINUX_RESERVE_H */
Index: linux-2.6.27/mm/Makefile
===================================================================
--- linux-2.6.27.orig/mm/Makefile
+++ linux-2.6.27/mm/Makefile
@@ -11,7 +11,7 @@ obj-y := bootmem.o filemap.o mempool.o
 			   maccess.o page_alloc.o page-writeback.o pdflush.o \
 			   readahead.o swap.o truncate.o vmscan.o \
 			   prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
-			   page_isolation.o mm_init.o $(mmu-y)
+			   page_isolation.o mm_init.o reserve.o $(mmu-y)
 
 obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o
 obj-$(CONFIG_BOUNCE)	+= bounce.o
Index: linux-2.6.27/mm/reserve.c
===================================================================
--- /dev/null
+++ linux-2.6.27/mm/reserve.c
@@ -0,0 +1,637 @@
+/*
+ * Memory reserve management.
+ *
+ * Copyright (C) 2007-2008, Red Hat, Inc.,
+ * Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * Manage a set of memory reserves.
+ *
+ * A memory reserve is a reserve for a specified number of objects of a
+ * specified size. Since memory is managed in pages, this reserve demand is
+ * then translated into a page unit.
+ *
+ * So each reserve has a specified object limit, an object usage count and a
+ * number of pages required to back these objects.
+ *
+ * Usage is charged against a reserve; if the charge fails, the resource must
+ * not be allocated/used.
+ *
+ * The reserves are managed in a tree, and the resource demands (pages and
+ * limit) are propagated up the tree. Obviously the object limit will be
+ * meaningless as soon as the units start mixing, but the required page reserve
+ * (being of one unit) is still valid at the root.
+ *
+ * It is the page demand of the root node that is used to set the global
+ * reserve (adjust_memalloc_reserve(), which sets zone->pages_emerg).
+ *
+ * As long as a subtree has the same usage unit, an aggregate node can be used
+ * to charge against, instead of the leaf nodes. However, be consistent about
+ * which node is charged; resource usage is not propagated up the tree (for
+ * performance reasons).
+ */
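+
+/*
+ * Illustrative example (names and sizes are hypothetical): build a small
+ * subtree under the global root and charge the leaf:
+ *
+ *	struct mem_reserve net_rx;
+ *
+ *	mem_reserve_init(&net_rx, "network RX reserve", &mem_reserve_root);
+ *	mem_reserve_pages_set(&net_rx, 256);
+ *	...
+ *	if (!mem_reserve_pages_charge(&net_rx, 1))
+ *		(failure: the page must not be taken from the reserve)
+ */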
+#include <linux/reserve.h>
+#include <linux/mutex.h>
+#include <linux/mmzone.h>
+#include <linux/log2.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include "internal.h"
+
+static DEFINE_MUTEX(mem_reserve_mutex);
+
+/*
+ * @mem_reserve_root - the global reserve root
+ *
+ * The global reserve is empty and has no limit unit; it merely
+ * acts as an aggregation point for reserves and an interface to
+ * adjust_memalloc_reserve().
+ */
+struct mem_reserve mem_reserve_root = {
+	.children = LIST_HEAD_INIT(mem_reserve_root.children),
+	.siblings = LIST_HEAD_INIT(mem_reserve_root.siblings),
+	.name = "total reserve",
+	.lock = __SPIN_LOCK_UNLOCKED(mem_reserve_root.lock),
+	.waitqueue = __WAIT_QUEUE_HEAD_INITIALIZER(mem_reserve_root.waitqueue),
+};
+EXPORT_SYMBOL_GPL(mem_reserve_root);
+
+/**
+ * mem_reserve_init() - initialize a memory reserve object
+ * @res - the new reserve object
+ * @name - a name for this reserve
+ * @parent - when non-NULL, the parent to connect to
+ */
+void mem_reserve_init(struct mem_reserve *res, const char *name,
+		      struct mem_reserve *parent)
+{
+	memset(res, 0, sizeof(*res));
+	INIT_LIST_HEAD(&res->children);
+	INIT_LIST_HEAD(&res->siblings);
+	res->name = name;
+	spin_lock_init(&res->lock);
+	init_waitqueue_head(&res->waitqueue);
+
+	if (parent)
+		mem_reserve_connect(res, parent);
+}
+EXPORT_SYMBOL_GPL(mem_reserve_init);
+
+/*
+ * Propagate the pages and limit changes up the (sub)tree.
+ */
+static void __calc_reserve(struct mem_reserve *res, long pages, long limit)
+{
+	unsigned long flags;
+
+	for ( ; res; res = res->parent) {
+		res->pages += pages;
+		if (limit) {
+			spin_lock_irqsave(&res->lock, flags);
+			res->limit += limit;
+			spin_unlock_irqrestore(&res->lock, flags);
+		}
+	}
+}
+
+/**
+ * __mem_reserve_add() - primitive to change the size of a reserve
+ * @res - reserve to change
+ * @pages - page delta
+ * @limit - usage limit delta
+ *
+ * Returns -ENOMEM when a size increase is not possible at the moment.
+ */
+static int __mem_reserve_add(struct mem_reserve *res, long pages, long limit)
+{
+	int ret = 0;
+	long reserve;
+
+	/*
+	 * This looks more complex than need be, that is because we handle
+	 * the case where @res isn't actually connected to mem_reserve_root.
+	 *
+	 * So, by propagating the new pages up the (sub)tree and computing
+	 * the difference in mem_reserve_root.pages we find if this action
+	 * affects the actual reserve.
+	 *
+	 * The (partial) propagation also makes that mem_reserve_connect()
+	 * needs only look at the direct child, since each disconnected
+	 * sub-tree is fully up-to-date.
+	 */
+	reserve = mem_reserve_root.pages;
+	__calc_reserve(res, pages, 0);
+	reserve = mem_reserve_root.pages - reserve;
+
+	if (reserve) {
+		ret = adjust_memalloc_reserve(reserve);
+		if (ret)
+			__calc_reserve(res, -pages, 0);
+	}
+
+	/*
+	 * Delay updating the limits until we've acquired the resources to
+	 * back them.
+	 */
+	if (!ret)
+		__calc_reserve(res, 0, limit);
+
+	return ret;
+}
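+
+/*
+ * Worked example (sketch): adding 4 pages to a reserve connected to
+ * mem_reserve_root raises mem_reserve_root.pages by 4, so
+ * adjust_memalloc_reserve(4) is called; the same operation on a
+ * disconnected subtree leaves mem_reserve_root.pages untouched and
+ * only updates the local bookkeeping.
+ */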
+
+/**
+ * __mem_reserve_charge() - primitive to charge object usage of a reserve
+ * @res - reserve to charge
+ * @charge - size of the charge
+ *
+ * Returns non-zero on success, zero on failure.
+ */
+static
+int __mem_reserve_charge(struct mem_reserve *res, long charge)
+{
+	unsigned long flags;
+	int ret = 0;

+	spin_lock_irqsave(&res->lock, flags);
+	if (charge < 0 || res->usage + charge < res->limit) {
+		res->usage += charge;
+		if (unlikely(res->usage < 0))
+			res->usage = 0;
+		ret = 1;
+	}
+	if (charge < 0)
+		wake_up_all(&res->waitqueue);
+	spin_unlock_irqrestore(&res->lock, flags);
+
+	return ret;
+}
+
+/**
+ * mem_reserve_connect() - connect a reserve to another in a child-parent relation
+ * @new_child - the reserve node to connect (child)
+ * @node - the reserve node to connect to (parent)
+ *
+ * Connecting a node results in an increase of the reserve by the amount of
+ * pages in @new_child->pages if @node has a connection to mem_reserve_root.
+ *
+ * Returns -ENOMEM when the new connection would increase the reserve (parent
+ * is connected to mem_reserve_root) and there is no memory to do so.
+ *
+ * On error, the child is _NOT_ connected.
+ */
+int mem_reserve_connect(struct mem_reserve *new_child, struct mem_reserve *node)
+{
+	int ret;
+
+	WARN_ON(!new_child->name);
+
+	mutex_lock(&mem_reserve_mutex);
+	if (new_child->parent) {
+		mutex_unlock(&mem_reserve_mutex);
+		return -EEXIST;
+	}
+	new_child->parent = node;
+	list_add(&new_child->siblings, &node->children);
+	ret = __mem_reserve_add(node, new_child->pages, new_child->limit);
+	if (ret) {
+		new_child->parent = NULL;
+		list_del_init(&new_child->siblings);
+	}
+	mutex_unlock(&mem_reserve_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mem_reserve_connect);
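+
+/*
+ * Usage sketch (hypothetical nodes, mirroring the /proc example below):
+ *
+ *	mem_reserve_init(&ipv4_frag, "IPv4 fragment cache", NULL);
+ *	err = mem_reserve_connect(&ipv4_frag, &skb_reserve);
+ *	if (err)
+ *		return err;	(the child is left disconnected)
+ */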
+
+/**
+ * mem_reserve_disconnect() - sever a node's connection to the reserve tree
+ * @node - the node to disconnect
+ *
+ * Disconnecting a node results in a reduction of the reserve by @node->pages
+ * if node had a connection to mem_reserve_root.
+ */
+void mem_reserve_disconnect(struct mem_reserve *node)
+{
+	int ret;
+
+	BUG_ON(!node->parent);
+
+	mutex_lock(&mem_reserve_mutex);
+	if (!node->parent) {
+		mutex_unlock(&mem_reserve_mutex);
+		return;
+	}
+	ret = __mem_reserve_add(node->parent, -node->pages, -node->limit);
+	if (!ret) {
+		node->parent = NULL;
+		list_del_init(&node->siblings);
+	}
+	mutex_unlock(&mem_reserve_mutex);
+
+	/*
+	 * We cannot fail to shrink the reserves, can we?
+	 */
+	WARN_ON(ret);
+}
+EXPORT_SYMBOL_GPL(mem_reserve_disconnect);
+
+#ifdef CONFIG_PROC_FS
+
+/*
+ * Simple output of the reserve tree in: /proc/reserve_info
+ *
+ * localhost ~ # cat /proc/reserve_info
+ * 1:0 "total reserve" 6232K 0/278581
+ * 2:1 "total network reserve" 6232K 0/278581
+ * 3:2 "network TX reserve" 212K 0/53
+ * 4:3 "protocol TX pages" 212K 0/53
+ * 5:2 "network RX reserve" 6020K 0/278528
+ * 6:5 "IPv4 route cache" 5508K 0/16384
+ * 7:5 "SKB data reserve" 512K 0/262144
+ * 8:7 "IPv4 fragment cache" 512K 0/262144
+ */
+
+static void mem_reserve_show_item(struct seq_file *m, struct mem_reserve *res,
+				  unsigned int parent, unsigned int *id)
+{
+	struct mem_reserve *child;
+	unsigned int my_id = ++*id;
+
+	seq_printf(m, "%d:%d \"%s\" %ldK %ld/%ld\n",
+		   my_id, parent, res->name,
+		   res->pages << (PAGE_SHIFT - 10),
+		   res->usage, res->limit);
+
+	list_for_each_entry(child, &res->children, siblings)
+		mem_reserve_show_item(m, child, my_id, id);
+}
+
+static int mem_reserve_show(struct seq_file *m, void *v)
+{
+	unsigned int ident = 0;
+
+	mutex_lock(&mem_reserve_mutex);
+	mem_reserve_show_item(m, &mem_reserve_root, ident, &ident);
+	mutex_unlock(&mem_reserve_mutex);
+
+	return 0;
+}
+
+static int mem_reserve_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mem_reserve_show, NULL);
+}
+
+static const struct file_operations mem_reserve_operations = {
+	.open		= mem_reserve_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static __init int mem_reserve_proc_init(void)
+{
+	proc_create("reserve_info", S_IRUSR, NULL, &mem_reserve_operations);
+	return 0;
+}
+module_init(mem_reserve_proc_init);
+
+#endif /* CONFIG_PROC_FS */
+
+/*
+ * alloc_page helpers
+ */
+
+/**
+ * mem_reserve_pages_set() - set a reserve's size in pages
+ * @res - reserve to set
+ * @pages - size in pages to set it to
+ *
+ * Returns -ENOMEM when it fails to set the reserve. On failure the old size
+ * is preserved.
+ */
+int mem_reserve_pages_set(struct mem_reserve *res, long pages)
+{
+	int ret;
+
+	mutex_lock(&mem_reserve_mutex);
+	pages -= res->pages;
+	ret = __mem_reserve_add(res, pages, pages * PAGE_SIZE);
+	mutex_unlock(&mem_reserve_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mem_reserve_pages_set);
+
+/**
+ * mem_reserve_pages_add() - change the size in a relative way
+ * @res - reserve to change
+ * @pages - number of pages to add (or subtract when negative)
+ *
+ * Similar to mem_reserve_pages_set(), except that the argument is relative
+ * instead of absolute.
+ *
+ * Returns -ENOMEM when it fails to increase.
+ */
+int mem_reserve_pages_add(struct mem_reserve *res, long pages)
+{
+	int ret;
+
+	mutex_lock(&mem_reserve_mutex);
+	ret = __mem_reserve_add(res, pages, pages * PAGE_SIZE);
+	mutex_unlock(&mem_reserve_mutex);

+	return ret;
+}
+
+/**
+ * mem_reserve_pages_charge() - charge page usage to a reserve
+ * @res - reserve to charge
+ * @pages - size to charge
+ *
+ * Returns non-zero on success.
+ */
+int mem_reserve_pages_charge(struct mem_reserve *res, long pages)
+{
+	return __mem_reserve_charge(res, pages * PAGE_SIZE);
+}
+EXPORT_SYMBOL_GPL(mem_reserve_pages_charge);
+
+/*
+ * kmalloc helpers
+ */
+
+/**
+ * mem_reserve_kmalloc_set() - set this reserve to bytes worth of kmalloc
+ * @res - reserve to change
+ * @bytes - size in bytes to reserve
+ *
+ * Returns -ENOMEM on failure.
+ */
+int mem_reserve_kmalloc_set(struct mem_reserve *res, long bytes)
+{
+	int ret;
+	long pages;
+
+	mutex_lock(&mem_reserve_mutex);
+	pages = kmalloc_estimate_bytes(GFP_ATOMIC, bytes);
+	pages -= res->pages;
+	bytes -= res->limit;
+	ret = __mem_reserve_add(res, pages, bytes);
+	mutex_unlock(&mem_reserve_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mem_reserve_kmalloc_set);
+
+/**
+ * mem_reserve_kmalloc_charge() - charge bytes to a reserve
+ * @res - reserve to charge
+ * @bytes - bytes to charge
+ *
+ * Returns non-zero on success.
+ */
+int mem_reserve_kmalloc_charge(struct mem_reserve *res, long bytes)
+{
+	if (bytes < 0)
+		bytes = -roundup_pow_of_two(-bytes);
+	else
+		bytes = roundup_pow_of_two(bytes);
+
+	return __mem_reserve_charge(res, bytes);
+}
+EXPORT_SYMBOL_GPL(mem_reserve_kmalloc_charge);
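+
+/*
+ * E.g. (illustration): a 620-byte charge is accounted as
+ * roundup_pow_of_two(620) = 1024 bytes, matching the kmalloc slab the
+ * allocation would actually come from; a -620 uncharge is accounted as
+ * -1024, so charges and uncharges balance.
+ */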
+
+/*
+ * kmem_cache helpers
+ */
+
+/**
+ * mem_reserve_kmem_cache_set() - set reserve to @objects worth of kmem_cache_alloc of @s
+ * @res - reserve to set
+ * @s - kmem_cache to reserve from
+ * @objects - number of objects to reserve
+ *
+ * Returns -ENOMEM on failure.
+ */
+int mem_reserve_kmem_cache_set(struct mem_reserve *res, struct kmem_cache *s,
+			       int objects)
+{
+	int ret;
+	long pages, bytes;
+
+	mutex_lock(&mem_reserve_mutex);
+	pages = kmem_alloc_estimate(s, GFP_ATOMIC, objects);
+	pages -= res->pages;
+	bytes = objects * kmem_cache_size(s) - res->limit;
+	ret = __mem_reserve_add(res, pages, bytes);
+	mutex_unlock(&mem_reserve_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mem_reserve_kmem_cache_set);
+
+/**
+ * mem_reserve_kmem_cache_charge() - charge (or uncharge) usage of objs
+ * @res - reserve to charge
+ * @s - kmem_cache the objects come from
+ * @objs - objects to charge for
+ *
+ * Returns non-zero on success.
+ */
+int mem_reserve_kmem_cache_charge(struct mem_reserve *res, struct kmem_cache *s,
+				  long objs)
+{
+	return __mem_reserve_charge(res, objs * kmem_cache_size(s));
+}
+EXPORT_SYMBOL_GPL(mem_reserve_kmem_cache_charge);
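+
+/*
+ * E.g. charge two objects up front and release them again later
+ * (sketch; @res and @s are whatever the caller reserved with):
+ *
+ *	if (!mem_reserve_kmem_cache_charge(res, s, 2))
+ *		return -ENOMEM;
+ *	...
+ *	mem_reserve_kmem_cache_charge(res, s, -2);
+ */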
+
+/*
+ * Actual usage is commented in linux/reserve.h where the interface functions
+ * live. Furthermore, the code is 3 instances of the same paradigm, hence only
+ * the first contains extensive comments.
+ */
+
+/*
+ * kmalloc/kfree
+ */
+
+void *___kmalloc_reserve(size_t size, gfp_t flags, int node, void *ip,
+			 struct mem_reserve *res, int *emerg)
+{
+	void *obj;
+	gfp_t gfp;
+
+	/*
+	 * Try a regular allocation; when that fails and we're not entitled
+	 * to the reserves, fail.
+	 */
+	gfp = flags | __GFP_NOMEMALLOC | __GFP_NOWARN;
+	obj = __kmalloc_node_track_caller(size, gfp, node, ip);
+
+	if (obj || !(gfp_to_alloc_flags(flags) & ALLOC_NO_WATERMARKS))
+		goto out;
+
+	/*
+	 * If we were given a reserve to charge against, try that.
+	 */
+	if (res && !mem_reserve_kmalloc_charge(res, size)) {
+		/*
+		 * If we failed to charge and we're not allowed to wait for
+		 * it to succeed, bail.
+		 */
+		if (!(flags & __GFP_WAIT))
+			goto out;
+
+		/*
+		 * Wait for a successful charge against the reserve. All
+		 * uncharge operations against this reserve will wake us up.
+		 */
+		wait_event(res->waitqueue,
+			   mem_reserve_kmalloc_charge(res, size));
+
+		/*
+		 * After waiting for it, again try a regular allocation.
+		 * Pressure could have lifted during our sleep. If this
+		 * succeeds, uncharge the reserve.
+		 */
+		obj = __kmalloc_node_track_caller(size, gfp, node, ip);
+		if (obj) {
+			mem_reserve_kmalloc_charge(res, -size);
+			goto out;
+		}
+	}
+
+	/*
+	 * Regular allocation failed, and we've successfully charged our
+	 * requested usage against the reserve. Do the emergency allocation.
+	 */
+	obj = __kmalloc_node_track_caller(size, flags, node, ip);
+	WARN_ON(!obj);
+	if (emerg)
+		*emerg |= 1;
+
+out:
+	return obj;
+}
+
+void __kfree_reserve(void *obj, struct mem_reserve *res, int emerg)
+{
+	/*
+	 * ksize() gives the fully allocated size vs the requested size we
+	 * used to charge; however, since we round the charge up to the
+	 * nearest power of two, the two match and charge and uncharge
+	 * balance.
+	 */
+	size_t size = ksize(obj);
+
+	kfree(obj);
+	/*
+	 * Free before uncharge; this ensures memory is actually present when
+	 * a subsequent charge succeeds.
+	 */
+	mem_reserve_kmalloc_charge(res, -size);
+}
+
+/*
+ * kmem_cache_alloc/kmem_cache_free
+ */
+
+void *__kmem_cache_alloc_reserve(struct kmem_cache *s, gfp_t flags, int node,
+				 struct mem_reserve *res, int *emerg)
+{
+	void *obj;
+	gfp_t gfp;
+
+	gfp = flags | __GFP_NOMEMALLOC | __GFP_NOWARN;
+	obj = kmem_cache_alloc_node(s, gfp, node);
+
+	if (obj || !(gfp_to_alloc_flags(flags) & ALLOC_NO_WATERMARKS))
+		goto out;
+
+	if (res && !mem_reserve_kmem_cache_charge(res, s, 1)) {
+		if (!(flags & __GFP_WAIT))
+			goto out;
+
+		wait_event(res->waitqueue,
+			   mem_reserve_kmem_cache_charge(res, s, 1));
+
+		obj = kmem_cache_alloc_node(s, gfp, node);
+		if (obj) {
+			mem_reserve_kmem_cache_charge(res, s, -1);
+			goto out;
+		}
+	}
+
+	obj = kmem_cache_alloc_node(s, flags, node);
+	WARN_ON(!obj);
+	if (emerg)
+		*emerg |= 1;
+
+out:
+	return obj;
+}
+
+void __kmem_cache_free_reserve(struct kmem_cache *s, void *obj,
+			       struct mem_reserve *res, int emerg)
+{
+	kmem_cache_free(s, obj);
+	mem_reserve_kmem_cache_charge(res, s, -1);
+}
+
+/*
+ * alloc_pages/free_pages
+ */
+
+struct page *__alloc_pages_reserve(int node, gfp_t flags, int order,
+				   struct mem_reserve *res, int *emerg)
+{
+	struct page *page;
+	gfp_t gfp;
+	long pages = 1 << order;
+
+	gfp = flags | __GFP_NOMEMALLOC | __GFP_NOWARN;
+	page = alloc_pages_node(node, gfp, order);
+
+	if (page || !(gfp_to_alloc_flags(flags) & ALLOC_NO_WATERMARKS))
+		goto out;
+
+	if (res && !mem_reserve_pages_charge(res, pages)) {
+		if (!(flags & __GFP_WAIT))
+			goto out;
+
+		wait_event(res->waitqueue,
+			   mem_reserve_pages_charge(res, pages));
+
+		page = alloc_pages_node(node, gfp, order);
+		if (page) {
+			mem_reserve_pages_charge(res, -pages);
+			goto out;
+		}
+	}
+
+	page = alloc_pages_node(node, flags, order);
+	WARN_ON(!page);
+	if (emerg)
+		*emerg |= 1;
+
+out:
+	return page;
+}
+
+void __free_pages_reserve(struct page *page, int order,
+			  struct mem_reserve *res, int emerg)
+{
+	__free_pages(page, order);
+	mem_reserve_pages_charge(res, -(1 << order));
+}
Index: linux-2.6.27/include/linux/slab.h
===================================================================
--- linux-2.6.27.orig/include/linux/slab.h
+++ linux-2.6.27/include/linux/slab.h
@@ -230,13 +230,14 @@ static inline void *kmem_cache_alloc_nod
 
 #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB)
 extern void *__kmalloc_track_caller(size_t, gfp_t, void*);
-#define kmalloc_track_caller(size, flags) \
-	__kmalloc_track_caller(size, flags, __builtin_return_address(0))
 #else
-#define kmalloc_track_caller(size, flags) \
+#define __kmalloc_track_caller(size, flags, ip) \
 	__kmalloc(size, flags)
 #endif /* DEBUG_SLAB */
 
+#define kmalloc_track_caller(size, flags) \
+	__kmalloc_track_caller(size, flags, __builtin_return_address(0))
+
 /*
  * kmalloc_node_track_caller is a special version of kmalloc_node that
@@ -248,21 +249,22 @@ extern void *__kmalloc_track_caller(size
 
 #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB)
 extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, void *);
-#define kmalloc_node_track_caller(size, flags, node) \
-	__kmalloc_node_track_caller(size, flags, node, \
-			__builtin_return_address(0))
 #else
-#define kmalloc_node_track_caller(size, flags, node) \
+#define __kmalloc_node_track_caller(size, flags, node, ip) \
 	__kmalloc_node(size, flags, node)
 #endif
 
 #else /* CONFIG_NUMA */
 
-#define kmalloc_node_track_caller(size, flags, node) \
-	kmalloc_track_caller(size, flags)
+#define __kmalloc_node_track_caller(size, flags, node, ip) \
+	__kmalloc_track_caller(size, flags, ip)
 
 #endif /* DEBUG_SLAB */
 
+#define kmalloc_node_track_caller(size, flags, node) \
+	__kmalloc_node_track_caller(size, flags, node, \
+			__builtin_return_address(0))
Index: linux-2.6.27/mm/slub.c
===================================================================
--- linux-2.6.27.orig/mm/slub.c
+++ linux-2.6.27/mm/slub.c
@@ -2726,6 +2726,7 @@ void *__kmalloc(size_t size, gfp_t flags
 EXPORT_SYMBOL(__kmalloc);
 
+#ifdef CONFIG_NUMA
 static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
 {
 	struct page *page = alloc_pages_node(node, flags | __GFP_COMP,
@@ -2737,7 +2738,6 @@ static void *kmalloc_large_node(size_t s
 
-#ifdef CONFIG_NUMA
 void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	struct kmem_cache *s;