1 From 29c1f677d424e8c5683a837fc4f03fc9f19201d7 Mon Sep 17 00:00:00 2001
2 From: Mel Gorman <mel@csn.ul.ie>
3 Date: Thu, 13 Jan 2011 15:47:21 -0800
4 Subject: mm: migration: use rcu_dereference_protected when dereferencing the radix tree slot during file page migration
6 From: Mel Gorman <mel@csn.ul.ie>
8 commit 29c1f677d424e8c5683a837fc4f03fc9f19201d7 upstream.
10 migrate_pages() -> unmap_and_move() only calls rcu_read_lock() for
11 anonymous pages, as introduced by git commit
12 989f89c57e6361e7d16fbd9572b5da7d313b073d ("fix rcu_read_lock() in page
13 migraton"). The point of the RCU protection there is part of getting a
14 stable reference to anon_vma and is only held for anon pages, as file pages
15 are locked, which is sufficient protection against freeing.
17 However, while a file page's mapping is being migrated, the radix tree is
18 double checked to ensure it is the expected page. This uses
19 radix_tree_deref_slot() -> rcu_dereference() without the RCU lock held,
20 triggering the following warning.
22 [ 173.674290] ===================================================
23 [ 173.676016] [ INFO: suspicious rcu_dereference_check() usage. ]
24 [ 173.676016] ---------------------------------------------------
25 [ 173.676016] include/linux/radix-tree.h:145 invoked rcu_dereference_check() without protection!
27 [ 173.676016] other info that might help us debug this:
30 [ 173.676016] rcu_scheduler_active = 1, debug_locks = 0
31 [ 173.676016] 1 lock held by hugeadm/2899:
32 [ 173.676016] #0: (&(&inode->i_data.tree_lock)->rlock){..-.-.}, at: [<c10e3d2b>] migrate_page_move_mapping+0x40/0x1ab
34 [ 173.676016] stack backtrace:
35 [ 173.676016] Pid: 2899, comm: hugeadm Not tainted 2.6.37-rc5-autobuild
36 [ 173.676016] Call Trace:
37 [ 173.676016] [<c128cc01>] ? printk+0x14/0x1b
38 [ 173.676016] [<c1063502>] lockdep_rcu_dereference+0x7d/0x86
39 [ 173.676016] [<c10e3db5>] migrate_page_move_mapping+0xca/0x1ab
40 [ 173.676016] [<c10e41ad>] migrate_page+0x23/0x39
41 [ 173.676016] [<c10e491b>] buffer_migrate_page+0x22/0x107
42 [ 173.676016] [<c10e48f9>] ? buffer_migrate_page+0x0/0x107
43 [ 173.676016] [<c10e425d>] move_to_new_page+0x9a/0x1ae
44 [ 173.676016] [<c10e47e6>] migrate_pages+0x1e7/0x2fa
46 This patch introduces radix_tree_deref_slot_protected() which calls
47 rcu_dereference_protected(). Users of it must pass in the
48 mapping->tree_lock that is protecting this dereference. Holding the tree
49 lock protects against parallel updaters of the radix tree, meaning that
50 rcu_dereference_protected() is allowable.
52 [akpm@linux-foundation.org: remove unneeded casts]
53 Signed-off-by: Mel Gorman <mel@csn.ul.ie>
54 Cc: Minchan Kim <minchan.kim@gmail.com>
55 Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
56 Cc: Milton Miller <miltonm@bga.com>
57 Cc: Nick Piggin <nickpiggin@yahoo.com.au>
58 Cc: Wu Fengguang <fengguang.wu@intel.com>
59 Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
60 Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
61 Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
64 include/linux/radix-tree.h | 16 ++++++++++++++++
65 mm/migrate.c | 4 ++--
66 2 files changed, 18 insertions(+), 2 deletions(-)
68 --- a/include/linux/radix-tree.h
69 +++ b/include/linux/radix-tree.h
70 @@ -146,6 +146,22 @@ static inline void *radix_tree_deref_slo
74 + * radix_tree_deref_slot_protected - dereference a slot without RCU lock but with tree lock held
75 + * @pslot: pointer to slot, returned by radix_tree_lookup_slot
76 + * Returns: item that was stored in that slot with any direct pointer flag
77 + * removed.
79 + * Similar to radix_tree_deref_slot but only used during migration when a page's
80 + * mapping is being moved. The caller does not hold the RCU read lock but it
81 + * must hold the tree lock to prevent parallel updates.
83 +static inline void *radix_tree_deref_slot_protected(void **pslot,
84 + spinlock_t *treelock)
86 + return rcu_dereference_protected(*pslot, lockdep_is_held(treelock));
90 * radix_tree_deref_retry - check radix_tree_deref_slot
91 * @arg: pointer returned by radix_tree_deref_slot
92 * Returns: 0 if retry is not required, otherwise retry is required
93 --- a/mm/migrate.c
94 +++ b/mm/migrate.c
95 @@ -246,7 +246,7 @@ static int migrate_page_move_mapping(str
97 expected_count = 2 + page_has_private(page);
98 if (page_count(page) != expected_count ||
99 - (struct page *)radix_tree_deref_slot(pslot) != page) {
100 + radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
101 spin_unlock_irq(&mapping->tree_lock);
104 @@ -318,7 +318,7 @@ int migrate_huge_page_move_mapping(struc
106 expected_count = 2 + page_has_private(page);
107 if (page_count(page) != expected_count ||
108 - (struct page *)radix_tree_deref_slot(pslot) != page) {
109 + radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
110 spin_unlock_irq(&mapping->tree_lock);