From b969c4ab9f182a6e1b2a0848be349f99714947b0 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Thu, 12 Jan 2012 17:19:34 -0800
Subject: mm: compaction: determine if dirty pages can be migrated without blocking within ->migratepage

From: Mel Gorman <mgorman@suse.de>

commit b969c4ab9f182a6e1b2a0848be349f99714947b0 upstream.

Stable note: Not tracked in Bugzilla. A fix aimed at preserving page
aging information by reducing LRU list churning had the side-effect
of reducing THP allocation success rates. This was part of a series
to restore the success rates while preserving the reclaim fix.
Asynchronous compaction is used when allocating transparent hugepages to
avoid blocking for long periods of time. Due to reports of stalling,
there was a debate on disabling synchronous compaction but this severely
impacted allocation success rates. Part of the reason was that many dirty
pages are skipped in asynchronous compaction by the following check;

	if (PageDirty(page) && !sync &&
		mapping->a_ops->migratepage != migrate_page)
			rc = -EBUSY;

This skips over all mapping aops using buffer_migrate_page() even though
it is possible to migrate some of these pages without blocking. This
patch updates the ->migratepage callback with a "sync" parameter. It is
the responsibility of the callback to fail gracefully if migration would
block.
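
As an illustration of the contract this establishes, a filesystem
callback written against the new signature might look like the sketch
below. The function and aops names are invented for illustration; the
signature, the sync semantics and migrate_page() itself are as defined
by this patch:

	#include <linux/fs.h>
	#include <linux/migrate.h>
	#include <linux/pagemap.h>

	static int example_migratepage(struct address_space *mapping,
				       struct page *newpage,
				       struct page *page, bool sync)
	{
		/*
		 * Migrating a dirty page may require blocking on
		 * writeback, so fail gracefully in the async case
		 * rather than stalling the compaction caller.
		 */
		if (PageDirty(page) && !sync)
			return -EBUSY;

		/* A clean page can be moved without blocking */
		return migrate_page(mapping, newpage, page, sync);
	}

	static const struct address_space_operations example_aops = {
		.migratepage	= example_migratepage,
	};
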
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Dave Jones <davej@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Andy Isaacson <adi@hexapodia.org>
Cc: Nai Xia <nai.xia@gmail.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/disk-io.c      |    4 -
 fs/hugetlbfs/inode.c    |    3 -
 fs/nfs/internal.h       |    2 
 fs/nfs/write.c          |    4 -
 include/linux/fs.h      |    9 ++-
 include/linux/migrate.h |    2 
 mm/migrate.c            |  129 ++++++++++++++++++++++++++++++++++--------------
 7 files changed, 106 insertions(+), 47 deletions(-)
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -801,7 +801,7 @@ static int btree_submit_bio_hook(struct
 
 #ifdef CONFIG_MIGRATION
 static int btree_migratepage(struct address_space *mapping,
-			struct page *newpage, struct page *page)
+			struct page *newpage, struct page *page, bool sync)
 {
 	/*
 	 * we can't safely write a btree page from here,
@@ -816,7 +816,7 @@ static int btree_migratepage(struct addr
 	if (page_has_private(page) &&
 	    !try_to_release_page(page, GFP_KERNEL))
 		return -EAGAIN;
-	return migrate_page(mapping, newpage, page);
+	return migrate_page(mapping, newpage, page, sync);
 }
 #endif
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -568,7 +568,8 @@ static int hugetlbfs_set_page_dirty(stru
 }
 
 static int hugetlbfs_migrate_page(struct address_space *mapping,
-				struct page *newpage, struct page *page)
+				struct page *newpage, struct page *page,
+				bool sync)
 {
 	int rc;
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -315,7 +315,7 @@ void nfs_commit_release_pages(struct nfs
 
 #ifdef CONFIG_MIGRATION
 extern int nfs_migrate_page(struct address_space *,
-		struct page *, struct page *);
+		struct page *, struct page *, bool);
 #else
 #define nfs_migrate_page NULL
 #endif
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1662,7 +1662,7 @@ out_error:
 
 #ifdef CONFIG_MIGRATION
 int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
-		struct page *page)
+		struct page *page, bool sync)
 {
 	/*
 	 * If PagePrivate is set, then the page is currently associated with
@@ -1677,7 +1677,7 @@ int nfs_migrate_page(struct address_spac
 
 	nfs_fscache_release_page(page, GFP_KERNEL);
 
-	return migrate_page(mapping, newpage, page);
+	return migrate_page(mapping, newpage, page, sync);
 }
 #endif
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -607,9 +607,12 @@ struct address_space_operations {
 			loff_t offset, unsigned long nr_segs);
 	int (*get_xip_mem)(struct address_space *, pgoff_t, int,
 				void **, unsigned long *);
-	/* migrate the contents of a page to the specified target */
+	/*
+	 * migrate the contents of a page to the specified target. If sync
+	 * is false, it must not block.
+	 */
 	int (*migratepage) (struct address_space *,
-			struct page *, struct page *);
+			struct page *, struct page *, bool);
 	int (*launder_page) (struct page *);
 	int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
 					unsigned long);
@@ -2478,7 +2481,7 @@ extern int generic_check_addressable(uns
 
 #ifdef CONFIG_MIGRATION
 extern int buffer_migrate_page(struct address_space *,
-				struct page *, struct page *);
+				struct page *, struct page *, bool);
 #else
 #define buffer_migrate_page NULL
 #endif
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -11,7 +11,7 @@ typedef struct page *new_page_t(struct p
 
 extern void putback_lru_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
-			struct page *, struct page *);
+			struct page *, struct page *, bool);
 extern int migrate_pages(struct list_head *l, new_page_t x,
 			unsigned long private, bool offlining,
 			bool sync);
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -220,6 +220,55 @@ out:
 	pte_unmap_unlock(ptep, ptl);
 }
 
+#ifdef CONFIG_BLOCK
+/* Returns true if all buffers are successfully locked */
+static bool buffer_migrate_lock_buffers(struct buffer_head *head, bool sync)
+{
+	struct buffer_head *bh = head;
+
+	/* Simple case, sync compaction */
+	if (sync) {
+		do {
+			get_bh(bh);
+			lock_buffer(bh);
+			bh = bh->b_this_page;
+
+		} while (bh != head);
+
+		return true;
+	}
+
+	/* async case, we cannot block on lock_buffer so use trylock_buffer */
+	do {
+		get_bh(bh);
+		if (!trylock_buffer(bh)) {
+			/*
+			 * We failed to lock the buffer and cannot stall in
+			 * async migration. Release the taken locks
+			 */
+			struct buffer_head *failed_bh = bh;
+			put_bh(failed_bh);
+			bh = head;
+			while (bh != failed_bh) {
+				unlock_buffer(bh);
+				put_bh(bh);
+				bh = bh->b_this_page;
+			}
+			return false;
+		}
+
+		bh = bh->b_this_page;
+	} while (bh != head);
+	return true;
+}
+#else
+static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
+							bool sync)
+{
+	return true;
+}
+#endif /* CONFIG_BLOCK */
+
 /*
  * Replace the page in the mapping.
  *
@@ -229,7 +278,8 @@ out:
  * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
  */
 static int migrate_page_move_mapping(struct address_space *mapping,
-		struct page *newpage, struct page *page)
+		struct page *newpage, struct page *page,
+		struct buffer_head *head, bool sync)
 {
 	int expected_count;
 	void **pslot;
@@ -259,6 +309,19 @@ static int migrate_page_move_mapping(str
 	}
 
+	/*
+	 * In the async migration case of moving a page with buffers, lock the
+	 * buffers using trylock before the mapping is moved. If the mapping
+	 * was moved, we later failed to lock the buffers and could not move
+	 * the mapping back due to an elevated page count, we would have to
+	 * block waiting on other references to be dropped.
+	 */
+	if (!sync && head && !buffer_migrate_lock_buffers(head, sync)) {
+		page_unfreeze_refs(page, expected_count);
+		spin_unlock_irq(&mapping->tree_lock);
+		return -EAGAIN;
+	}
+
 	/*
 	 * Now we know that no one else is looking at the page.
 	 */
 	get_page(newpage);	/* add cache reference */
@@ -415,13 +478,13 @@ EXPORT_SYMBOL(fail_migrate_page);
  * Pages are locked upon entry and exit.
  */
 int migrate_page(struct address_space *mapping,
-		struct page *newpage, struct page *page)
+		struct page *newpage, struct page *page, bool sync)
 {
 	int rc;
 
 	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
 
-	rc = migrate_page_move_mapping(mapping, newpage, page);
+	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, sync);
 
 	if (rc)
 		return rc;
@@ -438,28 +501,28 @@ EXPORT_SYMBOL(migrate_page);
  * exist.
  */
 int buffer_migrate_page(struct address_space *mapping,
-		struct page *newpage, struct page *page)
+		struct page *newpage, struct page *page, bool sync)
 {
 	struct buffer_head *bh, *head;
 	int rc;
 
 	if (!page_has_buffers(page))
-		return migrate_page(mapping, newpage, page);
+		return migrate_page(mapping, newpage, page, sync);
 
 	head = page_buffers(page);
 
-	rc = migrate_page_move_mapping(mapping, newpage, page);
+	rc = migrate_page_move_mapping(mapping, newpage, page, head, sync);
 
 	if (rc)
 		return rc;
 
-	bh = head;
-	do {
-		get_bh(bh);
-		lock_buffer(bh);
-		bh = bh->b_this_page;
-
-	} while (bh != head);
+	/*
+	 * In the async case, migrate_page_move_mapping locked the buffers
+	 * with an IRQ-safe spinlock held. In the sync case, the buffers
+	 * need to be locked now
+	 */
+	if (sync)
+		BUG_ON(!buffer_migrate_lock_buffers(head, sync));
 
 	ClearPagePrivate(page);
 	set_page_private(newpage, page_private(page));
@@ -536,10 +599,13 @@ static int writeout(struct address_space
  * Default handling if a filesystem does not provide a migration function.
  */
 static int fallback_migrate_page(struct address_space *mapping,
-	struct page *newpage, struct page *page)
+	struct page *newpage, struct page *page, bool sync)
 {
-	if (PageDirty(page))
+	if (PageDirty(page)) {
+		if (!sync)
+			return -EBUSY;
 		return writeout(mapping, page);
+	}
 
 	/*
 	 * Buffers may be managed in a filesystem specific way.
@@ -549,7 +615,7 @@ static int fallback_migrate_page(struct
 	    !try_to_release_page(page, GFP_KERNEL))
 		return -EAGAIN;
 
-	return migrate_page(mapping, newpage, page);
+	return migrate_page(mapping, newpage, page, sync);
 }
@@ -585,29 +651,18 @@ static int move_to_new_page(struct page
 
 	mapping = page_mapping(page);
 	if (!mapping)
-		rc = migrate_page(mapping, newpage, page);
-	else {
+		rc = migrate_page(mapping, newpage, page, sync);
+	else if (mapping->a_ops->migratepage)
 		/*
-		 * Do not writeback pages if !sync and migratepage is
-		 * not pointing to migrate_page() which is nonblocking
-		 * (swapcache/tmpfs uses migratepage = migrate_page).
+		 * Most pages have a mapping and most filesystems provide a
+		 * migratepage callback. Anonymous pages are part of swap
+		 * space which also has its own migratepage callback. This
+		 * is the most common path for page migration.
 		 */
-		if (PageDirty(page) && !sync &&
-		    mapping->a_ops->migratepage != migrate_page)
-			rc = -EBUSY;
-		else if (mapping->a_ops->migratepage)
-			/*
-			 * Most pages have a mapping and most filesystems
-			 * should provide a migration function. Anonymous
-			 * pages are part of swap space which also has its
-			 * own migration function. This is the most common
-			 * path for page migration.
-			 */
-			rc = mapping->a_ops->migratepage(mapping,
-							newpage, page);
-		else
-			rc = fallback_migrate_page(mapping, newpage, page);
-	}
+		rc = mapping->a_ops->migratepage(mapping,
+						newpage, page, sync);
+	else
+		rc = fallback_migrate_page(mapping, newpage, page, sync);
 
 	if (rc) {
 		newpage->mapping = NULL;
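
The buffer locking scheme added to mm/migrate.c above follows a classic
"trylock all or unwind" pattern. Below is a minimal standalone analogue
using pthreads over an array of mutexes rather than the kernel's
circular b_this_page list of buffer_heads; the names and structure are
illustrative, not taken from the patch:

	#include <pthread.h>
	#include <stdbool.h>

	/*
	 * Try to take all n locks without blocking; all-or-nothing,
	 * mirroring the async path of buffer_migrate_lock_buffers().
	 */
	static bool trylock_all(pthread_mutex_t *locks, int n)
	{
		for (int i = 0; i < n; i++) {
			if (pthread_mutex_trylock(&locks[i]) != 0) {
				/*
				 * Unwind the locks already taken so the
				 * caller can back off and retry later.
				 */
				while (--i >= 0)
					pthread_mutex_unlock(&locks[i]);
				return false;
			}
		}
		return true;
	}

	/* The sync path may simply block on each lock in turn. */
	static void lock_all(pthread_mutex_t *locks, int n)
	{
		for (int i = 0; i < n; i++)
			pthread_mutex_lock(&locks[i]);
	}

A caller that gets false back from trylock_all() is in the same position
as async compaction here: it releases what it holds and returns -EAGAIN
rather than stalling on locks held by others.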