From b969c4ab9f182a6e1b2a0848be349f99714947b0 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Thu, 12 Jan 2012 17:19:34 -0800
Subject: mm: compaction: determine if dirty pages can be migrated without blocking within ->migratepage

From: Mel Gorman <mgorman@suse.de>

commit b969c4ab9f182a6e1b2a0848be349f99714947b0 upstream.

Stable note: Not tracked in Bugzilla. A fix aimed at preserving page
aging information by reducing LRU list churning had the side-effect
of reducing THP allocation success rates. This was part of a series
to restore the success rates while preserving the reclaim fix.

Asynchronous compaction is used when allocating transparent hugepages to
avoid blocking for long periods of time. Due to reports of stalling,
there was a debate on disabling synchronous compaction, but this severely
impacted allocation success rates. Part of the reason was that many dirty
pages are skipped in asynchronous compaction by the following check:

	if (PageDirty(page) && !sync &&
	    mapping->a_ops->migratepage != migrate_page)
		rc = -EBUSY;

This skips over all mapping aops using buffer_migrate_page() even though
it is possible to migrate some of these pages without blocking. This
patch updates the ->migratepage callback with a "sync" parameter. It is
the responsibility of the callback to fail gracefully if migration would
block.
30
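For illustration only (this sketch is not part of the diff below, and the
filesystem name is hypothetical), a callback honouring the new parameter
could look like this:

	/* examplefs: hypothetical callback honouring the "sync" flag */
	static int examplefs_migratepage(struct address_space *mapping,
			struct page *newpage, struct page *page, bool sync)
	{
		/* Async compaction must not block; defer dirty pages. */
		if (PageDirty(page) && !sync)
			return -EBUSY;

		/* migrate_page() is nonblocking (used by swapcache/tmpfs). */
		return migrate_page(mapping, newpage, page, sync);
	}
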
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Dave Jones <davej@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Andy Isaacson <adi@hexapodia.org>
Cc: Nai Xia <nai.xia@gmail.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 fs/btrfs/disk-io.c      |    4 -
 fs/hugetlbfs/inode.c    |    3 -
 fs/nfs/internal.h       |    2
 fs/nfs/write.c          |    4 -
 include/linux/fs.h      |    9 ++-
 include/linux/migrate.h |    2
 mm/migrate.c            |  129 ++++++++++++++++++++++++++++++++++--------------
 7 files changed, 106 insertions(+), 47 deletions(-)

--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -801,7 +801,7 @@ static int btree_submit_bio_hook(struct
 
 #ifdef CONFIG_MIGRATION
 static int btree_migratepage(struct address_space *mapping,
-			struct page *newpage, struct page *page)
+			struct page *newpage, struct page *page, bool sync)
 {
 	/*
 	 * we can't safely write a btree page from here,
@@ -816,7 +816,7 @@ static int btree_migratepage(struct addr
 	if (page_has_private(page) &&
 	    !try_to_release_page(page, GFP_KERNEL))
 		return -EAGAIN;
-	return migrate_page(mapping, newpage, page);
+	return migrate_page(mapping, newpage, page, sync);
 }
 #endif
 
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -568,7 +568,8 @@ static int hugetlbfs_set_page_dirty(stru
 }
 
 static int hugetlbfs_migrate_page(struct address_space *mapping,
-				struct page *newpage, struct page *page)
+				struct page *newpage, struct page *page,
+				bool sync)
 {
 	int rc;
 
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -315,7 +315,7 @@ void nfs_commit_release_pages(struct nfs
 
 #ifdef CONFIG_MIGRATION
 extern int nfs_migrate_page(struct address_space *,
-			struct page *, struct page *);
+			struct page *, struct page *, bool);
 #else
 #define nfs_migrate_page NULL
 #endif
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1662,7 +1662,7 @@ out_error:
 
 #ifdef CONFIG_MIGRATION
 int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
-		struct page *page)
+		struct page *page, bool sync)
 {
 	/*
 	 * If PagePrivate is set, then the page is currently associated with
@@ -1677,7 +1677,7 @@ int nfs_migrate_page(struct address_spac
 
 	nfs_fscache_release_page(page, GFP_KERNEL);
 
-	return migrate_page(mapping, newpage, page);
+	return migrate_page(mapping, newpage, page, sync);
 }
 #endif
 
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -607,9 +607,12 @@ struct address_space_operations {
 			loff_t offset, unsigned long nr_segs);
 	int (*get_xip_mem)(struct address_space *, pgoff_t, int,
 				void **, unsigned long *);
-	/* migrate the contents of a page to the specified target */
+	/*
+	 * migrate the contents of a page to the specified target. If sync
+	 * is false, it must not block.
+	 */
 	int (*migratepage) (struct address_space *,
-			struct page *, struct page *);
+			struct page *, struct page *, bool);
 	int (*launder_page) (struct page *);
 	int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
 					unsigned long);
@@ -2478,7 +2481,7 @@ extern int generic_check_addressable(uns
 
 #ifdef CONFIG_MIGRATION
 extern int buffer_migrate_page(struct address_space *,
-				struct page *, struct page *);
+				struct page *, struct page *, bool);
 #else
 #define buffer_migrate_page NULL
 #endif
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -11,7 +11,7 @@ typedef struct page *new_page_t(struct p
 
 extern void putback_lru_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
-			struct page *, struct page *);
+			struct page *, struct page *, bool);
 extern int migrate_pages(struct list_head *l, new_page_t x,
 			unsigned long private, bool offlining,
 			bool sync);
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -220,6 +220,55 @@ out:
 	pte_unmap_unlock(ptep, ptl);
 }
 
+#ifdef CONFIG_BLOCK
+/* Returns true if all buffers are successfully locked */
+static bool buffer_migrate_lock_buffers(struct buffer_head *head, bool sync)
+{
+	struct buffer_head *bh = head;
+
+	/* Simple case, sync compaction */
+	if (sync) {
+		do {
+			get_bh(bh);
+			lock_buffer(bh);
+			bh = bh->b_this_page;
+
+		} while (bh != head);
+
+		return true;
+	}
+
+	/* async case, we cannot block on lock_buffer so use trylock_buffer */
+	do {
+		get_bh(bh);
+		if (!trylock_buffer(bh)) {
+			/*
+			 * We failed to lock the buffer and cannot stall in
+			 * async migration. Release the taken locks
+			 */
+			struct buffer_head *failed_bh = bh;
+			put_bh(failed_bh);
+			bh = head;
+			while (bh != failed_bh) {
+				unlock_buffer(bh);
+				put_bh(bh);
+				bh = bh->b_this_page;
+			}
+			return false;
+		}
+
+		bh = bh->b_this_page;
+	} while (bh != head);
+	return true;
+}
+#else
+static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
+							bool sync)
+{
+	return true;
+}
+#endif /* CONFIG_BLOCK */
+
 /*
  * Replace the page in the mapping.
  *
@@ -229,7 +278,8 @@ out:
  * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
  */
 static int migrate_page_move_mapping(struct address_space *mapping,
-		struct page *newpage, struct page *page)
+		struct page *newpage, struct page *page,
+		struct buffer_head *head, bool sync)
 {
 	int expected_count;
 	void **pslot;
@@ -259,6 +309,19 @@ static int migrate_page_move_mapping(str
 	}
 
 	/*
+	 * In the async migration case of moving a page with buffers, lock the
+	 * buffers using trylock before the mapping is moved. If the mapping
+	 * was moved, we later failed to lock the buffers and could not move
+	 * the mapping back due to an elevated page count, we would have to
+	 * block waiting on other references to be dropped.
+	 */
+	if (!sync && head && !buffer_migrate_lock_buffers(head, sync)) {
+		page_unfreeze_refs(page, expected_count);
+		spin_unlock_irq(&mapping->tree_lock);
+		return -EAGAIN;
+	}
+
+	/*
 	 * Now we know that no one else is looking at the page.
 	 */
 	get_page(newpage);	/* add cache reference */
@@ -415,13 +478,13 @@ EXPORT_SYMBOL(fail_migrate_page);
  * Pages are locked upon entry and exit.
  */
 int migrate_page(struct address_space *mapping,
-		struct page *newpage, struct page *page)
+		struct page *newpage, struct page *page, bool sync)
 {
 	int rc;
 
 	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
 
-	rc = migrate_page_move_mapping(mapping, newpage, page);
+	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, sync);
 
 	if (rc)
 		return rc;
@@ -438,28 +501,28 @@ EXPORT_SYMBOL(migrate_page);
  * exist.
  */
 int buffer_migrate_page(struct address_space *mapping,
-		struct page *newpage, struct page *page)
+		struct page *newpage, struct page *page, bool sync)
 {
 	struct buffer_head *bh, *head;
 	int rc;
 
 	if (!page_has_buffers(page))
-		return migrate_page(mapping, newpage, page);
+		return migrate_page(mapping, newpage, page, sync);
 
 	head = page_buffers(page);
 
-	rc = migrate_page_move_mapping(mapping, newpage, page);
+	rc = migrate_page_move_mapping(mapping, newpage, page, head, sync);
 
 	if (rc)
 		return rc;
 
-	bh = head;
-	do {
-		get_bh(bh);
-		lock_buffer(bh);
-		bh = bh->b_this_page;
-
-	} while (bh != head);
+	/*
+	 * In the async case, migrate_page_move_mapping locked the buffers
+	 * with an IRQ-safe spinlock held. In the sync case, the buffers
+	 * need to be locked now
+	 */
+	if (sync)
+		BUG_ON(!buffer_migrate_lock_buffers(head, sync));
 
 	ClearPagePrivate(page);
 	set_page_private(newpage, page_private(page));
@@ -536,10 +599,13 @@ static int writeout(struct address_space
  * Default handling if a filesystem does not provide a migration function.
  */
 static int fallback_migrate_page(struct address_space *mapping,
-		struct page *newpage, struct page *page)
+		struct page *newpage, struct page *page, bool sync)
 {
-	if (PageDirty(page))
+	if (PageDirty(page)) {
+		if (!sync)
+			return -EBUSY;
 		return writeout(mapping, page);
+	}
 
 	/*
 	 * Buffers may be managed in a filesystem specific way.
@@ -549,7 +615,7 @@ static int fallback_migrate_page(struct
 	    !try_to_release_page(page, GFP_KERNEL))
 		return -EAGAIN;
 
-	return migrate_page(mapping, newpage, page);
+	return migrate_page(mapping, newpage, page, sync);
 }
 
 /*
@@ -585,29 +651,18 @@ static int move_to_new_page(struct page
 
 	mapping = page_mapping(page);
 	if (!mapping)
-		rc = migrate_page(mapping, newpage, page);
-	else {
+		rc = migrate_page(mapping, newpage, page, sync);
+	else if (mapping->a_ops->migratepage)
 		/*
-		 * Do not writeback pages if !sync and migratepage is
-		 * not pointing to migrate_page() which is nonblocking
-		 * (swapcache/tmpfs uses migratepage = migrate_page).
+		 * Most pages have a mapping and most filesystems provide a
+		 * migratepage callback. Anonymous pages are part of swap
+		 * space which also has its own migratepage callback. This
+		 * is the most common path for page migration.
 		 */
-		if (PageDirty(page) && !sync &&
-		    mapping->a_ops->migratepage != migrate_page)
-			rc = -EBUSY;
-		else if (mapping->a_ops->migratepage)
-			/*
-			 * Most pages have a mapping and most filesystems
-			 * should provide a migration function. Anonymous
-			 * pages are part of swap space which also has its
-			 * own migration function. This is the most common
-			 * path for page migration.
-			 */
-			rc = mapping->a_ops->migratepage(mapping,
-							newpage, page);
-		else
-			rc = fallback_migrate_page(mapping, newpage, page);
-	}
+		rc = mapping->a_ops->migratepage(mapping,
+						newpage, page, sync);
+	else
+		rc = fallback_migrate_page(mapping, newpage, page, sync);
 
 	if (rc) {
 		newpage->mapping = NULL;