From 174078794e045aff06b838e067d9d1df8a2f50d4 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Wed, 25 Jul 2012 11:21:45 -0700
Subject: [PATCH] 3.0-stable patches

added patches:
      kswapd-assign-new_order-and-new_classzone_idx-after-wakeup-in-sleeping.patch
      kswapd-avoid-unnecessary-rebalance-after-an-unsuccessful-balancing.patch
      mm-compaction-introduce-sync-light-migration-for-use-by-compaction.patch
      mm-compaction-make-isolate_lru_page-filter-aware-again.patch
      mm-page-allocator-do-not-call-direct-reclaim-for-thp-allocations-while-compaction-is-deferred.patch
---
 ...asszone_idx-after-wakeup-in-sleeping.patch |  61 ++
 ...ance-after-an-unsuccessful-balancing.patch |  96 ++++
 ...ight-migration-for-use-by-compaction.patch | 525 ++++++++++++++++++
 ...-isolate_lru_page-filter-aware-again.patch | 110 ++++
 ...mm-migration-clean-up-unmap_and_move.patch |   2 +
 ...cations-while-compaction-is-deferred.patch | 140 +++++
 queue-3.0/series                              |   5 +
 7 files changed, 939 insertions(+)
 create mode 100644 queue-3.0/kswapd-assign-new_order-and-new_classzone_idx-after-wakeup-in-sleeping.patch
 create mode 100644 queue-3.0/kswapd-avoid-unnecessary-rebalance-after-an-unsuccessful-balancing.patch
 create mode 100644 queue-3.0/mm-compaction-introduce-sync-light-migration-for-use-by-compaction.patch
 create mode 100644 queue-3.0/mm-compaction-make-isolate_lru_page-filter-aware-again.patch
 create mode 100644 queue-3.0/mm-page-allocator-do-not-call-direct-reclaim-for-thp-allocations-while-compaction-is-deferred.patch

diff --git a/queue-3.0/kswapd-assign-new_order-and-new_classzone_idx-after-wakeup-in-sleeping.patch b/queue-3.0/kswapd-assign-new_order-and-new_classzone_idx-after-wakeup-in-sleeping.patch
new file mode 100644
index 00000000000..fc06831096e
--- /dev/null
+++ b/queue-3.0/kswapd-assign-new_order-and-new_classzone_idx-after-wakeup-in-sleeping.patch
@@ -0,0 +1,61 @@
+From f0dfcde099453aa4c0dc42473828d15a6d492936 Mon Sep 17 00:00:00 2001
+From: Alex Shi
+Date: Mon, 31 Oct 2011 17:08:45 -0700
+Subject: kswapd: assign new_order and new_classzone_idx after wakeup in sleeping
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Shi
+
+commit f0dfcde099453aa4c0dc42473828d15a6d492936 upstream.
+
+Stable note: Fixes https://bugzilla.redhat.com/show_bug.cgi?id=712019. This
+        patch reduces kswapd CPU usage.
+
+There are 2 places to read pgdat in kswapd. One is on return from a
+successful balance, another is on wakeup from kswapd sleeping. The new_order
+and new_classzone_idx represent the balance input order and classzone_idx.
+
+But currently new_order and new_classzone_idx are not assigned after
+kswapd_try_to_sleep(), which will cause a bug in the following scenario.
+
+1: after a successful balance, kswapd goes to sleep, and new_order = 0;
+   new_classzone_idx = __MAX_NR_ZONES - 1;
+
+2: kswapd is woken up with order = 3 and classzone_idx = ZONE_NORMAL
+
+3: while balance_pgdat() is running, a new balance wakeup happens with
+   order = 5, and classzone_idx = ZONE_NORMAL
+
+4: the first wakeup (order = 3) finishes successfully and returns order = 3,
+   but new_order is still 0, so this balancing will be treated as a failed
+   balance and the second, tighter balancing will be missed.
+
+So, to avoid the above problem, new_order and new_classzone_idx need to be
+assigned after wakeup so that the later comparison can succeed.
+
+Signed-off-by: Alex Shi
+Acked-by: Mel Gorman
+Reviewed-by: Minchan Kim
+Tested-by: Pádraig Brady
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Mel Gorman
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/vmscan.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2908,6 +2908,8 @@ static int kswapd(void *p)
+ 						balanced_classzone_idx);
+ 		order = pgdat->kswapd_max_order;
+ 		classzone_idx = pgdat->classzone_idx;
++		new_order = order;
++		new_classzone_idx = classzone_idx;
+ 		pgdat->kswapd_max_order = 0;
+ 		pgdat->classzone_idx = pgdat->nr_zones - 1;
+ 	}
diff --git a/queue-3.0/kswapd-avoid-unnecessary-rebalance-after-an-unsuccessful-balancing.patch b/queue-3.0/kswapd-avoid-unnecessary-rebalance-after-an-unsuccessful-balancing.patch
new file mode 100644
index 00000000000..c5a3096d377
--- /dev/null
+++ b/queue-3.0/kswapd-avoid-unnecessary-rebalance-after-an-unsuccessful-balancing.patch
@@ -0,0 +1,96 @@
+From d2ebd0f6b89567eb93ead4e2ca0cbe03021f344b Mon Sep 17 00:00:00 2001
+From: Alex Shi
+Date: Mon, 31 Oct 2011 17:08:39 -0700
+Subject: kswapd: avoid unnecessary rebalance after an unsuccessful balancing
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Shi
+
+commit d2ebd0f6b89567eb93ead4e2ca0cbe03021f344b upstream.
+
+Stable note: Fixes https://bugzilla.redhat.com/show_bug.cgi?id=712019. This
+        patch reduces kswapd CPU usage.
+
+In commit 215ddd66 ("mm: vmscan: only read new_classzone_idx from pgdat
+when reclaiming successfully"), Mel Gorman said it is better for kswapd to
+sleep after an unsuccessful balancing if a tighter reclaim request is
+pending during that balancing. But in the following scenario, kswapd does
+not behave as we expect. The patch fixes this issue.
+
+1, Read pgdat request A (classzone_idx, order = 3)
+2, balance_pgdat()
+3, During balancing, a new pgdat request B (classzone_idx, order = 5) is placed
+4, balance_pgdat() returns, but the balancing failed (returned order = 0)
+5, pgdat of request A is assigned to balance_pgdat() and balancing runs again,
+   while the expected behavior is for kswapd to try to sleep.
+ +Signed-off-by: Alex Shi +Reviewed-by: Tim Chen +Acked-by: Mel Gorman +Tested-by: Pádraig Brady +Cc: Rik van Riel +Cc: KOSAKI Motohiro +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman + +--- + mm/vmscan.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -2844,7 +2844,9 @@ static void kswapd_try_to_sleep(pg_data_ + static int kswapd(void *p) + { + unsigned long order, new_order; ++ unsigned balanced_order; + int classzone_idx, new_classzone_idx; ++ int balanced_classzone_idx; + pg_data_t *pgdat = (pg_data_t*)p; + struct task_struct *tsk = current; + +@@ -2875,7 +2877,9 @@ static int kswapd(void *p) + set_freezable(); + + order = new_order = 0; ++ balanced_order = 0; + classzone_idx = new_classzone_idx = pgdat->nr_zones - 1; ++ balanced_classzone_idx = classzone_idx; + for ( ; ; ) { + int ret; + +@@ -2884,7 +2888,8 @@ static int kswapd(void *p) + * new request of a similar or harder type will succeed soon + * so consider going to sleep on the basis we reclaimed at + */ +- if (classzone_idx >= new_classzone_idx && order == new_order) { ++ if (balanced_classzone_idx >= new_classzone_idx && ++ balanced_order == new_order) { + new_order = pgdat->kswapd_max_order; + new_classzone_idx = pgdat->classzone_idx; + pgdat->kswapd_max_order = 0; +@@ -2899,7 +2904,8 @@ static int kswapd(void *p) + order = new_order; + classzone_idx = new_classzone_idx; + } else { +- kswapd_try_to_sleep(pgdat, order, classzone_idx); ++ kswapd_try_to_sleep(pgdat, balanced_order, ++ balanced_classzone_idx); + order = pgdat->kswapd_max_order; + classzone_idx = pgdat->classzone_idx; + pgdat->kswapd_max_order = 0; +@@ -2916,7 +2922,9 @@ static int kswapd(void *p) + */ + if (!ret) { + trace_mm_vmscan_kswapd_wake(pgdat->node_id, order); +- order = balance_pgdat(pgdat, order, &classzone_idx); ++ balanced_classzone_idx = classzone_idx; ++ balanced_order = balance_pgdat(pgdat, order, ++ &balanced_classzone_idx); + } + } + return 0; diff --git a/queue-3.0/mm-compaction-introduce-sync-light-migration-for-use-by-compaction.patch b/queue-3.0/mm-compaction-introduce-sync-light-migration-for-use-by-compaction.patch new file mode 100644 index 00000000000..4014f1b5e6b --- /dev/null +++ b/queue-3.0/mm-compaction-introduce-sync-light-migration-for-use-by-compaction.patch @@ -0,0 +1,525 @@ +From a6bc32b899223a877f595ef9ddc1e89ead5072b8 Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Thu, 12 Jan 2012 17:19:43 -0800 +Subject: mm: compaction: introduce sync-light migration for use by compaction + +From: Mel Gorman + +commit a6bc32b899223a877f595ef9ddc1e89ead5072b8 upstream. + +Stable note: Not tracked in Buzilla. This was part of a series that + reduced interactivity stalls experienced when THP was enabled. + These stalls were particularly noticable when copying data + to a USB stick but the experiences for users varied a lot. + +This patch adds a lightweight sync migrate operation MIGRATE_SYNC_LIGHT +mode that avoids writing back pages to backing storage. Async compaction +maps to MIGRATE_ASYNC while sync compaction maps to MIGRATE_SYNC_LIGHT. +For other migrate_pages users such as memory hotplug, MIGRATE_SYNC is +used. + +This avoids sync compaction stalling for an excessive length of time, +particularly when copying files to a USB stick where there might be a +large number of dirty pages backed by a filesystem that does not support +->writepages. 
+ +[aarcange@redhat.com: This patch is heavily based on Andrea's work] +[akpm@linux-foundation.org: fix fs/nfs/write.c build] +[akpm@linux-foundation.org: fix fs/btrfs/disk-io.c build] +Signed-off-by: Mel Gorman +Reviewed-by: Rik van Riel +Cc: Andrea Arcangeli +Cc: Minchan Kim +Cc: Dave Jones +Cc: Jan Kara +Cc: Andy Isaacson +Cc: Nai Xia +Cc: Johannes Weiner +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/disk-io.c | 5 +-- + fs/hugetlbfs/inode.c | 2 - + fs/nfs/internal.h | 2 - + fs/nfs/write.c | 4 +- + include/linux/fs.h | 6 ++- + include/linux/migrate.h | 23 +++++++++++--- + mm/compaction.c | 2 - + mm/memory-failure.c | 2 - + mm/memory_hotplug.c | 2 - + mm/mempolicy.c | 2 - + mm/migrate.c | 78 ++++++++++++++++++++++++++---------------------- + 11 files changed, 76 insertions(+), 52 deletions(-) + +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -801,7 +801,8 @@ static int btree_submit_bio_hook(struct + + #ifdef CONFIG_MIGRATION + static int btree_migratepage(struct address_space *mapping, +- struct page *newpage, struct page *page, bool sync) ++ struct page *newpage, struct page *page, ++ enum migrate_mode mode) + { + /* + * we can't safely write a btree page from here, +@@ -816,7 +817,7 @@ static int btree_migratepage(struct addr + if (page_has_private(page) && + !try_to_release_page(page, GFP_KERNEL)) + return -EAGAIN; +- return migrate_page(mapping, newpage, page, sync); ++ return migrate_page(mapping, newpage, page, mode); + } + #endif + +--- a/fs/hugetlbfs/inode.c ++++ b/fs/hugetlbfs/inode.c +@@ -569,7 +569,7 @@ static int hugetlbfs_set_page_dirty(stru + + static int hugetlbfs_migrate_page(struct address_space *mapping, + struct page *newpage, struct page *page, +- bool sync) ++ enum migrate_mode mode) + { + int rc; + +--- a/fs/nfs/internal.h ++++ b/fs/nfs/internal.h +@@ -315,7 +315,7 @@ void nfs_commit_release_pages(struct nfs + + #ifdef CONFIG_MIGRATION + extern int nfs_migrate_page(struct address_space *, +- struct page *, struct page *, bool); ++ struct page *, struct page *, enum migrate_mode); + #else + #define nfs_migrate_page NULL + #endif +--- a/fs/nfs/write.c ++++ b/fs/nfs/write.c +@@ -1662,7 +1662,7 @@ out_error: + + #ifdef CONFIG_MIGRATION + int nfs_migrate_page(struct address_space *mapping, struct page *newpage, +- struct page *page, bool sync) ++ struct page *page, enum migrate_mode mode) + { + /* + * If PagePrivate is set, then the page is currently associated with +@@ -1677,7 +1677,7 @@ int nfs_migrate_page(struct address_spac + + nfs_fscache_release_page(page, GFP_KERNEL); + +- return migrate_page(mapping, newpage, page, sync); ++ return migrate_page(mapping, newpage, page, mode); + } + #endif + +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -523,6 +523,7 @@ enum positive_aop_returns { + struct page; + struct address_space; + struct writeback_control; ++enum migrate_mode; + + struct iov_iter { + const struct iovec *iov; +@@ -612,7 +613,7 @@ struct address_space_operations { + * is false, it must not block. 
+ */ + int (*migratepage) (struct address_space *, +- struct page *, struct page *, bool); ++ struct page *, struct page *, enum migrate_mode); + int (*launder_page) (struct page *); + int (*is_partially_uptodate) (struct page *, read_descriptor_t *, + unsigned long); +@@ -2481,7 +2482,8 @@ extern int generic_check_addressable(uns + + #ifdef CONFIG_MIGRATION + extern int buffer_migrate_page(struct address_space *, +- struct page *, struct page *, bool); ++ struct page *, struct page *, ++ enum migrate_mode); + #else + #define buffer_migrate_page NULL + #endif +--- a/include/linux/migrate.h ++++ b/include/linux/migrate.h +@@ -6,18 +6,31 @@ + + typedef struct page *new_page_t(struct page *, unsigned long private, int **); + ++/* ++ * MIGRATE_ASYNC means never block ++ * MIGRATE_SYNC_LIGHT in the current implementation means to allow blocking ++ * on most operations but not ->writepage as the potential stall time ++ * is too significant ++ * MIGRATE_SYNC will block when migrating pages ++ */ ++enum migrate_mode { ++ MIGRATE_ASYNC, ++ MIGRATE_SYNC_LIGHT, ++ MIGRATE_SYNC, ++}; ++ + #ifdef CONFIG_MIGRATION + #define PAGE_MIGRATION 1 + + extern void putback_lru_pages(struct list_head *l); + extern int migrate_page(struct address_space *, +- struct page *, struct page *, bool); ++ struct page *, struct page *, enum migrate_mode); + extern int migrate_pages(struct list_head *l, new_page_t x, + unsigned long private, bool offlining, +- bool sync); ++ enum migrate_mode mode); + extern int migrate_huge_pages(struct list_head *l, new_page_t x, + unsigned long private, bool offlining, +- bool sync); ++ enum migrate_mode mode); + + extern int fail_migrate_page(struct address_space *, + struct page *, struct page *); +@@ -36,10 +49,10 @@ extern int migrate_huge_page_move_mappin + static inline void putback_lru_pages(struct list_head *l) {} + static inline int migrate_pages(struct list_head *l, new_page_t x, + unsigned long private, bool offlining, +- bool sync) { return -ENOSYS; } ++ enum migrate_mode mode) { return -ENOSYS; } + static inline int migrate_huge_pages(struct list_head *l, new_page_t x, + unsigned long private, bool offlining, +- bool sync) { return -ENOSYS; } ++ enum migrate_mode mode) { return -ENOSYS; } + + static inline int migrate_prep(void) { return -ENOSYS; } + static inline int migrate_prep_local(void) { return -ENOSYS; } +--- a/mm/compaction.c ++++ b/mm/compaction.c +@@ -577,7 +577,7 @@ static int compact_zone(struct zone *zon + nr_migrate = cc->nr_migratepages; + err = migrate_pages(&cc->migratepages, compaction_alloc, + (unsigned long)cc, false, +- cc->sync); ++ cc->sync ? 
MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC); + update_nr_listpages(cc); + nr_remaining = cc->nr_migratepages; + +--- a/mm/memory-failure.c ++++ b/mm/memory-failure.c +@@ -1464,7 +1464,7 @@ int soft_offline_page(struct page *page, + page_is_file_cache(page)); + list_add(&page->lru, &pagelist); + ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, +- 0, true); ++ 0, MIGRATE_SYNC); + if (ret) { + putback_lru_pages(&pagelist); + pr_info("soft offline: %#lx: migration failed %d, type %lx\n", +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -747,7 +747,7 @@ do_migrate_range(unsigned long start_pfn + } + /* this function returns # of failed pages */ + ret = migrate_pages(&source, hotremove_migrate_alloc, 0, +- true, true); ++ true, MIGRATE_SYNC); + if (ret) + putback_lru_pages(&source); + } +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -926,7 +926,7 @@ static int migrate_to_node(struct mm_str + + if (!list_empty(&pagelist)) { + err = migrate_pages(&pagelist, new_node_page, dest, +- false, true); ++ false, MIGRATE_SYNC); + if (err) + putback_lru_pages(&pagelist); + } +--- a/mm/migrate.c ++++ b/mm/migrate.c +@@ -222,12 +222,13 @@ out: + + #ifdef CONFIG_BLOCK + /* Returns true if all buffers are successfully locked */ +-static bool buffer_migrate_lock_buffers(struct buffer_head *head, bool sync) ++static bool buffer_migrate_lock_buffers(struct buffer_head *head, ++ enum migrate_mode mode) + { + struct buffer_head *bh = head; + + /* Simple case, sync compaction */ +- if (sync) { ++ if (mode != MIGRATE_ASYNC) { + do { + get_bh(bh); + lock_buffer(bh); +@@ -263,7 +264,7 @@ static bool buffer_migrate_lock_buffers( + } + #else + static inline bool buffer_migrate_lock_buffers(struct buffer_head *head, +- bool sync) ++ enum migrate_mode mode) + { + return true; + } +@@ -279,7 +280,7 @@ static inline bool buffer_migrate_lock_b + */ + static int migrate_page_move_mapping(struct address_space *mapping, + struct page *newpage, struct page *page, +- struct buffer_head *head, bool sync) ++ struct buffer_head *head, enum migrate_mode mode) + { + int expected_count; + void **pslot; +@@ -315,7 +316,8 @@ static int migrate_page_move_mapping(str + * the mapping back due to an elevated page count, we would have to + * block waiting on other references to be dropped. + */ +- if (!sync && head && !buffer_migrate_lock_buffers(head, sync)) { ++ if (mode == MIGRATE_ASYNC && head && ++ !buffer_migrate_lock_buffers(head, mode)) { + page_unfreeze_refs(page, expected_count); + spin_unlock_irq(&mapping->tree_lock); + return -EAGAIN; +@@ -478,13 +480,14 @@ EXPORT_SYMBOL(fail_migrate_page); + * Pages are locked upon entry and exit. + */ + int migrate_page(struct address_space *mapping, +- struct page *newpage, struct page *page, bool sync) ++ struct page *newpage, struct page *page, ++ enum migrate_mode mode) + { + int rc; + + BUG_ON(PageWriteback(page)); /* Writeback must be complete */ + +- rc = migrate_page_move_mapping(mapping, newpage, page, NULL, sync); ++ rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode); + + if (rc) + return rc; +@@ -501,17 +504,17 @@ EXPORT_SYMBOL(migrate_page); + * exist. 
+ */ + int buffer_migrate_page(struct address_space *mapping, +- struct page *newpage, struct page *page, bool sync) ++ struct page *newpage, struct page *page, enum migrate_mode mode) + { + struct buffer_head *bh, *head; + int rc; + + if (!page_has_buffers(page)) +- return migrate_page(mapping, newpage, page, sync); ++ return migrate_page(mapping, newpage, page, mode); + + head = page_buffers(page); + +- rc = migrate_page_move_mapping(mapping, newpage, page, head, sync); ++ rc = migrate_page_move_mapping(mapping, newpage, page, head, mode); + + if (rc) + return rc; +@@ -521,8 +524,8 @@ int buffer_migrate_page(struct address_s + * with an IRQ-safe spinlock held. In the sync case, the buffers + * need to be locked now + */ +- if (sync) +- BUG_ON(!buffer_migrate_lock_buffers(head, sync)); ++ if (mode != MIGRATE_ASYNC) ++ BUG_ON(!buffer_migrate_lock_buffers(head, mode)); + + ClearPagePrivate(page); + set_page_private(newpage, page_private(page)); +@@ -599,10 +602,11 @@ static int writeout(struct address_space + * Default handling if a filesystem does not provide a migration function. + */ + static int fallback_migrate_page(struct address_space *mapping, +- struct page *newpage, struct page *page, bool sync) ++ struct page *newpage, struct page *page, enum migrate_mode mode) + { + if (PageDirty(page)) { +- if (!sync) ++ /* Only writeback pages in full synchronous migration */ ++ if (mode != MIGRATE_SYNC) + return -EBUSY; + return writeout(mapping, page); + } +@@ -615,7 +619,7 @@ static int fallback_migrate_page(struct + !try_to_release_page(page, GFP_KERNEL)) + return -EAGAIN; + +- return migrate_page(mapping, newpage, page, sync); ++ return migrate_page(mapping, newpage, page, mode); + } + + /* +@@ -630,7 +634,7 @@ static int fallback_migrate_page(struct + * == 0 - success + */ + static int move_to_new_page(struct page *newpage, struct page *page, +- int remap_swapcache, bool sync) ++ int remap_swapcache, enum migrate_mode mode) + { + struct address_space *mapping; + int rc; +@@ -651,7 +655,7 @@ static int move_to_new_page(struct page + + mapping = page_mapping(page); + if (!mapping) +- rc = migrate_page(mapping, newpage, page, sync); ++ rc = migrate_page(mapping, newpage, page, mode); + else if (mapping->a_ops->migratepage) + /* + * Most pages have a mapping and most filesystems provide a +@@ -660,9 +664,9 @@ static int move_to_new_page(struct page + * is the most common path for page migration. + */ + rc = mapping->a_ops->migratepage(mapping, +- newpage, page, sync); ++ newpage, page, mode); + else +- rc = fallback_migrate_page(mapping, newpage, page, sync); ++ rc = fallback_migrate_page(mapping, newpage, page, mode); + + if (rc) { + newpage->mapping = NULL; +@@ -677,7 +681,7 @@ static int move_to_new_page(struct page + } + + static int __unmap_and_move(struct page *page, struct page *newpage, +- int force, bool offlining, bool sync) ++ int force, bool offlining, enum migrate_mode mode) + { + int rc = -EAGAIN; + int remap_swapcache = 1; +@@ -686,7 +690,7 @@ static int __unmap_and_move(struct page + struct anon_vma *anon_vma = NULL; + + if (!trylock_page(page)) { +- if (!force || !sync) ++ if (!force || mode == MIGRATE_ASYNC) + goto out; + + /* +@@ -732,10 +736,12 @@ static int __unmap_and_move(struct page + + if (PageWriteback(page)) { + /* +- * For !sync, there is no point retrying as the retry loop +- * is expected to be too short for PageWriteback to be cleared ++ * Only in the case of a full syncronous migration is it ++ * necessary to wait for PageWriteback. 
In the async case, ++ * the retry loop is too short and in the sync-light case, ++ * the overhead of stalling is too much + */ +- if (!sync) { ++ if (mode != MIGRATE_SYNC) { + rc = -EBUSY; + goto uncharge; + } +@@ -806,7 +812,7 @@ static int __unmap_and_move(struct page + + skip_unmap: + if (!page_mapped(page)) +- rc = move_to_new_page(newpage, page, remap_swapcache, sync); ++ rc = move_to_new_page(newpage, page, remap_swapcache, mode); + + if (rc && remap_swapcache) + remove_migration_ptes(page, page); +@@ -829,7 +835,8 @@ out: + * to the newly allocated page in newpage. + */ + static int unmap_and_move(new_page_t get_new_page, unsigned long private, +- struct page *page, int force, bool offlining, bool sync) ++ struct page *page, int force, bool offlining, ++ enum migrate_mode mode) + { + int rc = 0; + int *result = NULL; +@@ -847,7 +854,7 @@ static int unmap_and_move(new_page_t get + if (unlikely(split_huge_page(page))) + goto out; + +- rc = __unmap_and_move(page, newpage, force, offlining, sync); ++ rc = __unmap_and_move(page, newpage, force, offlining, mode); + out: + if (rc != -EAGAIN) { + /* +@@ -895,7 +902,8 @@ out: + */ + static int unmap_and_move_huge_page(new_page_t get_new_page, + unsigned long private, struct page *hpage, +- int force, bool offlining, bool sync) ++ int force, bool offlining, ++ enum migrate_mode mode) + { + int rc = 0; + int *result = NULL; +@@ -908,7 +916,7 @@ static int unmap_and_move_huge_page(new_ + rc = -EAGAIN; + + if (!trylock_page(hpage)) { +- if (!force || !sync) ++ if (!force || mode != MIGRATE_SYNC) + goto out; + lock_page(hpage); + } +@@ -919,7 +927,7 @@ static int unmap_and_move_huge_page(new_ + try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS); + + if (!page_mapped(hpage)) +- rc = move_to_new_page(new_hpage, hpage, 1, sync); ++ rc = move_to_new_page(new_hpage, hpage, 1, mode); + + if (rc) + remove_migration_ptes(hpage, hpage); +@@ -962,7 +970,7 @@ out: + */ + int migrate_pages(struct list_head *from, + new_page_t get_new_page, unsigned long private, bool offlining, +- bool sync) ++ enum migrate_mode mode) + { + int retry = 1; + int nr_failed = 0; +@@ -983,7 +991,7 @@ int migrate_pages(struct list_head *from + + rc = unmap_and_move(get_new_page, private, + page, pass > 2, offlining, +- sync); ++ mode); + + switch(rc) { + case -ENOMEM: +@@ -1013,7 +1021,7 @@ out: + + int migrate_huge_pages(struct list_head *from, + new_page_t get_new_page, unsigned long private, bool offlining, +- bool sync) ++ enum migrate_mode mode) + { + int retry = 1; + int nr_failed = 0; +@@ -1030,7 +1038,7 @@ int migrate_huge_pages(struct list_head + + rc = unmap_and_move_huge_page(get_new_page, + private, page, pass > 2, offlining, +- sync); ++ mode); + + switch(rc) { + case -ENOMEM: +@@ -1159,7 +1167,7 @@ set_status: + err = 0; + if (!list_empty(&pagelist)) { + err = migrate_pages(&pagelist, new_page_node, +- (unsigned long)pm, 0, true); ++ (unsigned long)pm, 0, MIGRATE_SYNC); + if (err) + putback_lru_pages(&pagelist); + } diff --git a/queue-3.0/mm-compaction-make-isolate_lru_page-filter-aware-again.patch b/queue-3.0/mm-compaction-make-isolate_lru_page-filter-aware-again.patch new file mode 100644 index 00000000000..4e3c1619e80 --- /dev/null +++ b/queue-3.0/mm-compaction-make-isolate_lru_page-filter-aware-again.patch @@ -0,0 +1,110 @@ +From c82449352854ff09e43062246af86bdeb628f0c3 Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Thu, 12 Jan 2012 17:19:38 -0800 +Subject: mm: compaction: make isolate_lru_page() filter-aware again + +From: Mel Gorman + 
+commit c82449352854ff09e43062246af86bdeb628f0c3 upstream. + +Stable note: Not tracked in Bugzilla. A fix aimed at preserving page aging + information by reducing LRU list churning had the side-effect of + reducing THP allocation success rates. This was part of a series + to restore the success rates while preserving the reclaim fix. + +Commit 39deaf85 ("mm: compaction: make isolate_lru_page() filter-aware") +noted that compaction does not migrate dirty or writeback pages and that +is was meaningless to pick the page and re-add it to the LRU list. This +had to be partially reverted because some dirty pages can be migrated by +compaction without blocking. + +This patch updates "mm: compaction: make isolate_lru_page" by skipping +over pages that migration has no possibility of migrating to minimise LRU +disruption. + +Signed-off-by: Mel Gorman +Reviewed-by: Rik van Riel +Cc: Andrea Arcangeli +Reviewed-by: Minchan Kim +Cc: Dave Jones +Cc: Jan Kara +Cc: Andy Isaacson +Cc: Nai Xia +Cc: Johannes Weiner +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/mmzone.h | 2 ++ + mm/compaction.c | 3 +++ + mm/vmscan.c | 35 +++++++++++++++++++++++++++++++++-- + 3 files changed, 38 insertions(+), 2 deletions(-) + +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -166,6 +166,8 @@ static inline int is_unevictable_lru(enu + #define ISOLATE_CLEAN ((__force isolate_mode_t)0x4) + /* Isolate unmapped file */ + #define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x8) ++/* Isolate for asynchronous migration */ ++#define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x10) + + /* LRU Isolation modes. */ + typedef unsigned __bitwise__ isolate_mode_t; +--- a/mm/compaction.c ++++ b/mm/compaction.c +@@ -371,6 +371,9 @@ static isolate_migrate_t isolate_migrate + continue; + } + ++ if (!cc->sync) ++ mode |= ISOLATE_ASYNC_MIGRATE; ++ + /* Try isolate the page */ + if (__isolate_lru_page(page, mode, 0) != 0) + continue; +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -1045,8 +1045,39 @@ int __isolate_lru_page(struct page *page + + ret = -EBUSY; + +- if ((mode & ISOLATE_CLEAN) && (PageDirty(page) || PageWriteback(page))) +- return ret; ++ /* ++ * To minimise LRU disruption, the caller can indicate that it only ++ * wants to isolate pages it will be able to operate on without ++ * blocking - clean pages for the most part. ++ * ++ * ISOLATE_CLEAN means that only clean pages should be isolated. 
This ++ * is used by reclaim when it is cannot write to backing storage ++ * ++ * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants to pages ++ * that it is possible to migrate without blocking ++ */ ++ if (mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)) { ++ /* All the caller can do on PageWriteback is block */ ++ if (PageWriteback(page)) ++ return ret; ++ ++ if (PageDirty(page)) { ++ struct address_space *mapping; ++ ++ /* ISOLATE_CLEAN means only clean pages */ ++ if (mode & ISOLATE_CLEAN) ++ return ret; ++ ++ /* ++ * Only pages without mappings or that have a ++ * ->migratepage callback are possible to migrate ++ * without blocking ++ */ ++ mapping = page_mapping(page); ++ if (mapping && !mapping->a_ops->migratepage) ++ return ret; ++ } ++ } + + if ((mode & ISOLATE_UNMAPPED) && page_mapped(page)) + return ret; diff --git a/queue-3.0/mm-migration-clean-up-unmap_and_move.patch b/queue-3.0/mm-migration-clean-up-unmap_and_move.patch index 9f903707a39..f05ecd61b1f 100644 --- a/queue-3.0/mm-migration-clean-up-unmap_and_move.patch +++ b/queue-3.0/mm-migration-clean-up-unmap_and_move.patch @@ -22,6 +22,8 @@ Cc: Michal Hocko Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman --- mm/migrate.c | 75 +++++++++++++++++++++++++++++++---------------------------- diff --git a/queue-3.0/mm-page-allocator-do-not-call-direct-reclaim-for-thp-allocations-while-compaction-is-deferred.patch b/queue-3.0/mm-page-allocator-do-not-call-direct-reclaim-for-thp-allocations-while-compaction-is-deferred.patch new file mode 100644 index 00000000000..c45ad15e548 --- /dev/null +++ b/queue-3.0/mm-page-allocator-do-not-call-direct-reclaim-for-thp-allocations-while-compaction-is-deferred.patch @@ -0,0 +1,140 @@ +From 66199712e9eef5aede09dbcd9dfff87798a66917 Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Thu, 12 Jan 2012 17:19:41 -0800 +Subject: mm: page allocator: do not call direct reclaim for THP allocations while compaction is deferred + +From: Mel Gorman + +commit 66199712e9eef5aede09dbcd9dfff87798a66917 upstream. + +Stable note: Not tracked in Buzilla. This was part of a series that + reduced interactivity stalls experienced when THP was enabled. + +If compaction is deferred, direct reclaim is used to try to free enough +pages for the allocation to succeed. For small high-orders, this has a +reasonable chance of success. However, if the caller has specified +__GFP_NO_KSWAPD to limit the disruption to the system, it makes more sense +to fail the allocation rather than stall the caller in direct reclaim. +This patch skips direct reclaim if compaction is deferred and the caller +specifies __GFP_NO_KSWAPD. + +Async compaction only considers a subset of pages so it is possible for +compaction to be deferred prematurely and not enter direct reclaim even in +cases where it should. To compensate for this, this patch also defers +compaction only if sync compaction failed. 
+ +Signed-off-by: Mel Gorman +Acked-by: Minchan Kim +Reviewed-by: Rik van Riel +Cc: Andrea Arcangeli +Cc: Dave Jones +Cc: Jan Kara +Cc: Andy Isaacson +Cc: Nai Xia +Cc: Johannes Weiner +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/page_alloc.c | 45 +++++++++++++++++++++++++++++++++++---------- + 1 file changed, 35 insertions(+), 10 deletions(-) + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -1897,14 +1897,20 @@ static struct page * + __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, + struct zonelist *zonelist, enum zone_type high_zoneidx, + nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, +- int migratetype, unsigned long *did_some_progress, +- bool sync_migration) ++ int migratetype, bool sync_migration, ++ bool *deferred_compaction, ++ unsigned long *did_some_progress) + { + struct page *page; + +- if (!order || compaction_deferred(preferred_zone)) ++ if (!order) + return NULL; + ++ if (compaction_deferred(preferred_zone)) { ++ *deferred_compaction = true; ++ return NULL; ++ } ++ + current->flags |= PF_MEMALLOC; + *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask, + nodemask, sync_migration); +@@ -1932,7 +1938,13 @@ __alloc_pages_direct_compact(gfp_t gfp_m + * but not enough to satisfy watermarks. + */ + count_vm_event(COMPACTFAIL); +- defer_compaction(preferred_zone); ++ ++ /* ++ * As async compaction considers a subset of pageblocks, only ++ * defer if the failure was a sync compaction failure. ++ */ ++ if (sync_migration) ++ defer_compaction(preferred_zone); + + cond_resched(); + } +@@ -1944,8 +1956,9 @@ static inline struct page * + __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, + struct zonelist *zonelist, enum zone_type high_zoneidx, + nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, +- int migratetype, unsigned long *did_some_progress, +- bool sync_migration) ++ int migratetype, bool sync_migration, ++ bool *deferred_compaction, ++ unsigned long *did_some_progress) + { + return NULL; + } +@@ -2095,6 +2108,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, u + unsigned long pages_reclaimed = 0; + unsigned long did_some_progress; + bool sync_migration = false; ++ bool deferred_compaction = false; + + /* + * In the slowpath, we sanity check order to avoid ever trying to +@@ -2175,12 +2189,22 @@ rebalance: + zonelist, high_zoneidx, + nodemask, + alloc_flags, preferred_zone, +- migratetype, &did_some_progress, +- sync_migration); ++ migratetype, sync_migration, ++ &deferred_compaction, ++ &did_some_progress); + if (page) + goto got_pg; + sync_migration = true; + ++ /* ++ * If compaction is deferred for high-order allocations, it is because ++ * sync compaction recently failed. 
In this is the case and the caller ++ * has requested the system not be heavily disrupted, fail the ++ * allocation now instead of entering direct reclaim ++ */ ++ if (deferred_compaction && (gfp_mask & __GFP_NO_KSWAPD)) ++ goto nopage; ++ + /* Try direct reclaim and then allocating */ + page = __alloc_pages_direct_reclaim(gfp_mask, order, + zonelist, high_zoneidx, +@@ -2243,8 +2267,9 @@ rebalance: + zonelist, high_zoneidx, + nodemask, + alloc_flags, preferred_zone, +- migratetype, &did_some_progress, +- sync_migration); ++ migratetype, sync_migration, ++ &deferred_compaction, ++ &did_some_progress); + if (page) + goto got_pg; + } diff --git a/queue-3.0/series b/queue-3.0/series index 30fcb6914bb..b7428b7c8f4 100644 --- a/queue-3.0/series +++ b/queue-3.0/series @@ -21,3 +21,8 @@ mm-zone_reclaim-make-isolate_lru_page-filter-aware.patch mm-migration-clean-up-unmap_and_move.patch mm-compaction-allow-compaction-to-isolate-dirty-pages.patch mm-compaction-determine-if-dirty-pages-can-be-migrated-without-blocking-within-migratepage.patch +mm-page-allocator-do-not-call-direct-reclaim-for-thp-allocations-while-compaction-is-deferred.patch +mm-compaction-make-isolate_lru_page-filter-aware-again.patch +kswapd-avoid-unnecessary-rebalance-after-an-unsuccessful-balancing.patch +kswapd-assign-new_order-and-new_classzone_idx-after-wakeup-in-sleeping.patch +mm-compaction-introduce-sync-light-migration-for-use-by-compaction.patch -- 2.47.3