3.0-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Wed, 25 Jul 2012 18:21:45 +0000 (11:21 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Wed, 25 Jul 2012 18:21:45 +0000 (11:21 -0700)
added patches:
kswapd-assign-new_order-and-new_classzone_idx-after-wakeup-in-sleeping.patch
kswapd-avoid-unnecessary-rebalance-after-an-unsuccessful-balancing.patch
mm-compaction-introduce-sync-light-migration-for-use-by-compaction.patch
mm-compaction-make-isolate_lru_page-filter-aware-again.patch
mm-page-allocator-do-not-call-direct-reclaim-for-thp-allocations-while-compaction-is-deferred.patch

queue-3.0/kswapd-assign-new_order-and-new_classzone_idx-after-wakeup-in-sleeping.patch [new file with mode: 0644]
queue-3.0/kswapd-avoid-unnecessary-rebalance-after-an-unsuccessful-balancing.patch [new file with mode: 0644]
queue-3.0/mm-compaction-introduce-sync-light-migration-for-use-by-compaction.patch [new file with mode: 0644]
queue-3.0/mm-compaction-make-isolate_lru_page-filter-aware-again.patch [new file with mode: 0644]
queue-3.0/mm-migration-clean-up-unmap_and_move.patch
queue-3.0/mm-page-allocator-do-not-call-direct-reclaim-for-thp-allocations-while-compaction-is-deferred.patch [new file with mode: 0644]
queue-3.0/series

diff --git a/queue-3.0/kswapd-assign-new_order-and-new_classzone_idx-after-wakeup-in-sleeping.patch b/queue-3.0/kswapd-assign-new_order-and-new_classzone_idx-after-wakeup-in-sleeping.patch
new file mode 100644 (file)
index 0000000..fc06831
--- /dev/null
@@ -0,0 +1,61 @@
+From f0dfcde099453aa4c0dc42473828d15a6d492936 Mon Sep 17 00:00:00 2001
+From: Alex Shi <alex.shi@intel.com>
+Date: Mon, 31 Oct 2011 17:08:45 -0700
+Subject: kswapd: assign new_order and new_classzone_idx after wakeup in sleeping
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Shi <alex.shi@intel.com>
+
+commit f0dfcde099453aa4c0dc42473828d15a6d492936 upstream.
+
+Stable note: Fixes https://bugzilla.redhat.com/show_bug.cgi?id=712019.  This
+       patch reduces kswapd CPU usage.
+
+There are two places where kswapd reads pgdat: on return from a successful
+balance, and after being woken up from sleep.  new_order and new_classzone_idx
+represent the input order and classzone_idx of the balance request.
+
+But currently new_order and new_classzone_idx are not assigned after
+kswapd_try_to_sleep(), which causes a bug in the following scenario.
+
+1: after a successful balance, kswapd goes to sleep with new_order = 0 and
+   new_classzone_idx = __MAX_NR_ZONES - 1;
+
+2: kswapd is woken up with order = 3 and classzone_idx = ZONE_NORMAL
+
+3: while balance_pgdat() is running, a new balance wakeup arrives with
+   order = 5 and classzone_idx = ZONE_NORMAL
+
+4: the first wakeup (order = 3) finishes successfully and returns order = 3,
+   but new_order is still 0, so this balancing is treated as a failed
+   balance and the second, tighter balancing request is missed.
+
+To avoid this problem, new_order and new_classzone_idx need to be assigned
+after wakeup so that the later comparison succeeds.
+
+Signed-off-by: Alex Shi <alex.shi@intel.com>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
+Tested-by: Pádraig Brady <P@draigBrady.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/vmscan.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2908,6 +2908,8 @@ static int kswapd(void *p)
+                                               balanced_classzone_idx);
+                       order = pgdat->kswapd_max_order;
+                       classzone_idx = pgdat->classzone_idx;
++                      new_order = order;
++                      new_classzone_idx = classzone_idx;
+                       pgdat->kswapd_max_order = 0;
+                       pgdat->classzone_idx = pgdat->nr_zones - 1;
+               }
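To make the scenario above concrete, here is a minimal userspace sketch of the
wakeup path, with pgdat cut down to a plain struct and everything else elided;
it only models the comparison that goes wrong when new_order is left stale, it
is not the kernel code.

#include <stdio.h>

/* pgdat reduced to the two fields the scenario above needs */
struct fake_pgdat {
	int kswapd_max_order;
	int classzone_idx;
};

int main(void)
{
	struct fake_pgdat pgdat = { .kswapd_max_order = 3, .classzone_idx = 2 };
	int order, new_order = 0;	/* still 0 from before the sleep */

	/* kswapd wakes up and reads the pending request (order = 3) ... */
	order = pgdat.kswapd_max_order;
	pgdat.kswapd_max_order = 0;
	/* ... without the fix, "new_order = order;" is missing right here */

	/* a tighter request arrives while balance_pgdat() is running */
	pgdat.kswapd_max_order = 5;

	/* balancing for order 3 succeeds and reports what it achieved */
	int balanced_order = 3;

	/* kswapd only picks up the pending order-5 request if the finished
	 * balance matches what it believes was requested */
	if (balanced_order == new_order)
		printf("picking up new request, order = %d\n",
		       pgdat.kswapd_max_order);
	else
		printf("balance for order %d returned %d, but new_order is still %d:\n"
		       "treated as a failed balance; the order-5 request is missed\n",
		       order, balanced_order, new_order);
	return 0;
}
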
diff --git a/queue-3.0/kswapd-avoid-unnecessary-rebalance-after-an-unsuccessful-balancing.patch b/queue-3.0/kswapd-avoid-unnecessary-rebalance-after-an-unsuccessful-balancing.patch
new file mode 100644 (file)
index 0000000..c5a3096
--- /dev/null
@@ -0,0 +1,96 @@
+From d2ebd0f6b89567eb93ead4e2ca0cbe03021f344b Mon Sep 17 00:00:00 2001
+From: Alex Shi <alex.shi@intel.com>
+Date: Mon, 31 Oct 2011 17:08:39 -0700
+Subject: kswapd: avoid unnecessary rebalance after an unsuccessful balancing
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Shi <alex.shi@intel.com>
+
+commit d2ebd0f6b89567eb93ead4e2ca0cbe03021f344b upstream.
+
+Stable note: Fixes https://bugzilla.redhat.com/show_bug.cgi?id=712019.  This
+       patch reduces kswapd CPU usage.
+
+In commit 215ddd66 ("mm: vmscan: only read new_classzone_idx from pgdat
+when reclaiming successfully"), Mel Gorman noted that kswapd should go to
+sleep after an unsuccessful balancing if a tighter reclaim request arrived
+during that balancing.  But in the following scenario kswapd does not
+behave as expected.  This patch fixes the issue.
+
+1, pgdat request A (classzone_idx, order = 3) is read
+2, balance_pgdat() runs
+3, during the balancing, a new pgdat request B (classzone_idx, order = 5) is placed
+4, balance_pgdat() returns, but the balance failed since it returned order = 0
+5, request A is handed to balance_pgdat() again and rebalancing starts,
+   while the expected behavior is for kswapd to try to sleep.
+
+Signed-off-by: Alex Shi <alex.shi@intel.com>
+Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Tested-by: Pádraig Brady <P@draigBrady.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/vmscan.c |   14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2844,7 +2844,9 @@ static void kswapd_try_to_sleep(pg_data_
+ static int kswapd(void *p)
+ {
+       unsigned long order, new_order;
++      unsigned balanced_order;
+       int classzone_idx, new_classzone_idx;
++      int balanced_classzone_idx;
+       pg_data_t *pgdat = (pg_data_t*)p;
+       struct task_struct *tsk = current;
+@@ -2875,7 +2877,9 @@ static int kswapd(void *p)
+       set_freezable();
+       order = new_order = 0;
++      balanced_order = 0;
+       classzone_idx = new_classzone_idx = pgdat->nr_zones - 1;
++      balanced_classzone_idx = classzone_idx;
+       for ( ; ; ) {
+               int ret;
+@@ -2884,7 +2888,8 @@ static int kswapd(void *p)
+                * new request of a similar or harder type will succeed soon
+                * so consider going to sleep on the basis we reclaimed at
+                */
+-              if (classzone_idx >= new_classzone_idx && order == new_order) {
++              if (balanced_classzone_idx >= new_classzone_idx &&
++                                      balanced_order == new_order) {
+                       new_order = pgdat->kswapd_max_order;
+                       new_classzone_idx = pgdat->classzone_idx;
+                       pgdat->kswapd_max_order =  0;
+@@ -2899,7 +2904,8 @@ static int kswapd(void *p)
+                       order = new_order;
+                       classzone_idx = new_classzone_idx;
+               } else {
+-                      kswapd_try_to_sleep(pgdat, order, classzone_idx);
++                      kswapd_try_to_sleep(pgdat, balanced_order,
++                                              balanced_classzone_idx);
+                       order = pgdat->kswapd_max_order;
+                       classzone_idx = pgdat->classzone_idx;
+                       pgdat->kswapd_max_order = 0;
+@@ -2916,7 +2922,9 @@ static int kswapd(void *p)
+                */
+               if (!ret) {
+                       trace_mm_vmscan_kswapd_wake(pgdat->node_id, order);
+-                      order = balance_pgdat(pgdat, order, &classzone_idx);
++                      balanced_classzone_idx = classzone_idx;
++                      balanced_order = balance_pgdat(pgdat, order,
++                                              &balanced_classzone_idx);
+               }
+       }
+       return 0;
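The key point of the fix is that kswapd now compares a pending request against
what the previous balancing actually achieved (balanced_order,
balanced_classzone_idx) rather than against what it last asked for. A small
sketch of that predicate, with the kswapd state reduced to plain integers; the
names follow the patch, the helper itself is only illustrative.

#include <stdbool.h>

/* True when the previous balancing already covers the pending request, so
 * kswapd may read a fresh request from pgdat; false when it should instead
 * try to sleep on the order/classzone it actually achieved. */
static bool balance_covers_request(int balanced_order,
				   int balanced_classzone_idx,
				   int new_order, int new_classzone_idx)
{
	return balanced_classzone_idx >= new_classzone_idx &&
	       balanced_order == new_order;
}
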
diff --git a/queue-3.0/mm-compaction-introduce-sync-light-migration-for-use-by-compaction.patch b/queue-3.0/mm-compaction-introduce-sync-light-migration-for-use-by-compaction.patch
new file mode 100644 (file)
index 0000000..4014f1b
--- /dev/null
@@ -0,0 +1,525 @@
+From a6bc32b899223a877f595ef9ddc1e89ead5072b8 Mon Sep 17 00:00:00 2001
+From: Mel Gorman <mgorman@suse.de>
+Date: Thu, 12 Jan 2012 17:19:43 -0800
+Subject: mm: compaction: introduce sync-light migration for use by compaction
+
+From: Mel Gorman <mgorman@suse.de>
+
+commit a6bc32b899223a877f595ef9ddc1e89ead5072b8 upstream.
+
+Stable note: Not tracked in Bugzilla. This was part of a series that
+       reduced interactivity stalls experienced when THP was enabled.
+       These stalls were particularly noticeable when copying data
+       to a USB stick, but user experiences varied a lot.
+
+This patch adds a lightweight sync migration mode, MIGRATE_SYNC_LIGHT,
+that avoids writing pages back to backing storage.  Async compaction
+maps to MIGRATE_ASYNC while sync compaction maps to MIGRATE_SYNC_LIGHT.
+For other migrate_pages users such as memory hotplug, MIGRATE_SYNC is
+used.
+
+This avoids sync compaction stalling for an excessive length of time,
+particularly when copying files to a USB stick where there might be a
+large number of dirty pages backed by a filesystem that does not support
+->writepages.
+
+[aarcange@redhat.com: This patch is heavily based on Andrea's work]
+[akpm@linux-foundation.org: fix fs/nfs/write.c build]
+[akpm@linux-foundation.org: fix fs/btrfs/disk-io.c build]
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Reviewed-by: Rik van Riel <riel@redhat.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Minchan Kim <minchan.kim@gmail.com>
+Cc: Dave Jones <davej@redhat.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Andy Isaacson <adi@hexapodia.org>
+Cc: Nai Xia <nai.xia@gmail.com>
+Cc: Johannes Weiner <jweiner@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/disk-io.c      |    5 +--
+ fs/hugetlbfs/inode.c    |    2 -
+ fs/nfs/internal.h       |    2 -
+ fs/nfs/write.c          |    4 +-
+ include/linux/fs.h      |    6 ++-
+ include/linux/migrate.h |   23 +++++++++++---
+ mm/compaction.c         |    2 -
+ mm/memory-failure.c     |    2 -
+ mm/memory_hotplug.c     |    2 -
+ mm/mempolicy.c          |    2 -
+ mm/migrate.c            |   78 ++++++++++++++++++++++++++----------------------
+ 11 files changed, 76 insertions(+), 52 deletions(-)
+
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -801,7 +801,8 @@ static int btree_submit_bio_hook(struct
+ #ifdef CONFIG_MIGRATION
+ static int btree_migratepage(struct address_space *mapping,
+-                      struct page *newpage, struct page *page, bool sync)
++                      struct page *newpage, struct page *page,
++                      enum migrate_mode mode)
+ {
+       /*
+        * we can't safely write a btree page from here,
+@@ -816,7 +817,7 @@ static int btree_migratepage(struct addr
+       if (page_has_private(page) &&
+           !try_to_release_page(page, GFP_KERNEL))
+               return -EAGAIN;
+-      return migrate_page(mapping, newpage, page, sync);
++      return migrate_page(mapping, newpage, page, mode);
+ }
+ #endif
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -569,7 +569,7 @@ static int hugetlbfs_set_page_dirty(stru
+ static int hugetlbfs_migrate_page(struct address_space *mapping,
+                               struct page *newpage, struct page *page,
+-                              bool sync)
++                              enum migrate_mode mode)
+ {
+       int rc;
+--- a/fs/nfs/internal.h
++++ b/fs/nfs/internal.h
+@@ -315,7 +315,7 @@ void nfs_commit_release_pages(struct nfs
+ #ifdef CONFIG_MIGRATION
+ extern int nfs_migrate_page(struct address_space *,
+-              struct page *, struct page *, bool);
++              struct page *, struct page *, enum migrate_mode);
+ #else
+ #define nfs_migrate_page NULL
+ #endif
+--- a/fs/nfs/write.c
++++ b/fs/nfs/write.c
+@@ -1662,7 +1662,7 @@ out_error:
+ #ifdef CONFIG_MIGRATION
+ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
+-              struct page *page, bool sync)
++              struct page *page, enum migrate_mode mode)
+ {
+       /*
+        * If PagePrivate is set, then the page is currently associated with
+@@ -1677,7 +1677,7 @@ int nfs_migrate_page(struct address_spac
+       nfs_fscache_release_page(page, GFP_KERNEL);
+-      return migrate_page(mapping, newpage, page, sync);
++      return migrate_page(mapping, newpage, page, mode);
+ }
+ #endif
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -523,6 +523,7 @@ enum positive_aop_returns {
+ struct page;
+ struct address_space;
+ struct writeback_control;
++enum migrate_mode;
+ struct iov_iter {
+       const struct iovec *iov;
+@@ -612,7 +613,7 @@ struct address_space_operations {
+        * is false, it must not block.
+        */
+       int (*migratepage) (struct address_space *,
+-                      struct page *, struct page *, bool);
++                      struct page *, struct page *, enum migrate_mode);
+       int (*launder_page) (struct page *);
+       int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
+                                       unsigned long);
+@@ -2481,7 +2482,8 @@ extern int generic_check_addressable(uns
+ #ifdef CONFIG_MIGRATION
+ extern int buffer_migrate_page(struct address_space *,
+-                              struct page *, struct page *, bool);
++                              struct page *, struct page *,
++                              enum migrate_mode);
+ #else
+ #define buffer_migrate_page NULL
+ #endif
+--- a/include/linux/migrate.h
++++ b/include/linux/migrate.h
+@@ -6,18 +6,31 @@
+ typedef struct page *new_page_t(struct page *, unsigned long private, int **);
++/*
++ * MIGRATE_ASYNC means never block
++ * MIGRATE_SYNC_LIGHT in the current implementation means to allow blocking
++ *    on most operations but not ->writepage as the potential stall time
++ *    is too significant
++ * MIGRATE_SYNC will block when migrating pages
++ */
++enum migrate_mode {
++      MIGRATE_ASYNC,
++      MIGRATE_SYNC_LIGHT,
++      MIGRATE_SYNC,
++};
++
+ #ifdef CONFIG_MIGRATION
+ #define PAGE_MIGRATION 1
+ extern void putback_lru_pages(struct list_head *l);
+ extern int migrate_page(struct address_space *,
+-                      struct page *, struct page *, bool);
++                      struct page *, struct page *, enum migrate_mode);
+ extern int migrate_pages(struct list_head *l, new_page_t x,
+                       unsigned long private, bool offlining,
+-                      bool sync);
++                      enum migrate_mode mode);
+ extern int migrate_huge_pages(struct list_head *l, new_page_t x,
+                       unsigned long private, bool offlining,
+-                      bool sync);
++                      enum migrate_mode mode);
+ extern int fail_migrate_page(struct address_space *,
+                       struct page *, struct page *);
+@@ -36,10 +49,10 @@ extern int migrate_huge_page_move_mappin
+ static inline void putback_lru_pages(struct list_head *l) {}
+ static inline int migrate_pages(struct list_head *l, new_page_t x,
+               unsigned long private, bool offlining,
+-              bool sync) { return -ENOSYS; }
++              enum migrate_mode mode) { return -ENOSYS; }
+ static inline int migrate_huge_pages(struct list_head *l, new_page_t x,
+               unsigned long private, bool offlining,
+-              bool sync) { return -ENOSYS; }
++              enum migrate_mode mode) { return -ENOSYS; }
+ static inline int migrate_prep(void) { return -ENOSYS; }
+ static inline int migrate_prep_local(void) { return -ENOSYS; }
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -577,7 +577,7 @@ static int compact_zone(struct zone *zon
+               nr_migrate = cc->nr_migratepages;
+               err = migrate_pages(&cc->migratepages, compaction_alloc,
+                               (unsigned long)cc, false,
+-                              cc->sync);
++                              cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC);
+               update_nr_listpages(cc);
+               nr_remaining = cc->nr_migratepages;
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -1464,7 +1464,7 @@ int soft_offline_page(struct page *page,
+                                           page_is_file_cache(page));
+               list_add(&page->lru, &pagelist);
+               ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
+-                                                              0, true);
++                                                      0, MIGRATE_SYNC);
+               if (ret) {
+                       putback_lru_pages(&pagelist);
+                       pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
+--- a/mm/memory_hotplug.c
++++ b/mm/memory_hotplug.c
+@@ -747,7 +747,7 @@ do_migrate_range(unsigned long start_pfn
+               }
+               /* this function returns # of failed pages */
+               ret = migrate_pages(&source, hotremove_migrate_alloc, 0,
+-                                                              true, true);
++                                                      true, MIGRATE_SYNC);
+               if (ret)
+                       putback_lru_pages(&source);
+       }
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -926,7 +926,7 @@ static int migrate_to_node(struct mm_str
+       if (!list_empty(&pagelist)) {
+               err = migrate_pages(&pagelist, new_node_page, dest,
+-                                                              false, true);
++                                                      false, MIGRATE_SYNC);
+               if (err)
+                       putback_lru_pages(&pagelist);
+       }
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -222,12 +222,13 @@ out:
+ #ifdef CONFIG_BLOCK
+ /* Returns true if all buffers are successfully locked */
+-static bool buffer_migrate_lock_buffers(struct buffer_head *head, bool sync)
++static bool buffer_migrate_lock_buffers(struct buffer_head *head,
++                                                      enum migrate_mode mode)
+ {
+       struct buffer_head *bh = head;
+       /* Simple case, sync compaction */
+-      if (sync) {
++      if (mode != MIGRATE_ASYNC) {
+               do {
+                       get_bh(bh);
+                       lock_buffer(bh);
+@@ -263,7 +264,7 @@ static bool buffer_migrate_lock_buffers(
+ }
+ #else
+ static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
+-                                                              bool sync)
++                                                      enum migrate_mode mode)
+ {
+       return true;
+ }
+@@ -279,7 +280,7 @@ static inline bool buffer_migrate_lock_b
+  */
+ static int migrate_page_move_mapping(struct address_space *mapping,
+               struct page *newpage, struct page *page,
+-              struct buffer_head *head, bool sync)
++              struct buffer_head *head, enum migrate_mode mode)
+ {
+       int expected_count;
+       void **pslot;
+@@ -315,7 +316,8 @@ static int migrate_page_move_mapping(str
+        * the mapping back due to an elevated page count, we would have to
+        * block waiting on other references to be dropped.
+        */
+-      if (!sync && head && !buffer_migrate_lock_buffers(head, sync)) {
++      if (mode == MIGRATE_ASYNC && head &&
++                      !buffer_migrate_lock_buffers(head, mode)) {
+               page_unfreeze_refs(page, expected_count);
+               spin_unlock_irq(&mapping->tree_lock);
+               return -EAGAIN;
+@@ -478,13 +480,14 @@ EXPORT_SYMBOL(fail_migrate_page);
+  * Pages are locked upon entry and exit.
+  */
+ int migrate_page(struct address_space *mapping,
+-              struct page *newpage, struct page *page, bool sync)
++              struct page *newpage, struct page *page,
++              enum migrate_mode mode)
+ {
+       int rc;
+       BUG_ON(PageWriteback(page));    /* Writeback must be complete */
+-      rc = migrate_page_move_mapping(mapping, newpage, page, NULL, sync);
++      rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode);
+       if (rc)
+               return rc;
+@@ -501,17 +504,17 @@ EXPORT_SYMBOL(migrate_page);
+  * exist.
+  */
+ int buffer_migrate_page(struct address_space *mapping,
+-              struct page *newpage, struct page *page, bool sync)
++              struct page *newpage, struct page *page, enum migrate_mode mode)
+ {
+       struct buffer_head *bh, *head;
+       int rc;
+       if (!page_has_buffers(page))
+-              return migrate_page(mapping, newpage, page, sync);
++              return migrate_page(mapping, newpage, page, mode);
+       head = page_buffers(page);
+-      rc = migrate_page_move_mapping(mapping, newpage, page, head, sync);
++      rc = migrate_page_move_mapping(mapping, newpage, page, head, mode);
+       if (rc)
+               return rc;
+@@ -521,8 +524,8 @@ int buffer_migrate_page(struct address_s
+        * with an IRQ-safe spinlock held. In the sync case, the buffers
+        * need to be locked now
+        */
+-      if (sync)
+-              BUG_ON(!buffer_migrate_lock_buffers(head, sync));
++      if (mode != MIGRATE_ASYNC)
++              BUG_ON(!buffer_migrate_lock_buffers(head, mode));
+       ClearPagePrivate(page);
+       set_page_private(newpage, page_private(page));
+@@ -599,10 +602,11 @@ static int writeout(struct address_space
+  * Default handling if a filesystem does not provide a migration function.
+  */
+ static int fallback_migrate_page(struct address_space *mapping,
+-      struct page *newpage, struct page *page, bool sync)
++      struct page *newpage, struct page *page, enum migrate_mode mode)
+ {
+       if (PageDirty(page)) {
+-              if (!sync)
++              /* Only writeback pages in full synchronous migration */
++              if (mode != MIGRATE_SYNC)
+                       return -EBUSY;
+               return writeout(mapping, page);
+       }
+@@ -615,7 +619,7 @@ static int fallback_migrate_page(struct
+           !try_to_release_page(page, GFP_KERNEL))
+               return -EAGAIN;
+-      return migrate_page(mapping, newpage, page, sync);
++      return migrate_page(mapping, newpage, page, mode);
+ }
+ /*
+@@ -630,7 +634,7 @@ static int fallback_migrate_page(struct
+  *  == 0 - success
+  */
+ static int move_to_new_page(struct page *newpage, struct page *page,
+-                                      int remap_swapcache, bool sync)
++                              int remap_swapcache, enum migrate_mode mode)
+ {
+       struct address_space *mapping;
+       int rc;
+@@ -651,7 +655,7 @@ static int move_to_new_page(struct page
+       mapping = page_mapping(page);
+       if (!mapping)
+-              rc = migrate_page(mapping, newpage, page, sync);
++              rc = migrate_page(mapping, newpage, page, mode);
+       else if (mapping->a_ops->migratepage)
+               /*
+                * Most pages have a mapping and most filesystems provide a
+@@ -660,9 +664,9 @@ static int move_to_new_page(struct page
+                * is the most common path for page migration.
+                */
+               rc = mapping->a_ops->migratepage(mapping,
+-                                              newpage, page, sync);
++                                              newpage, page, mode);
+       else
+-              rc = fallback_migrate_page(mapping, newpage, page, sync);
++              rc = fallback_migrate_page(mapping, newpage, page, mode);
+       if (rc) {
+               newpage->mapping = NULL;
+@@ -677,7 +681,7 @@ static int move_to_new_page(struct page
+ }
+ static int __unmap_and_move(struct page *page, struct page *newpage,
+-                              int force, bool offlining, bool sync)
++                      int force, bool offlining, enum migrate_mode mode)
+ {
+       int rc = -EAGAIN;
+       int remap_swapcache = 1;
+@@ -686,7 +690,7 @@ static int __unmap_and_move(struct page
+       struct anon_vma *anon_vma = NULL;
+       if (!trylock_page(page)) {
+-              if (!force || !sync)
++              if (!force || mode == MIGRATE_ASYNC)
+                       goto out;
+               /*
+@@ -732,10 +736,12 @@ static int __unmap_and_move(struct page
+       if (PageWriteback(page)) {
+               /*
+-               * For !sync, there is no point retrying as the retry loop
+-               * is expected to be too short for PageWriteback to be cleared
++               * Only in the case of a full synchronous migration is it
++               * necessary to wait for PageWriteback. In the async case,
++               * the retry loop is too short and in the sync-light case,
++               * the overhead of stalling is too much
+                */
+-              if (!sync) {
++              if (mode != MIGRATE_SYNC) {
+                       rc = -EBUSY;
+                       goto uncharge;
+               }
+@@ -806,7 +812,7 @@ static int __unmap_and_move(struct page
+ skip_unmap:
+       if (!page_mapped(page))
+-              rc = move_to_new_page(newpage, page, remap_swapcache, sync);
++              rc = move_to_new_page(newpage, page, remap_swapcache, mode);
+       if (rc && remap_swapcache)
+               remove_migration_ptes(page, page);
+@@ -829,7 +835,8 @@ out:
+  * to the newly allocated page in newpage.
+  */
+ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
+-                      struct page *page, int force, bool offlining, bool sync)
++                      struct page *page, int force, bool offlining,
++                      enum migrate_mode mode)
+ {
+       int rc = 0;
+       int *result = NULL;
+@@ -847,7 +854,7 @@ static int unmap_and_move(new_page_t get
+               if (unlikely(split_huge_page(page)))
+                       goto out;
+-      rc = __unmap_and_move(page, newpage, force, offlining, sync);
++      rc = __unmap_and_move(page, newpage, force, offlining, mode);
+ out:
+       if (rc != -EAGAIN) {
+               /*
+@@ -895,7 +902,8 @@ out:
+  */
+ static int unmap_and_move_huge_page(new_page_t get_new_page,
+                               unsigned long private, struct page *hpage,
+-                              int force, bool offlining, bool sync)
++                              int force, bool offlining,
++                              enum migrate_mode mode)
+ {
+       int rc = 0;
+       int *result = NULL;
+@@ -908,7 +916,7 @@ static int unmap_and_move_huge_page(new_
+       rc = -EAGAIN;
+       if (!trylock_page(hpage)) {
+-              if (!force || !sync)
++              if (!force || mode != MIGRATE_SYNC)
+                       goto out;
+               lock_page(hpage);
+       }
+@@ -919,7 +927,7 @@ static int unmap_and_move_huge_page(new_
+       try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
+       if (!page_mapped(hpage))
+-              rc = move_to_new_page(new_hpage, hpage, 1, sync);
++              rc = move_to_new_page(new_hpage, hpage, 1, mode);
+       if (rc)
+               remove_migration_ptes(hpage, hpage);
+@@ -962,7 +970,7 @@ out:
+  */
+ int migrate_pages(struct list_head *from,
+               new_page_t get_new_page, unsigned long private, bool offlining,
+-              bool sync)
++              enum migrate_mode mode)
+ {
+       int retry = 1;
+       int nr_failed = 0;
+@@ -983,7 +991,7 @@ int migrate_pages(struct list_head *from
+                       rc = unmap_and_move(get_new_page, private,
+                                               page, pass > 2, offlining,
+-                                              sync);
++                                              mode);
+                       switch(rc) {
+                       case -ENOMEM:
+@@ -1013,7 +1021,7 @@ out:
+ int migrate_huge_pages(struct list_head *from,
+               new_page_t get_new_page, unsigned long private, bool offlining,
+-              bool sync)
++              enum migrate_mode mode)
+ {
+       int retry = 1;
+       int nr_failed = 0;
+@@ -1030,7 +1038,7 @@ int migrate_huge_pages(struct list_head
+                       rc = unmap_and_move_huge_page(get_new_page,
+                                       private, page, pass > 2, offlining,
+-                                      sync);
++                                      mode);
+                       switch(rc) {
+                       case -ENOMEM:
+@@ -1159,7 +1167,7 @@ set_status:
+       err = 0;
+       if (!list_empty(&pagelist)) {
+               err = migrate_pages(&pagelist, new_page_node,
+-                              (unsigned long)pm, 0, true);
++                              (unsigned long)pm, 0, MIGRATE_SYNC);
+               if (err)
+                       putback_lru_pages(&pagelist);
+       }
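The three modes amount to a small contract on how much a migration caller may
block. A rough sketch of the dirty-page decision made by fallback_migrate_page()
above; the enum mirrors the patch, while the helper is a made-up stand-in, not
the kernel function.

#include <errno.h>

enum migrate_mode {
	MIGRATE_ASYNC,		/* never block */
	MIGRATE_SYNC_LIGHT,	/* may block, but never on ->writepage */
	MIGRATE_SYNC,		/* may block, including on page writeback */
};

/* A dirty page with no ->migratepage handler must be written out before it
 * can be migrated; only full sync migration is allowed to pay that cost. */
static int fallback_dirty_page(enum migrate_mode mode)
{
	if (mode != MIGRATE_SYNC)
		return -EBUSY;	/* skip: async and sync-light never write back */
	return 0;		/* write the page out, then migrate the clean copy */
}
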
diff --git a/queue-3.0/mm-compaction-make-isolate_lru_page-filter-aware-again.patch b/queue-3.0/mm-compaction-make-isolate_lru_page-filter-aware-again.patch
new file mode 100644 (file)
index 0000000..4e3c161
--- /dev/null
@@ -0,0 +1,110 @@
+From c82449352854ff09e43062246af86bdeb628f0c3 Mon Sep 17 00:00:00 2001
+From: Mel Gorman <mgorman@suse.de>
+Date: Thu, 12 Jan 2012 17:19:38 -0800
+Subject: mm: compaction: make isolate_lru_page() filter-aware again
+
+From: Mel Gorman <mgorman@suse.de>
+
+commit c82449352854ff09e43062246af86bdeb628f0c3 upstream.
+
+Stable note: Not tracked in Bugzilla. A fix aimed at preserving page aging
+       information by reducing LRU list churning had the side-effect of
+       reducing THP allocation success rates. This was part of a series
+       to restore the success rates while preserving the reclaim fix.
+
+Commit 39deaf85 ("mm: compaction: make isolate_lru_page() filter-aware")
+noted that compaction does not migrate dirty or writeback pages and that
+it was meaningless to pick such a page and re-add it to the LRU list.  This
+had to be partially reverted because some dirty pages can be migrated by
+compaction without blocking.
+
+This patch updates "mm: compaction: make isolate_lru_page" by skipping
+over pages that migration cannot possibly handle without blocking, to
+minimise LRU disruption.
+
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Reviewed-by: Rik van Riel<riel@redhat.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Reviewed-by: Minchan Kim <minchan@kernel.org>
+Cc: Dave Jones <davej@redhat.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Andy Isaacson <adi@hexapodia.org>
+Cc: Nai Xia <nai.xia@gmail.com>
+Cc: Johannes Weiner <jweiner@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/mmzone.h |    2 ++
+ mm/compaction.c        |    3 +++
+ mm/vmscan.c            |   35 +++++++++++++++++++++++++++++++++--
+ 3 files changed, 38 insertions(+), 2 deletions(-)
+
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -166,6 +166,8 @@ static inline int is_unevictable_lru(enu
+ #define ISOLATE_CLEAN         ((__force isolate_mode_t)0x4)
+ /* Isolate unmapped file */
+ #define ISOLATE_UNMAPPED      ((__force isolate_mode_t)0x8)
++/* Isolate for asynchronous migration */
++#define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x10)
+ /* LRU Isolation modes. */
+ typedef unsigned __bitwise__ isolate_mode_t;
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -371,6 +371,9 @@ static isolate_migrate_t isolate_migrate
+                       continue;
+               }
++              if (!cc->sync)
++                      mode |= ISOLATE_ASYNC_MIGRATE;
++
+               /* Try isolate the page */
+               if (__isolate_lru_page(page, mode, 0) != 0)
+                       continue;
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -1045,8 +1045,39 @@ int __isolate_lru_page(struct page *page
+       ret = -EBUSY;
+-      if ((mode & ISOLATE_CLEAN) && (PageDirty(page) || PageWriteback(page)))
+-              return ret;
++      /*
++       * To minimise LRU disruption, the caller can indicate that it only
++       * wants to isolate pages it will be able to operate on without
++       * blocking - clean pages for the most part.
++       *
++       * ISOLATE_CLEAN means that only clean pages should be isolated. This
++       * is used by reclaim when it cannot write to backing storage
++       *
++       * ISOLATE_ASYNC_MIGRATE is used to indicate that the caller only wants
++       * pages that it is possible to migrate without blocking
++       */
++      if (mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)) {
++              /* All the caller can do on PageWriteback is block */
++              if (PageWriteback(page))
++                      return ret;
++
++              if (PageDirty(page)) {
++                      struct address_space *mapping;
++
++                      /* ISOLATE_CLEAN means only clean pages */
++                      if (mode & ISOLATE_CLEAN)
++                              return ret;
++
++                      /*
++                       * Only pages without mappings or that have a
++                       * ->migratepage callback are possible to migrate
++                       * without blocking
++                       */
++                      mapping = page_mapping(page);
++                      if (mapping && !mapping->a_ops->migratepage)
++                              return ret;
++              }
++      }
+       if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
+               return ret;
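Put simply, the isolation filter now refuses pages that the caller could not
process without blocking. A standalone model of that decision, with the page
state reduced to booleans; the flag values mirror the mmzone.h hunk, the helper
itself is only illustrative.

#include <stdbool.h>

#define ISOLATE_CLEAN		0x4
#define ISOLATE_ASYNC_MIGRATE	0x10

/* mapping_can_migrate: true for pages without a mapping, or whose mapping
 * provides a ->migratepage callback, i.e. migratable without blocking. */
static bool may_isolate(unsigned mode, bool writeback, bool dirty,
			bool mapping_can_migrate)
{
	if (!(mode & (ISOLATE_CLEAN | ISOLATE_ASYNC_MIGRATE)))
		return true;		/* caller is willing to block */
	if (writeback)
		return false;		/* nothing to do but wait on it */
	if (dirty) {
		if (mode & ISOLATE_CLEAN)
			return false;	/* reclaim cannot write it back */
		if (!mapping_can_migrate)
			return false;	/* migration would have to block */
	}
	return true;
}
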
diff --git a/queue-3.0/mm-migration-clean-up-unmap_and_move.patch b/queue-3.0/mm-migration-clean-up-unmap_and_move.patch
index 9f903707a39c39f69472928be615f91efdd4f9f8..f05ecd61b1ff75991060f78356c8695ad8e51290 100644 (file)
@@ -22,6 +22,8 @@ Cc: Michal Hocko <mhocko@suse.cz>
 Cc: Andrea Arcangeli <aarcange@redhat.com>
 Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
 Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 
 ---
  mm/migrate.c |   75 +++++++++++++++++++++++++++++++----------------------------
diff --git a/queue-3.0/mm-page-allocator-do-not-call-direct-reclaim-for-thp-allocations-while-compaction-is-deferred.patch b/queue-3.0/mm-page-allocator-do-not-call-direct-reclaim-for-thp-allocations-while-compaction-is-deferred.patch
new file mode 100644 (file)
index 0000000..c45ad15
--- /dev/null
@@ -0,0 +1,140 @@
+From 66199712e9eef5aede09dbcd9dfff87798a66917 Mon Sep 17 00:00:00 2001
+From: Mel Gorman <mgorman@suse.de>
+Date: Thu, 12 Jan 2012 17:19:41 -0800
+Subject: mm: page allocator: do not call direct reclaim for THP allocations while compaction is deferred
+
+From: Mel Gorman <mgorman@suse.de>
+
+commit 66199712e9eef5aede09dbcd9dfff87798a66917 upstream.
+
+Stable note: Not tracked in Bugzilla. This was part of a series that
+       reduced interactivity stalls experienced when THP was enabled.
+
+If compaction is deferred, direct reclaim is used to try to free enough
+pages for the allocation to succeed.  For small high-orders, this has a
+reasonable chance of success.  However, if the caller has specified
+__GFP_NO_KSWAPD to limit the disruption to the system, it makes more sense
+to fail the allocation rather than stall the caller in direct reclaim.
+This patch skips direct reclaim if compaction is deferred and the caller
+specifies __GFP_NO_KSWAPD.
+
+Async compaction only considers a subset of pages so it is possible for
+compaction to be deferred prematurely and not enter direct reclaim even in
+cases where it should.  To compensate for this, this patch also defers
+compaction only if sync compaction failed.
+
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Acked-by: Minchan Kim <minchan.kim@gmail.com>
+Reviewed-by: Rik van Riel<riel@redhat.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Dave Jones <davej@redhat.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Andy Isaacson <adi@hexapodia.org>
+Cc: Nai Xia <nai.xia@gmail.com>
+Cc: Johannes Weiner <jweiner@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/page_alloc.c |   45 +++++++++++++++++++++++++++++++++++----------
+ 1 file changed, 35 insertions(+), 10 deletions(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -1897,14 +1897,20 @@ static struct page *
+ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
+       struct zonelist *zonelist, enum zone_type high_zoneidx,
+       nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
+-      int migratetype, unsigned long *did_some_progress,
+-      bool sync_migration)
++      int migratetype, bool sync_migration,
++      bool *deferred_compaction,
++      unsigned long *did_some_progress)
+ {
+       struct page *page;
+-      if (!order || compaction_deferred(preferred_zone))
++      if (!order)
+               return NULL;
++      if (compaction_deferred(preferred_zone)) {
++              *deferred_compaction = true;
++              return NULL;
++      }
++
+       current->flags |= PF_MEMALLOC;
+       *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
+                                               nodemask, sync_migration);
+@@ -1932,7 +1938,13 @@ __alloc_pages_direct_compact(gfp_t gfp_m
+                * but not enough to satisfy watermarks.
+                */
+               count_vm_event(COMPACTFAIL);
+-              defer_compaction(preferred_zone);
++
++              /*
++               * As async compaction considers a subset of pageblocks, only
++               * defer if the failure was a sync compaction failure.
++               */
++              if (sync_migration)
++                      defer_compaction(preferred_zone);
+               cond_resched();
+       }
+@@ -1944,8 +1956,9 @@ static inline struct page *
+ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
+       struct zonelist *zonelist, enum zone_type high_zoneidx,
+       nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
+-      int migratetype, unsigned long *did_some_progress,
+-      bool sync_migration)
++      int migratetype, bool sync_migration,
++      bool *deferred_compaction,
++      unsigned long *did_some_progress)
+ {
+       return NULL;
+ }
+@@ -2095,6 +2108,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, u
+       unsigned long pages_reclaimed = 0;
+       unsigned long did_some_progress;
+       bool sync_migration = false;
++      bool deferred_compaction = false;
+       /*
+        * In the slowpath, we sanity check order to avoid ever trying to
+@@ -2175,12 +2189,22 @@ rebalance:
+                                       zonelist, high_zoneidx,
+                                       nodemask,
+                                       alloc_flags, preferred_zone,
+-                                      migratetype, &did_some_progress,
+-                                      sync_migration);
++                                      migratetype, sync_migration,
++                                      &deferred_compaction,
++                                      &did_some_progress);
+       if (page)
+               goto got_pg;
+       sync_migration = true;
++      /*
++       * If compaction is deferred for high-order allocations, it is because
++       * sync compaction recently failed. If this is the case and the caller
++       * has requested the system not be heavily disrupted, fail the
++       * allocation now instead of entering direct reclaim
++       */
++      if (deferred_compaction && (gfp_mask & __GFP_NO_KSWAPD))
++              goto nopage;
++
+       /* Try direct reclaim and then allocating */
+       page = __alloc_pages_direct_reclaim(gfp_mask, order,
+                                       zonelist, high_zoneidx,
+@@ -2243,8 +2267,9 @@ rebalance:
+                                       zonelist, high_zoneidx,
+                                       nodemask,
+                                       alloc_flags, preferred_zone,
+-                                      migratetype, &did_some_progress,
+-                                      sync_migration);
++                                      migratetype, sync_migration,
++                                      &deferred_compaction,
++                                      &did_some_progress);
+               if (page)
+                       goto got_pg;
+       }
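The behavioural change reduces to two small decisions in the allocator
slowpath. A sketch with the relevant state collapsed to booleans; illustrative
only, not the page allocator itself.

#include <stdbool.h>

/* Defer future compaction only after a sync failure: async compaction scans
 * just a subset of pageblocks, so its failures prove little. */
static bool should_defer_compaction(bool sync_migration_failed)
{
	return sync_migration_failed;
}

/* Skip direct reclaim entirely when compaction was deferred and the caller
 * passed __GFP_NO_KSWAPD (as THP faults do), asking not to disrupt the
 * system; failing fast beats stalling in reclaim. */
static bool should_fail_instead_of_reclaiming(bool deferred_compaction,
					      bool gfp_no_kswapd)
{
	return deferred_compaction && gfp_no_kswapd;
}
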
diff --git a/queue-3.0/series b/queue-3.0/series
index 30fcb6914bbb09ee88232f146b3be6af7087bfca..b7428b7c8f4b82a1a5bcb49eacfd596265a44b73 100644 (file)
@@ -21,3 +21,8 @@ mm-zone_reclaim-make-isolate_lru_page-filter-aware.patch
 mm-migration-clean-up-unmap_and_move.patch
 mm-compaction-allow-compaction-to-isolate-dirty-pages.patch
 mm-compaction-determine-if-dirty-pages-can-be-migrated-without-blocking-within-migratepage.patch
+mm-page-allocator-do-not-call-direct-reclaim-for-thp-allocations-while-compaction-is-deferred.patch
+mm-compaction-make-isolate_lru_page-filter-aware-again.patch
+kswapd-avoid-unnecessary-rebalance-after-an-unsuccessful-balancing.patch
+kswapd-assign-new_order-and-new_classzone_idx-after-wakeup-in-sleeping.patch
+mm-compaction-introduce-sync-light-migration-for-use-by-compaction.patch