--- /dev/null
+From 938929f14cb595f43cd1a4e63e22d36cab1e4a1f Mon Sep 17 00:00:00 2001
+From: Mel Gorman <mgorman@suse.de>
+Date: Tue, 10 Jan 2012 15:07:14 -0800
+Subject: mm: reduce the amount of work done when updating min_free_kbytes
+
+From: Mel Gorman <mgorman@suse.de>
+
+commit 938929f14cb595f43cd1a4e63e22d36cab1e4a1f upstream.
+
+Stable note: Fixes https://bugzilla.novell.com/show_bug.cgi?id=726210 .
+ Large machines with 1TB or more of RAM take a long time to boot
+ without this patch and may spew out soft lockup warnings.
+
+When min_free_kbytes is updated, some pageblocks are marked
+MIGRATE_RESERVE. Ordinarily, this work is unnoticeable as it happens
+early in boot, but on large machines with 1TB of memory it has been
+reported to delay boot times, probably due to the NUMA distances
+involved.
+
+The bulk of the work is due to calling pageblock_is_reserved() an
+unnecessary number of times and accessing far more struct page metadata
+than is necessary. This patch significantly reduces the amount of work
+done by setup_zone_migrate_reserve(), improving boot times on 1TB
+machines.
+
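+As an illustration of why hoisting the reserve check matters, here is a
+minimal standalone C sketch (the helpers and numbers are invented, not
+the kernel's): once the reserve target is met, the expensive per-block
+walk is skipped entirely.
+
+    #include <stdio.h>
+
+    enum mtype { MOVABLE, OTHER };
+
+    /* Stand-in for pageblock_is_reserved(): in the real kernel this
+     * walks every page in the block, so it is the expensive call. */
+    static int block_is_reserved(int blk) { return blk % 7 == 0; }
+    static enum mtype block_type(int blk)
+    {
+        return blk % 3 == 0 ? MOVABLE : OTHER;
+    }
+
+    int main(void)
+    {
+        int reserve = 2;        /* blocks still wanted for the reserve */
+        int expensive = 0;      /* how often the per-page walk ran */
+
+        for (int blk = 0; blk < 1024; blk++) {
+            /* Only pay for the per-page walk while reserve is unmet. */
+            if (reserve > 0) {
+                expensive++;
+                if (block_is_reserved(blk))
+                    continue;
+                if (block_type(blk) == MOVABLE) {
+                    /* would mark the block MIGRATE_RESERVE here */
+                    reserve--;
+                    continue;
+                }
+            }
+            /* reserve met: only the cheap demotion path remains */
+        }
+        printf("expensive checks: %d of 1024 blocks\n", expensive);
+        return 0;
+    }
+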
+[akpm@linux-foundation.org: coding-style fixes]
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/page_alloc.c | 40 ++++++++++++++++++++++++----------------
+ 1 file changed, 24 insertions(+), 16 deletions(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3418,25 +3418,33 @@ static void setup_zone_migrate_reserve(s
+ if (page_to_nid(page) != zone_to_nid(zone))
+ continue;
+
+- /* Blocks with reserved pages will never free, skip them. */
+- block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn);
+- if (pageblock_is_reserved(pfn, block_end_pfn))
+- continue;
+-
+ block_migratetype = get_pageblock_migratetype(page);
+
+- /* If this block is reserved, account for it */
+- if (reserve > 0 && block_migratetype == MIGRATE_RESERVE) {
+- reserve--;
+- continue;
+- }
++ /* Only test what is necessary when the reserves are not met */
++ if (reserve > 0) {
++ /*
++ * Blocks with reserved pages will never free, skip
++ * them.
++ */
++ block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn);
++ if (pageblock_is_reserved(pfn, block_end_pfn))
++ continue;
+
+- /* Suitable for reserving if this block is movable */
+- if (reserve > 0 && block_migratetype == MIGRATE_MOVABLE) {
+- set_pageblock_migratetype(page, MIGRATE_RESERVE);
+- move_freepages_block(zone, page, MIGRATE_RESERVE);
+- reserve--;
+- continue;
++ /* If this block is reserved, account for it */
++ if (block_migratetype == MIGRATE_RESERVE) {
++ reserve--;
++ continue;
++ }
++
++ /* Suitable for reserving if this block is movable */
++ if (block_migratetype == MIGRATE_MOVABLE) {
++ set_pageblock_migratetype(page,
++ MIGRATE_RESERVE);
++ move_freepages_block(zone, page,
++ MIGRATE_RESERVE);
++ reserve--;
++ continue;
++ }
+ }
+
+ /*
--- /dev/null
+From a4d3e9e76337059406fcf3ead288c0df22a790e9 Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <jweiner@redhat.com>
+Date: Wed, 14 Sep 2011 16:21:52 -0700
+Subject: mm: vmscan: fix force-scanning small targets without swap
+
+From: Johannes Weiner <jweiner@redhat.com>
+
+commit a4d3e9e76337059406fcf3ead288c0df22a790e9 upstream.
+
+Stable note: Not tracked in Bugzilla. This patch augments an earlier commit
+ that prevents the scan priority from being artificially raised. The
+ older fix was particularly important for small memcgs to avoid calling
+ wait_iff_congested() unnecessarily.
+
+Without swap, anonymous pages are not scanned, so they should not count
+when considering whether to force-scan a small target.
+
+Otherwise, targets are not force-scanned even when their effective scan
+number is zero and the other conditions--kswapd/memcg--apply.
+
+This fixes 246e87a93934 ("memcg: fix get_scan_count() for small
+targets").
+
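+To make the accounting issue concrete, a small standalone sketch with
+made-up numbers (not the kernel code) shows how counting anon pages
+hides a small target when there is no swap:
+
+    #include <stdbool.h>
+    #include <stdio.h>
+
+    #define SWAP_CLUSTER_MAX 32UL
+
+    int main(void)
+    {
+        unsigned long anon = 4096, file = 8;    /* LRU pages */
+        bool have_swap = false;
+        int priority = 4;
+
+        /* Old accounting: anon inflates the total even though it is
+         * never scanned without swap. */
+        unsigned long scan_old = (anon + file) >> priority;
+        /* Fixed accounting: only count what can actually be scanned. */
+        unsigned long scan_new =
+                (have_swap ? anon + file : file) >> priority;
+
+        printf("count anon: %lu -> force-scan %s\n", scan_old,
+               scan_old < SWAP_CLUSTER_MAX ? "yes" : "no");
+        printf("file only:  %lu -> force-scan %s\n", scan_new,
+               scan_new < SWAP_CLUSTER_MAX ? "yes" : "no");
+        return 0;
+    }
+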
+[akpm@linux-foundation.org: fix comment]
+Signed-off-by: Johannes Weiner <jweiner@redhat.com>
+Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
+Reviewed-by: Michal Hocko <mhocko@suse.cz>
+Cc: Ying Han <yinghan@google.com>
+Cc: Balbir Singh <bsingharora@gmail.com>
+Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
+Acked-by: Mel Gorman <mel@csn.ul.ie>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/vmscan.c | 27 ++++++++++++---------------
+ 1 file changed, 12 insertions(+), 15 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -1747,23 +1747,15 @@ static void get_scan_count(struct zone *
+ u64 fraction[2], denominator;
+ enum lru_list l;
+ int noswap = 0;
+- int force_scan = 0;
++ bool force_scan = false;
+ unsigned long nr_force_scan[2];
+
+-
+- anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
+- zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
+- file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
+- zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
+-
+- if (((anon + file) >> priority) < SWAP_CLUSTER_MAX) {
+- /* kswapd does zone balancing and need to scan this zone */
+- if (scanning_global_lru(sc) && current_is_kswapd())
+- force_scan = 1;
+- /* memcg may have small limit and need to avoid priority drop */
+- if (!scanning_global_lru(sc))
+- force_scan = 1;
+- }
++ /* kswapd does zone balancing and needs to scan this zone */
++ if (scanning_global_lru(sc) && current_is_kswapd())
++ force_scan = true;
++ /* memcg may have small limit and need to avoid priority drop */
++ if (!scanning_global_lru(sc))
++ force_scan = true;
+
+ /* If we have no swap space, do not bother scanning anon pages. */
+ if (!sc->may_swap || (nr_swap_pages <= 0)) {
+@@ -1776,6 +1768,11 @@ static void get_scan_count(struct zone *
+ goto out;
+ }
+
++ anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
++ zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
++ file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
++ zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
++
+ if (scanning_global_lru(sc)) {
+ free = zone_page_state(zone, NR_FREE_PAGES);
+ /* If we have very few page cache pages,
dm-raid1-fix-crash-with-mirror-recovery-and-discard.patch
mm-vmstat.c-cache-align-vm_stat.patch
mm-memory-hotplug-check-if-pages-are-correctly-reserved-on-a-per-section-basis.patch
+mm-reduce-the-amount-of-work-done-when-updating-min_free_kbytes.patch
+mm-vmscan-fix-force-scanning-small-targets-without-swap.patch
+vmscan-clear-zone_congested-for-zone-with-good-watermark.patch
+vmscan-add-shrink_slab-tracepoints.patch
--- /dev/null
+From 095760730c1047c69159ce88021a7fa3833502c8 Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Fri, 8 Jul 2011 14:14:34 +1000
+Subject: vmscan: add shrink_slab tracepoints
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 095760730c1047c69159ce88021a7fa3833502c8 upstream.
+
+Stable note: This patch makes later patches easier to apply but otherwise
+ has little to justify it. It is a diagnostic patch that was part
+ of a series addressing excessive slab shrinking after GFP_NOFS
+ failures. There is detailed information on the series' motivation
+ at https://lkml.org/lkml/2011/6/2/42 .
+
+It is impossible to understand what the shrinkers are actually doing
+without instrumenting the code, so add some tracepoints to allow
+insight to be gained.
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+
+---
+ include/trace/events/vmscan.h | 77 ++++++++++++++++++++++++++++++++++++++++++
+ mm/vmscan.c | 8 +++-
+ 2 files changed, 84 insertions(+), 1 deletion(-)
+
+--- a/include/trace/events/vmscan.h
++++ b/include/trace/events/vmscan.h
+@@ -179,6 +179,83 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_en
+ TP_ARGS(nr_reclaimed)
+ );
+
++TRACE_EVENT(mm_shrink_slab_start,
++ TP_PROTO(struct shrinker *shr, struct shrink_control *sc,
++ long nr_objects_to_shrink, unsigned long pgs_scanned,
++ unsigned long lru_pgs, unsigned long cache_items,
++ unsigned long long delta, unsigned long total_scan),
++
++ TP_ARGS(shr, sc, nr_objects_to_shrink, pgs_scanned, lru_pgs,
++ cache_items, delta, total_scan),
++
++ TP_STRUCT__entry(
++ __field(struct shrinker *, shr)
++ __field(void *, shrink)
++ __field(long, nr_objects_to_shrink)
++ __field(gfp_t, gfp_flags)
++ __field(unsigned long, pgs_scanned)
++ __field(unsigned long, lru_pgs)
++ __field(unsigned long, cache_items)
++ __field(unsigned long long, delta)
++ __field(unsigned long, total_scan)
++ ),
++
++ TP_fast_assign(
++ __entry->shr = shr;
++ __entry->shrink = shr->shrink;
++ __entry->nr_objects_to_shrink = nr_objects_to_shrink;
++ __entry->gfp_flags = sc->gfp_mask;
++ __entry->pgs_scanned = pgs_scanned;
++ __entry->lru_pgs = lru_pgs;
++ __entry->cache_items = cache_items;
++ __entry->delta = delta;
++ __entry->total_scan = total_scan;
++ ),
++
++ TP_printk("%pF %p: objects to shrink %ld gfp_flags %s pgs_scanned %ld lru_pgs %ld cache items %ld delta %lld total_scan %ld",
++ __entry->shrink,
++ __entry->shr,
++ __entry->nr_objects_to_shrink,
++ show_gfp_flags(__entry->gfp_flags),
++ __entry->pgs_scanned,
++ __entry->lru_pgs,
++ __entry->cache_items,
++ __entry->delta,
++ __entry->total_scan)
++);
++
++TRACE_EVENT(mm_shrink_slab_end,
++ TP_PROTO(struct shrinker *shr, int shrinker_retval,
++ long unused_scan_cnt, long new_scan_cnt),
++
++ TP_ARGS(shr, shrinker_retval, unused_scan_cnt, new_scan_cnt),
++
++ TP_STRUCT__entry(
++ __field(struct shrinker *, shr)
++ __field(void *, shrink)
++ __field(long, unused_scan)
++ __field(long, new_scan)
++ __field(int, retval)
++ __field(long, total_scan)
++ ),
++
++ TP_fast_assign(
++ __entry->shr = shr;
++ __entry->shrink = shr->shrink;
++ __entry->unused_scan = unused_scan_cnt;
++ __entry->new_scan = new_scan_cnt;
++ __entry->retval = shrinker_retval;
++ __entry->total_scan = new_scan_cnt - unused_scan_cnt;
++ ),
++
++ TP_printk("%pF %p: unused scan count %ld new scan count %ld total_scan %ld last shrinker return val %d",
++ __entry->shrink,
++ __entry->shr,
++ __entry->unused_scan,
++ __entry->new_scan,
++ __entry->total_scan,
++ __entry->retval)
++);
+
+ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -250,6 +250,7 @@ unsigned long shrink_slab(struct shrink_
+ unsigned long long delta;
+ unsigned long total_scan;
+ unsigned long max_pass;
++ int shrink_ret = 0;
+
+ max_pass = do_shrinker_shrink(shrinker, shrink, 0);
+ delta = (4 * nr_pages_scanned) / shrinker->seeks;
+@@ -274,9 +275,12 @@ unsigned long shrink_slab(struct shrink_
+ total_scan = shrinker->nr;
+ shrinker->nr = 0;
+
++ trace_mm_shrink_slab_start(shrinker, shrink, total_scan,
++ nr_pages_scanned, lru_pages,
++ max_pass, delta, total_scan);
++
+ while (total_scan >= SHRINK_BATCH) {
+ long this_scan = SHRINK_BATCH;
+- int shrink_ret;
+ int nr_before;
+
+ nr_before = do_shrinker_shrink(shrinker, shrink, 0);
+@@ -293,6 +297,8 @@ unsigned long shrink_slab(struct shrink_
+ }
+
+ shrinker->nr += total_scan;
++ trace_mm_shrink_slab_end(shrinker, shrink_ret, total_scan,
++ shrinker->nr);
+ }
+ up_read(&shrinker_rwsem);
+ out:
--- /dev/null
+From 439423f6894aa0dec22187526827456f5004baed Mon Sep 17 00:00:00 2001
+From: Shaohua Li <shaohua.li@intel.com>
+Date: Thu, 25 Aug 2011 15:59:12 -0700
+Subject: vmscan: clear ZONE_CONGESTED for zone with good watermark
+
+From: Shaohua Li <shaohua.li@intel.com>
+
+commit 439423f6894aa0dec22187526827456f5004baed upstream.
+
+Stable note: Not tracked in Bugzilla. kswapd is responsible for clearing
+ ZONE_CONGESTED after it balances a zone and this patch fixes a bug
+ where that was failing to happen. Without this patch, processes
+ can stall in wait_iff_congested() unnecessarily. For users, this can
+ look like an interactivity stall, but some workloads would see it
+ as a sudden drop in throughput.
+
+ZONE_CONGESTED is only cleared in kswapd, but pages can be freed in any
+task. It's possible ZONE_CONGESTED isn't cleared in some cases:
+
+ 1. the zone is already balanced just as balance_pgdat() is entered for
+    order-0 because concurrent tasks free memory. In this case, the
+    later check will skip the zone as it's balanced, so the flag isn't
+    cleared.
+
+ 2. high-order balancing falls back to order-0. Quote from Mel: at the
+    end of balance_pgdat(), kswapd uses the following logic:
+
+ If reclaiming at high order {
+ for each zone {
+ if all_unreclaimable
+ skip
+ if watermark is not met
+ order = 0
+ loop again
+
+ /* watermark is met */
+ clear congested
+ }
+ }
+
+ i.e. it clears ZONE_CONGESTED if the zone is balanced. If not,
+ it restarts balancing at order-0. However, if the higher zones are
+ balanced for order-0, kswapd will miss clearing ZONE_CONGESTED as
+ that only happens after a zone is shrunk. This can mean that
+ wait_iff_congested() stalls unnecessarily.
+
+This patch makes kswapd clear ZONE_CONGESTED during its initial
+highmem->dma scan for zones that are already balanced.
+
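+A toy model of that initial scan (invented structures and numbers, not
+the kernel's) shows the effect: a zone that already meets its high
+watermark has its congested flag cleared even though the later shrink
+loop never touches it.
+
+    #include <stdbool.h>
+    #include <stdio.h>
+
+    struct toy_zone {
+        const char *name;
+        unsigned long free_pages;
+        unsigned long high_wmark;
+        bool congested;
+    };
+
+    int main(void)
+    {
+        struct toy_zone z[] = {
+            { "DMA",    100, 500, true },  /* needs reclaim */
+            { "Normal", 900, 500, true },  /* balanced, still flagged */
+        };
+        int end_zone = -1;
+
+        for (int i = 1; i >= 0; i--) {      /* highmem -> dma scan */
+            if (z[i].free_pages < z[i].high_wmark) {
+                end_zone = i;               /* will be balanced below */
+                break;
+            }
+            /* The fix: balanced zones drop ZONE_CONGESTED here. */
+            z[i].congested = false;
+        }
+
+        for (int i = 0; i < 2; i++)
+            printf("%s congested=%d\n", z[i].name, z[i].congested);
+        printf("end_zone=%d\n", end_zone);
+        return 0;
+    }
+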
+Signed-off-by: Shaohua Li <shaohua.li@intel.com>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/vmscan.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2456,6 +2456,9 @@ loop_again:
+ high_wmark_pages(zone), 0, 0)) {
+ end_zone = i;
+ break;
++ } else {
++ /* If balanced, clear the congested flag */
++ zone_clear_flag(zone, ZONE_CONGESTED);
+ }
+ }
+ if (i < 0)