From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 30 Oct 2013 21:19:27 +0000 (-0700)
Subject: 3.4-stable patches
X-Git-Tag: v3.4.68~9
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b29b043c781d4edac8690cd15dc0005df050fb01;p=thirdparty%2Fkernel%2Fstable-queue.git

3.4-stable patches

added patches:
	dm-snapshot-fix-data-corruption.patch
	fs-buffer-move-allocation-failure-loop-into-the-allocator.patch
	mm-migration-do-not-lose-soft-dirty-bit-if-page-is-in-migration-state.patch
	wireless-radiotap-fix-parsing-buffer-overrun.patch
	writeback-fix-negative-bdi-max-pause.patch
---

diff --git a/queue-3.4/dm-snapshot-fix-data-corruption.patch b/queue-3.4/dm-snapshot-fix-data-corruption.patch
new file mode 100644
index 00000000000..113824cba56
--- /dev/null
+++ b/queue-3.4/dm-snapshot-fix-data-corruption.patch
@@ -0,0 +1,88 @@
+From e9c6a182649f4259db704ae15a91ac820e63b0ca Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Wed, 16 Oct 2013 03:17:47 +0100
+Subject: dm snapshot: fix data corruption
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit e9c6a182649f4259db704ae15a91ac820e63b0ca upstream.
+
+This patch fixes a particular type of data corruption that has been
+encountered when loading a snapshot's metadata from disk.
+
+When we allocate a new chunk in persistent_prepare, we increment
+ps->next_free and we make sure that it doesn't point to a metadata area
+by further incrementing it if necessary.
+
+When we load metadata from disk on device activation, ps->next_free is
+positioned after the last used data chunk. However, if this last used
+data chunk is followed by a metadata area, ps->next_free is positioned
+erroneously to the metadata area. A newly-allocated chunk is placed at
+the same location as the metadata area, resulting in data or metadata
+corruption.
+
+This patch changes the code so that ps->next_free skips the metadata
+area when metadata are loaded in function read_exceptions.
+
+The patch also moves a piece of code from persistent_prepare_exception
+to a separate function skip_metadata to avoid code duplication.
+
+CVE-2013-4299
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Cc: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Alasdair G Kergon <agk@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm-snap-persistent.c |   18 ++++++++++++------
+ 1 file changed, 12 insertions(+), 6 deletions(-)
+
+--- a/drivers/md/dm-snap-persistent.c
++++ b/drivers/md/dm-snap-persistent.c
+@@ -269,6 +269,14 @@ static chunk_t area_location(struct psto
+ 	return NUM_SNAPSHOT_HDR_CHUNKS + ((ps->exceptions_per_area + 1) * area);
+ }
+ 
++static void skip_metadata(struct pstore *ps)
++{
++	uint32_t stride = ps->exceptions_per_area + 1;
++	chunk_t next_free = ps->next_free;
++	if (sector_div(next_free, stride) == NUM_SNAPSHOT_HDR_CHUNKS)
++		ps->next_free++;
++}
++
+ /*
+  * Read or write a metadata area.  Remembering to skip the first
+  * chunk which holds the header.
+@@ -502,6 +510,8 @@ static int read_exceptions(struct pstore
+ 
+ 	ps->current_area--;
+ 
++	skip_metadata(ps);
++
+ 	return 0;
+ }
+ 
+@@ -616,8 +626,6 @@ static int persistent_prepare_exception(
+ 					struct dm_exception *e)
+ {
+ 	struct pstore *ps = get_info(store);
+-	uint32_t stride;
+-	chunk_t next_free;
+ 	sector_t size = get_dev_size(dm_snap_cow(store->snap)->bdev);
+ 
+ 	/* Is there enough room ? */
+@@ -630,10 +638,8 @@ static int persistent_prepare_exception(
+ 	 * Move onto the next free pending, making sure to take
+ 	 * into account the location of the metadata chunks.
+ 	 */
+-	stride = (ps->exceptions_per_area + 1);
+-	next_free = ++ps->next_free;
+-	if (sector_div(next_free, stride) == 1)
+-		ps->next_free++;
++	ps->next_free++;
++	skip_metadata(ps);
+ 
+ 	atomic_inc(&ps->pending_count);
+ 	return 0;
diff --git a/queue-3.4/fs-buffer-move-allocation-failure-loop-into-the-allocator.patch b/queue-3.4/fs-buffer-move-allocation-failure-loop-into-the-allocator.patch
new file mode 100644
index 00000000000..440cee67db8
--- /dev/null
+++ b/queue-3.4/fs-buffer-move-allocation-failure-loop-into-the-allocator.patch
@@ -0,0 +1,75 @@
+From 84235de394d9775bfaa7fa9762a59d91fef0c1fc Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Wed, 16 Oct 2013 13:47:00 -0700
+Subject: fs: buffer: move allocation failure loop into the allocator
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 84235de394d9775bfaa7fa9762a59d91fef0c1fc upstream.
+
+Buffer allocation has a very crude indefinite loop around waking the
+flusher threads and performing global NOFS direct reclaim because it can
+not handle allocation failures.
+
+The most immediate problem with this is that the allocation may fail due
+to a memory cgroup limit, where flushers + direct reclaim might not make
+any progress towards resolving the situation at all.  Because unlike the
+global case, a memory cgroup may not have any cache at all, only
+anonymous pages but no swap.  This situation will lead to a reclaim
+livelock with insane IO from waking the flushers and thrashing unrelated
+filesystem cache in a tight loop.
+
+Use __GFP_NOFAIL allocations for buffers for now.  This makes sure that
+any looping happens in the page allocator, which knows how to
+orchestrate kswapd, direct reclaim, and the flushers sensibly.  It also
+allows memory cgroups to detect allocations that can't handle failure
+and will allow them to ultimately bypass the limit if reclaim can not
+make progress.
+
+Reported-by: azurIt <azurit@pobox.sk>
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Michal Hocko <mhocko@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/buffer.c     |   14 ++++++++++++--
+ mm/memcontrol.c |    2 ++
+ 2 files changed, 14 insertions(+), 2 deletions(-)
+
+--- a/fs/buffer.c
++++ b/fs/buffer.c
+@@ -957,9 +957,19 @@ grow_dev_page(struct block_device *bdev,
+ 	struct buffer_head *bh;
+ 	sector_t end_block;
+ 	int ret = 0;		/* Will call free_more_memory() */
++	gfp_t gfp_mask;
+ 
+-	page = find_or_create_page(inode->i_mapping, index,
+-		(mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
++	gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS;
++	gfp_mask |= __GFP_MOVABLE;
++	/*
++	 * XXX: __getblk_slow() can not really deal with failure and
++	 * will endlessly loop on improvised global reclaim.  Prefer
++	 * looping in the allocator rather than here, at least that
++	 * code knows what it's doing.
++	 */
++	gfp_mask |= __GFP_NOFAIL;
++
++	page = find_or_create_page(inode->i_mapping, index, gfp_mask);
+ 	if (!page)
+ 		return ret;
+ 
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -2412,6 +2412,8 @@ done:
+ 	return 0;
+ nomem:
+ 	*ptr = NULL;
++	if (gfp_mask & __GFP_NOFAIL)
++		return 0;
+ 	return -ENOMEM;
+ bypass:
+ 	*ptr = root_mem_cgroup;
diff --git a/queue-3.4/mm-migration-do-not-lose-soft-dirty-bit-if-page-is-in-migration-state.patch b/queue-3.4/mm-migration-do-not-lose-soft-dirty-bit-if-page-is-in-migration-state.patch
new file mode 100644
index 00000000000..7ac0ea0b7e9
--- /dev/null
+++ b/queue-3.4/mm-migration-do-not-lose-soft-dirty-bit-if-page-is-in-migration-state.patch
@@ -0,0 +1,80 @@
+From c3d16e16522fe3fe8759735850a0676da18f4b1d Mon Sep 17 00:00:00 2001
+From: Cyrill Gorcunov <gorcunov@gmail.com>
+Date: Wed, 16 Oct 2013 13:46:51 -0700
+Subject: mm: migration: do not lose soft dirty bit if page is in migration state
+
+From: Cyrill Gorcunov <gorcunov@gmail.com>
+
+commit c3d16e16522fe3fe8759735850a0676da18f4b1d upstream.
+
+If page migration is turned on in config and the page is migrating, we
+may lose the soft dirty bit.  If fork and mprotect are called on
+migrating pages (once migration is complete) pages do not obtain the
+soft dirty bit in the corresponding pte entries.  Fix it by adding an
+appropriate test on swap entries.
+
+Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
+Cc: Pavel Emelyanov <xemul@parallels.com>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Matt Mackall <mpm@selenic.com>
+Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
+Cc: Marcelo Tosatti <mtosatti@redhat.com>
+Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
+Cc: Stephen Rothwell <sfr@canb.auug.org.au>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Cc: Mel Gorman <mel@csn.ul.ie>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memory.c   |    2 ++
+ mm/migrate.c  |    2 ++
+ mm/mprotect.c |    7 +++++--
+ 3 files changed, 9 insertions(+), 2 deletions(-)
+
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -876,6 +876,8 @@ copy_one_pte(struct mm_struct *dst_mm, s
+ 					 */
+ 					make_migration_entry_read(&entry);
+ 					pte = swp_entry_to_pte(entry);
++					if (pte_swp_soft_dirty(*src_pte))
++						pte = pte_swp_mksoft_dirty(pte);
+ 					set_pte_at(src_mm, addr, src_pte, pte);
+ 				}
+ 			}
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -139,6 +139,8 @@ static int remove_migration_pte(struct p
+ 
+ 	get_page(new);
+ 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
++	if (pte_swp_soft_dirty(*ptep))
++		pte = pte_mksoft_dirty(pte);
+ 	if (is_write_migration_entry(entry))
+ 		pte = pte_mkwrite(pte);
+ #ifdef CONFIG_HUGETLB_PAGE
+--- a/mm/mprotect.c
++++ b/mm/mprotect.c
+@@ -64,13 +64,16 @@ static void change_pte_range(struct mm_s
+ 			swp_entry_t entry = pte_to_swp_entry(oldpte);
+ 
+ 			if (is_write_migration_entry(entry)) {
++				pte_t newpte;
+ 				/*
+ 				 * A protection check is difficult so
+ 				 * just be safe and disable write
+ 				 */
+ 				make_migration_entry_read(&entry);
+-				set_pte_at(mm, addr, pte,
+-					swp_entry_to_pte(entry));
++				newpte = swp_entry_to_pte(entry);
++				if (pte_swp_soft_dirty(oldpte))
++					newpte = pte_swp_mksoft_dirty(newpte);
++				set_pte_at(mm, addr, pte, newpte);
+ 			}
+ 		}
+ 	} while (pte++, addr += PAGE_SIZE, addr != end);
diff --git a/queue-3.4/series b/queue-3.4/series
index 792fcbb866c..a7952a67ccc 100644
--- a/queue-3.4/series
+++ b/queue-3.4/series
@@ -24,3 +24,8 @@ net-fix-cipso-packet-validation-when-netlabel.patch
 inet-fix-possible-memory-corruption-with-udp_cork-and-ufo.patch
 davinci_emac.c-fix-iff_allmulti-setup.patch
 ext3-return-32-64-bit-dir-name-hash-according-to-usage-type.patch
+dm-snapshot-fix-data-corruption.patch
+mm-migration-do-not-lose-soft-dirty-bit-if-page-is-in-migration-state.patch
+writeback-fix-negative-bdi-max-pause.patch
+wireless-radiotap-fix-parsing-buffer-overrun.patch
+fs-buffer-move-allocation-failure-loop-into-the-allocator.patch
diff --git a/queue-3.4/wireless-radiotap-fix-parsing-buffer-overrun.patch b/queue-3.4/wireless-radiotap-fix-parsing-buffer-overrun.patch
new file mode 100644
index 00000000000..8b66665d6e2
--- /dev/null
+++ b/queue-3.4/wireless-radiotap-fix-parsing-buffer-overrun.patch
@@ -0,0 +1,54 @@
+From f5563318ff1bde15b10e736e97ffce13be08bc1a Mon Sep 17 00:00:00 2001
+From: Johannes Berg <johannes.berg@intel.com>
+Date: Fri, 11 Oct 2013 14:47:05 +0200
+Subject: wireless: radiotap: fix parsing buffer overrun
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+commit f5563318ff1bde15b10e736e97ffce13be08bc1a upstream.
+
+When parsing an invalid radiotap header, the parser can overrun
+the buffer that is passed in because it doesn't correctly check
+ 1) the minimum radiotap header size
+ 2) the space for extended bitmaps
+
+The first issue doesn't affect any in-kernel user as they all
+check the minimum size before calling the radiotap function.
+The second issue could potentially affect the kernel if an skb
+is passed in that consists only of the radiotap header with a
+lot of extended bitmaps that extend past the SKB. In that case
+a read-only buffer overrun by at most 4 bytes is possible.
+
+Fix this by adding the appropriate checks to the parser.
+
+Reported-by: Evan Huus <eapache@gmail.com>
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/wireless/radiotap.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/net/wireless/radiotap.c
++++ b/net/wireless/radiotap.c
+@@ -95,6 +95,10 @@ int ieee80211_radiotap_iterator_init(
+ 	struct ieee80211_radiotap_header *radiotap_header,
+ 	int max_length, const struct ieee80211_radiotap_vendor_namespaces *vns)
+ {
++	/* check the radiotap header can actually be present */
++	if (max_length < sizeof(struct ieee80211_radiotap_header))
++		return -EINVAL;
++
+ 	/* Linux only supports version 0 radiotap format */
+ 	if (radiotap_header->it_version)
+ 		return -EINVAL;
+@@ -129,7 +133,8 @@ int ieee80211_radiotap_iterator_init(
+ 			 */
+ 
+ 			if ((unsigned long)iterator->_arg -
+-			    (unsigned long)iterator->_rtheader >
++			    (unsigned long)iterator->_rtheader +
++			    sizeof(uint32_t) >
+ 			    (unsigned long)iterator->_max_length)
+ 				return -EINVAL;
+ 		}
diff --git a/queue-3.4/writeback-fix-negative-bdi-max-pause.patch b/queue-3.4/writeback-fix-negative-bdi-max-pause.patch
new file mode 100644
index 00000000000..48dcf5e9032
--- /dev/null
+++ b/queue-3.4/writeback-fix-negative-bdi-max-pause.patch
@@ -0,0 +1,93 @@
+From e3b6c655b91e01a1dade056cfa358581b47a5351 Mon Sep 17 00:00:00 2001
+From: Fengguang Wu <fengguang.wu@intel.com>
+Date: Wed, 16 Oct 2013 13:47:03 -0700
+Subject: writeback: fix negative bdi max pause
+
+From: Fengguang Wu <fengguang.wu@intel.com>
+
+commit e3b6c655b91e01a1dade056cfa358581b47a5351 upstream.
+
+Toralf runs trinity on UML/i386.  After some time it hangs and the last
+message line is
+
+	BUG: soft lockup - CPU#0 stuck for 22s! [trinity-child0:1521]
+
+It's found that pages_dirtied becomes very large.  More than 1000000000
+pages in this case:
+
+	period = HZ * pages_dirtied / task_ratelimit;
+	BUG_ON(pages_dirtied > 2000000000);
+	BUG_ON(pages_dirtied > 1000000000);      <---------
+
+UML debug printf shows that we got negative pause here:
+
+	ick: pause : -984
+	ick: pages_dirtied : 0
+	ick: task_ratelimit: 0
+
+	 pause:
+	+       if (pause < 0)  {
+	+               extern int printf(char *, ...);
+	+               printf("ick : pause : %li\n", pause);
+	+               printf("ick: pages_dirtied : %lu\n", pages_dirtied);
+	+               printf("ick: task_ratelimit: %lu\n", task_ratelimit);
+	+               BUG_ON(1);
+	+       }
+	        trace_balance_dirty_pages(bdi,
+
+Since pause is bounded by [min_pause, max_pause], where min_pause is also
+bounded by max_pause, it's suspected and demonstrated that the
+max_pause calculation goes wrong:
+
+	ick: pause : -717
+	ick: min_pause : -177
+	ick: max_pause : -717
+	ick: pages_dirtied : 14
+	ick: task_ratelimit: 0
+
+The problem lies in the two "long = unsigned long" assignments in
+bdi_max_pause() which might go negative if the highest bit is 1, and the
+min_t(long, ...) check failed to protect it falling under 0.  Fix all of
+them by using "unsigned long" throughout the function.
+
+Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
+Reported-by: Toralf Förster <toralf.foerster@gmx.de>
+Tested-by: Toralf Förster <toralf.foerster@gmx.de>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Cc: Richard Weinberger <richard@nod.at>
+Cc: Geert Uytterhoeven <geert@linux-m68k.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/page-writeback.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/mm/page-writeback.c
++++ b/mm/page-writeback.c
+@@ -1072,11 +1072,11 @@ static unsigned long dirty_poll_interval
+ 	return 1;
+ }
+ 
+-static long bdi_max_pause(struct backing_dev_info *bdi,
+-			  unsigned long bdi_dirty)
++static unsigned long bdi_max_pause(struct backing_dev_info *bdi,
++				   unsigned long bdi_dirty)
+ {
+-	long bw = bdi->avg_write_bandwidth;
+-	long t;
++	unsigned long bw = bdi->avg_write_bandwidth;
++	unsigned long t;
+ 
+ 	/*
+ 	 * Limit pause time for small memory systems. If sleeping for too long
+@@ -1088,7 +1088,7 @@ static long bdi_max_pause(struct backing
+ 	t = bdi_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
+ 	t++;
+ 
+-	return min_t(long, t, MAX_PAUSE);
++	return min_t(unsigned long, t, MAX_PAUSE);
+ }
+ 
+ static long bdi_min_pause(struct backing_dev_info *bdi,