From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 26 Oct 2012 19:36:43 +0000 (-0700)
Subject: 3.4-stable patches
X-Git-Tag: v3.0.49~16
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8c71e98e962e1e02cf549288ba0cdf901431b7dd;p=thirdparty%2Fkernel%2Fstable-queue.git

3.4-stable patches

added patches:
	mm-fix-xfs-oops-due-to-dirty-pages-without-buffers-on-s390.patch
	revert-sunrpc-ensure-we-close-the-socket-on-epipe-errors-too.patch
	sunrpc-clear-the-connect-flag-when-socket-state-is-tcp_close_wait.patch
	sunrpc-get-rid-of-the-xs_error_report-socket-callback.patch
	sunrpc-prevent-races-in-xs_abort_connection.patch
	x86-mm-trim-memory-in-memblock-to-be-page-aligned.patch
---

diff --git a/queue-3.4/mm-fix-xfs-oops-due-to-dirty-pages-without-buffers-on-s390.patch b/queue-3.4/mm-fix-xfs-oops-due-to-dirty-pages-without-buffers-on-s390.patch
new file mode 100644
index 00000000000..e7360279f65
--- /dev/null
+++ b/queue-3.4/mm-fix-xfs-oops-due-to-dirty-pages-without-buffers-on-s390.patch
@@ -0,0 +1,107 @@
+From ef5d437f71afdf4afdbab99213add99f4b1318fd Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Thu, 25 Oct 2012 13:37:31 -0700
+Subject: mm: fix XFS oops due to dirty pages without buffers on s390
+
+From: Jan Kara <jack@suse.cz>
+
+commit ef5d437f71afdf4afdbab99213add99f4b1318fd upstream.
+
+On s390 any write to a page (even from kernel itself) sets architecture
+specific page dirty bit.  Thus when a page is written to via buffered
+write, HW dirty bit gets set and when we later map and unmap the page,
+page_remove_rmap() finds the dirty bit and calls set_page_dirty().
+
+Dirtying of a page which shouldn't be dirty can cause all sorts of
+problems to filesystems.  The bug we observed in practice is that
+buffers from the page get freed, so when the page gets later marked as
+dirty and writeback writes it, XFS crashes due to an assertion
+BUG_ON(!PagePrivate(page)) in page_buffers() called from
+xfs_count_page_state().
+
+A similar problem can also happen when a zero_user_segment() call from
+xfs_vm_writepage() (or block_write_full_page() for that matter) sets the
+hardware dirty bit during writeback, the buffers are later freed, and
+then the page is unmapped.
+
+Fix the issue by ignoring s390 HW dirty bit for page cache pages of
+mappings with mapping_cap_account_dirty().  This is safe because for
+such mappings when a page gets marked as writeable in PTE it is also
+marked dirty in do_wp_page() or do_page_fault().  When the dirty bit is
+cleared by clear_page_dirty_for_io(), the page gets writeprotected in
+page_mkclean().  So pagecache page is writeable if and only if it is
+dirty.
+
+Thanks to Hugh Dickins for pointing out mapping has to have
+mapping_cap_account_dirty() for things to work and proposing a cleaned
+up variant of the patch.
+
+The patch has survived about two hours of running fsx-linux on tmpfs
+while heavily swapping and several days of running on our build machines
+where the original problem was triggered.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/rmap.c |   20 +++++++++++++++-----
+ 1 file changed, 15 insertions(+), 5 deletions(-)
+
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -56,6 +56,7 @@
+ #include <linux/mmu_notifier.h>
+ #include <linux/migrate.h>
+ #include <linux/hugetlb.h>
++#include <linux/backing-dev.h>
+ 
+ #include <asm/tlbflush.h>
+ 
+@@ -977,11 +978,8 @@ int page_mkclean(struct page *page)
+ 
+ 	if (page_mapped(page)) {
+ 		struct address_space *mapping = page_mapping(page);
+-		if (mapping) {
++		if (mapping)
+ 			ret = page_mkclean_file(mapping, page);
+-			if (page_test_and_clear_dirty(page_to_pfn(page), 1))
+-				ret = 1;
+-		}
+ 	}
+ 
+ 	return ret;
+@@ -1167,6 +1165,7 @@ void page_add_file_rmap(struct page *pag
+  */
+ void page_remove_rmap(struct page *page)
+ {
++	struct address_space *mapping = page_mapping(page);
+ 	bool anon = PageAnon(page);
+ 	bool locked;
+ 	unsigned long flags;
+@@ -1189,8 +1188,19 @@ void page_remove_rmap(struct page *page)
+ 	 * this if the page is anon, so about to be freed; but perhaps
+ 	 * not if it's in swapcache - there might be another pte slot
+ 	 * containing the swap entry, but page not yet written to swap.
++	 *
++	 * And we can skip it on file pages, so long as the filesystem
++	 * participates in dirty tracking; but need to catch shm and tmpfs
++	 * and ramfs pages which have been modified since creation by read
++	 * fault.
++	 *
++	 * Note that mapping must be decided above, before decrementing
++	 * mapcount (which luckily provides a barrier): once page is unmapped,
++	 * it could be truncated and page->mapping reset to NULL at any moment.
++	 * Note also that we are relying on page_mapping(page) to set mapping
++	 * to &swapper_space when PageSwapCache(page).
+ 	 */
+-	if ((!anon || PageSwapCache(page)) &&
++	if (mapping && !mapping_cap_account_dirty(mapping) &&
+ 	    page_test_and_clear_dirty(page_to_pfn(page), 1))
+ 		set_page_dirty(page);
+ 	/*
diff --git a/queue-3.4/revert-sunrpc-ensure-we-close-the-socket-on-epipe-errors-too.patch b/queue-3.4/revert-sunrpc-ensure-we-close-the-socket-on-epipe-errors-too.patch
new file mode 100644
index 00000000000..19f834fe19f
--- /dev/null
+++ b/queue-3.4/revert-sunrpc-ensure-we-close-the-socket-on-epipe-errors-too.patch
@@ -0,0 +1,38 @@
+From b9d2bb2ee537424a7f855e1f93eed44eb9ee0854 Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+Date: Tue, 23 Oct 2012 11:40:02 -0400
+Subject: Revert "SUNRPC: Ensure we close the socket on EPIPE errors too..."
+
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+
+commit b9d2bb2ee537424a7f855e1f93eed44eb9ee0854 upstream.
+
+This reverts commit 55420c24a0d4d1fce70ca713f84aa00b6b74a70e.
+Now that we clear the connected flag when entering TCP_CLOSE_WAIT,
+the deadlock described in that commit is no longer possible.
+Instead, the resulting call to xs_tcp_shutdown() can interfere
+with pending reconnection attempts.
+
+Reported-by: Chris Perl <chris.perl@gmail.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Tested-by: Chris Perl <chris.perl@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/sunrpc/xprtsock.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/sunrpc/xprtsock.c
++++ b/net/sunrpc/xprtsock.c
+@@ -736,10 +736,10 @@ static int xs_tcp_send_request(struct rp
+ 		dprintk("RPC:       sendmsg returned unrecognized error %d\n",
+ 			-status);
+ 	case -ECONNRESET:
+-	case -EPIPE:
+ 		xs_tcp_shutdown(xprt);
+ 	case -ECONNREFUSED:
+ 	case -ENOTCONN:
++	case -EPIPE:
+ 		clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
+ 	}
+ 
diff --git a/queue-3.4/series b/queue-3.4/series
index c69d8f99dd4..a60076de1f6 100644
--- a/queue-3.4/series
+++ b/queue-3.4/series
@@ -6,3 +6,9 @@ fs-compat_ioctl.c-video_set_spu_palette-missing-error-check.patch
 drivers-rtc-rtc-imxdi.c-add-missing-spin-lock-initialization.patch
 genalloc-stop-crashing-the-system-when-destroying-a-pool.patch
 arm-7559-1-smp-switch-away-from-the-idmap-before-updating-init_mm.mm_count.patch
+x86-mm-trim-memory-in-memblock-to-be-page-aligned.patch
+mm-fix-xfs-oops-due-to-dirty-pages-without-buffers-on-s390.patch
+sunrpc-get-rid-of-the-xs_error_report-socket-callback.patch
+sunrpc-clear-the-connect-flag-when-socket-state-is-tcp_close_wait.patch
+revert-sunrpc-ensure-we-close-the-socket-on-epipe-errors-too.patch
+sunrpc-prevent-races-in-xs_abort_connection.patch
diff --git a/queue-3.4/sunrpc-clear-the-connect-flag-when-socket-state-is-tcp_close_wait.patch b/queue-3.4/sunrpc-clear-the-connect-flag-when-socket-state-is-tcp_close_wait.patch
new file mode 100644
index 00000000000..548e1b6e3be
--- /dev/null
+++ b/queue-3.4/sunrpc-clear-the-connect-flag-when-socket-state-is-tcp_close_wait.patch
@@ -0,0 +1,30 @@
+From d0bea455dd48da1ecbd04fedf00eb89437455fdc Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+Date: Tue, 23 Oct 2012 11:35:47 -0400
+Subject: SUNRPC: Clear the connect flag when socket state is TCP_CLOSE_WAIT
+
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+
+commit d0bea455dd48da1ecbd04fedf00eb89437455fdc upstream.
+
+This is needed to ensure that we call xprt_connect() upon the next
+call to call_connect().
+
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Tested-by: Chris Perl <chris.perl@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/sunrpc/xprtsock.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/sunrpc/xprtsock.c
++++ b/net/sunrpc/xprtsock.c
+@@ -1525,6 +1525,7 @@ static void xs_tcp_state_change(struct s
+ 	case TCP_CLOSE_WAIT:
+ 		/* The server initiated a shutdown of the socket */
+ 		xprt->connect_cookie++;
++		clear_bit(XPRT_CONNECTED, &xprt->state);
+ 		xs_tcp_force_close(xprt);
+ 	case TCP_CLOSING:
+ 		/*
diff --git a/queue-3.4/sunrpc-get-rid-of-the-xs_error_report-socket-callback.patch b/queue-3.4/sunrpc-get-rid-of-the-xs_error_report-socket-callback.patch
new file mode 100644
index 00000000000..b055dfc0111
--- /dev/null
+++ b/queue-3.4/sunrpc-get-rid-of-the-xs_error_report-socket-callback.patch
@@ -0,0 +1,105 @@
+From f878b657ce8e7d3673afe48110ec208a29e38c4a Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+Date: Mon, 22 Oct 2012 17:14:36 -0400
+Subject: SUNRPC: Get rid of the xs_error_report socket callback
+
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+
+commit f878b657ce8e7d3673afe48110ec208a29e38c4a upstream.
+
+Chris Perl reports that we're seeing races between the wakeup call in
+xs_error_report and the connect attempts. Basically, Chris has shown
+that in certain circumstances, the call to xs_error_report causes the
+rpc_task that is responsible for reconnecting to wake up early, thus
+triggering a disconnect and retry.
+
+Since the sk->sk_error_report() calls in the socket layer are always
+followed by a tcp_done() in the cases where we care about waking up
+the rpc_tasks, just let the state_change callbacks take responsibility
+for those wake ups.
+
+Reported-by: Chris Perl <chris.perl@gmail.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Tested-by: Chris Perl <chris.perl@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/sunrpc/xprtsock.c |   25 -------------------------
+ 1 file changed, 25 deletions(-)
+
+--- a/net/sunrpc/xprtsock.c
++++ b/net/sunrpc/xprtsock.c
+@@ -254,7 +254,6 @@ struct sock_xprt {
+ 	void			(*old_data_ready)(struct sock *, int);
+ 	void			(*old_state_change)(struct sock *);
+ 	void			(*old_write_space)(struct sock *);
+-	void			(*old_error_report)(struct sock *);
+ };
+ 
+ /*
+@@ -781,7 +780,6 @@ static void xs_save_old_callbacks(struct
+ 	transport->old_data_ready = sk->sk_data_ready;
+ 	transport->old_state_change = sk->sk_state_change;
+ 	transport->old_write_space = sk->sk_write_space;
+-	transport->old_error_report = sk->sk_error_report;
+ }
+ 
+ static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *sk)
+@@ -789,7 +787,6 @@ static void xs_restore_old_callbacks(str
+ 	sk->sk_data_ready = transport->old_data_ready;
+ 	sk->sk_state_change = transport->old_state_change;
+ 	sk->sk_write_space = transport->old_write_space;
+-	sk->sk_error_report = transport->old_error_report;
+ }
+ 
+ static void xs_reset_transport(struct sock_xprt *transport)
+@@ -1552,25 +1549,6 @@ static void xs_tcp_state_change(struct s
+ 	read_unlock_bh(&sk->sk_callback_lock);
+ }
+ 
+-/**
+- * xs_error_report - callback mainly for catching socket errors
+- * @sk: socket
+- */
+-static void xs_error_report(struct sock *sk)
+-{
+-	struct rpc_xprt *xprt;
+-
+-	read_lock_bh(&sk->sk_callback_lock);
+-	if (!(xprt = xprt_from_sock(sk)))
+-		goto out;
+-	dprintk("RPC:       %s client %p...\n"
+-			"RPC:       error %d\n",
+-			__func__, xprt, sk->sk_err);
+-	xprt_wake_pending_tasks(xprt, -EAGAIN);
+-out:
+-	read_unlock_bh(&sk->sk_callback_lock);
+-}
+-
+ static void xs_write_space(struct sock *sk)
+ {
+ 	struct socket *sock;
+@@ -1870,7 +1848,6 @@ static int xs_local_finish_connecting(st
+ 		sk->sk_user_data = xprt;
+ 		sk->sk_data_ready = xs_local_data_ready;
+ 		sk->sk_write_space = xs_udp_write_space;
+-		sk->sk_error_report = xs_error_report;
+ 		sk->sk_allocation = GFP_ATOMIC;
+ 
+ 		xprt_clear_connected(xprt);
+@@ -1959,7 +1936,6 @@ static void xs_udp_finish_connecting(str
+ 		sk->sk_user_data = xprt;
+ 		sk->sk_data_ready = xs_udp_data_ready;
+ 		sk->sk_write_space = xs_udp_write_space;
+-		sk->sk_error_report = xs_error_report;
+ 		sk->sk_no_check = UDP_CSUM_NORCV;
+ 		sk->sk_allocation = GFP_ATOMIC;
+ 
+@@ -2075,7 +2051,6 @@ static int xs_tcp_finish_connecting(stru
+ 		sk->sk_data_ready = xs_tcp_data_ready;
+ 		sk->sk_state_change = xs_tcp_state_change;
+ 		sk->sk_write_space = xs_tcp_write_space;
+-		sk->sk_error_report = xs_error_report;
+ 		sk->sk_allocation = GFP_ATOMIC;
+ 
+ 		/* socket options */
diff --git a/queue-3.4/sunrpc-prevent-races-in-xs_abort_connection.patch b/queue-3.4/sunrpc-prevent-races-in-xs_abort_connection.patch
new file mode 100644
index 00000000000..5bef4a824e9
--- /dev/null
+++ b/queue-3.4/sunrpc-prevent-races-in-xs_abort_connection.patch
@@ -0,0 +1,63 @@
+From 4bc1e68ed6a8b59be8a79eb719be515a55c7bc68 Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+Date: Tue, 23 Oct 2012 17:50:07 -0400
+Subject: SUNRPC: Prevent races in xs_abort_connection()
+
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+
+commit 4bc1e68ed6a8b59be8a79eb719be515a55c7bc68 upstream.
+
+The call to xprt_disconnect_done() that is triggered by a successful
+connection reset will trigger another automatic wakeup of all tasks
+on the xprt->pending rpc_wait_queue. In particular it will cause an
+early wake up of the task that called xprt_connect().
+
+All we really want to do here is clear all the socket-specific state
+flags, so we split that functionality out of xs_sock_mark_closed()
+into a helper that can be called by xs_abort_connection().
+
+Reported-by: Chris Perl <chris.perl@gmail.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Tested-by: Chris Perl <chris.perl@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/sunrpc/xprtsock.c |   13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/net/sunrpc/xprtsock.c
++++ b/net/sunrpc/xprtsock.c
+@@ -1462,7 +1462,7 @@ static void xs_tcp_cancel_linger_timeout
+ 	xprt_clear_connecting(xprt);
+ }
+ 
+-static void xs_sock_mark_closed(struct rpc_xprt *xprt)
++static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt)
+ {
+ 	smp_mb__before_clear_bit();
+ 	clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
+@@ -1470,6 +1470,11 @@ static void xs_sock_mark_closed(struct r
+ 	clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
+ 	clear_bit(XPRT_CLOSING, &xprt->state);
+ 	smp_mb__after_clear_bit();
++}
++
++static void xs_sock_mark_closed(struct rpc_xprt *xprt)
++{
++	xs_sock_reset_connection_flags(xprt);
+ 	/* Mark transport as closed and wake up all pending tasks */
+ 	xprt_disconnect_done(xprt);
+ }
+@@ -2004,10 +2009,8 @@ static void xs_abort_connection(struct s
+ 	any.sa_family = AF_UNSPEC;
+ 	result = kernel_connect(transport->sock, &any, sizeof(any), 0);
+ 	if (!result)
+-		xs_sock_mark_closed(&transport->xprt);
+-	else
+-		dprintk("RPC:       AF_UNSPEC connect return code %d\n",
+-				result);
++		xs_sock_reset_connection_flags(&transport->xprt);
++	dprintk("RPC:       AF_UNSPEC connect return code %d\n", result);
+ }
+ 
+ static void xs_tcp_reuse_connection(struct sock_xprt *transport)
diff --git a/queue-3.4/x86-mm-trim-memory-in-memblock-to-be-page-aligned.patch b/queue-3.4/x86-mm-trim-memory-in-memblock-to-be-page-aligned.patch
new file mode 100644
index 00000000000..c28845d5c85
--- /dev/null
+++ b/queue-3.4/x86-mm-trim-memory-in-memblock-to-be-page-aligned.patch
@@ -0,0 +1,82 @@
+From 6ede1fd3cb404c0016de6ac529df46d561bd558b Mon Sep 17 00:00:00 2001
+From: Yinghai Lu <yinghai@kernel.org>
+Date: Mon, 22 Oct 2012 16:35:18 -0700
+Subject: x86, mm: Trim memory in memblock to be page aligned
+
+From: Yinghai Lu <yinghai@kernel.org>
+
+commit 6ede1fd3cb404c0016de6ac529df46d561bd558b upstream.
+
+We will not map partial pages, so we need to make sure that memblock
+allocation will not hand out those bytes.
+
+Also we will use for_each_mem_pfn_range() to loop to map memory
+range to keep them consistent.
+
+Signed-off-by: Yinghai Lu <yinghai@kernel.org>
+Link: http://lkml.kernel.org/r/CAE9FiQVZirvaBMFYRfXMmWEcHbKSicQEHz4VAwUv0xFCk51ZNw@mail.gmail.com
+Acked-by: Jacob Shin <jacob.shin@amd.com>
+Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/e820.c   |    3 +++
+ include/linux/memblock.h |    1 +
+ mm/memblock.c            |   24 ++++++++++++++++++++++++
+ 3 files changed, 28 insertions(+)
+
+--- a/arch/x86/kernel/e820.c
++++ b/arch/x86/kernel/e820.c
+@@ -1076,6 +1076,9 @@ void __init memblock_x86_fill(void)
+ 		memblock_add(ei->addr, ei->size);
+ 	}
+ 
++	/* throw away partial pages */
++	memblock_trim_memory(PAGE_SIZE);
++
+ 	memblock_dump_all();
+ }
+ 
+--- a/include/linux/memblock.h
++++ b/include/linux/memblock.h
+@@ -57,6 +57,7 @@ int memblock_add(phys_addr_t base, phys_
+ int memblock_remove(phys_addr_t base, phys_addr_t size);
+ int memblock_free(phys_addr_t base, phys_addr_t size);
+ int memblock_reserve(phys_addr_t base, phys_addr_t size);
++void memblock_trim_memory(phys_addr_t align);
+ 
+ #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
+--- a/mm/memblock.c
++++ b/mm/memblock.c
+@@ -908,6 +908,30 @@ int __init_memblock memblock_is_region_r
+ 	return memblock_overlaps_region(&memblock.reserved, base, size) >= 0;
+ }
+ 
++void __init_memblock memblock_trim_memory(phys_addr_t align)
++{
++	int i;
++	phys_addr_t start, end, orig_start, orig_end;
++	struct memblock_type *mem = &memblock.memory;
++
++	for (i = 0; i < mem->cnt; i++) {
++		orig_start = mem->regions[i].base;
++		orig_end = mem->regions[i].base + mem->regions[i].size;
++		start = round_up(orig_start, align);
++		end = round_down(orig_end, align);
++
++		if (start == orig_start && end == orig_end)
++			continue;
++
++		if (start < end) {
++			mem->regions[i].base = start;
++			mem->regions[i].size = end - start;
++		} else {
++			memblock_remove_region(mem, i);
++			i--;
++		}
++	}
++}
+ 
+ void __init_memblock memblock_set_current_limit(phys_addr_t limit)
+ {