From: Greg Kroah-Hartman Date: Fri, 26 Oct 2012 19:36:43 +0000 (-0700) Subject: 3.4-stable patches X-Git-Tag: v3.0.49~16 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8c71e98e962e1e02cf549288ba0cdf901431b7dd;p=thirdparty%2Fkernel%2Fstable-queue.git 3.4-stable patches added patches: mm-fix-xfs-oops-due-to-dirty-pages-without-buffers-on-s390.patch revert-sunrpc-ensure-we-close-the-socket-on-epipe-errors-too.patch sunrpc-clear-the-connect-flag-when-socket-state-is-tcp_close_wait.patch sunrpc-get-rid-of-the-xs_error_report-socket-callback.patch sunrpc-prevent-races-in-xs_abort_connection.patch x86-mm-trim-memory-in-memblock-to-be-page-aligned.patch --- diff --git a/queue-3.4/mm-fix-xfs-oops-due-to-dirty-pages-without-buffers-on-s390.patch b/queue-3.4/mm-fix-xfs-oops-due-to-dirty-pages-without-buffers-on-s390.patch new file mode 100644 index 00000000000..e7360279f65 --- /dev/null +++ b/queue-3.4/mm-fix-xfs-oops-due-to-dirty-pages-without-buffers-on-s390.patch @@ -0,0 +1,107 @@ +From ef5d437f71afdf4afdbab99213add99f4b1318fd Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Thu, 25 Oct 2012 13:37:31 -0700 +Subject: mm: fix XFS oops due to dirty pages without buffers on s390 + +From: Jan Kara + +commit ef5d437f71afdf4afdbab99213add99f4b1318fd upstream. + +On s390 any write to a page (even from kernel itself) sets architecture +specific page dirty bit. Thus when a page is written to via buffered +write, HW dirty bit gets set and when we later map and unmap the page, +page_remove_rmap() finds the dirty bit and calls set_page_dirty(). + +Dirtying of a page which shouldn't be dirty can cause all sorts of +problems to filesystems. The bug we observed in practice is that +buffers from the page get freed, so when the page gets later marked as +dirty and writeback writes it, XFS crashes due to an assertion +BUG_ON(!PagePrivate(page)) in page_buffers() called from +xfs_count_page_state(). 
+ +A similar problem can also happen when the zero_user_segment() call from +xfs_vm_writepage() (or block_write_full_page() for that matter) sets the +hardware dirty bit during writeback, later buffers get freed, and then +page unmapped. + +Fix the issue by ignoring s390 HW dirty bit for page cache pages of +mappings with mapping_cap_account_dirty(). This is safe because for +such mappings when a page gets marked as writeable in PTE it is also +marked dirty in do_wp_page() or do_page_fault(). When the dirty bit is +cleared by clear_page_dirty_for_io(), the page gets writeprotected in +page_mkclean(). So pagecache page is writeable if and only if it is +dirty. + +Thanks to Hugh Dickins for pointing out mapping has to have +mapping_cap_account_dirty() for things to work and proposing a cleaned +up variant of the patch. + +The patch has survived about two hours of running fsx-linux on tmpfs +while heavily swapping and several days of running on our build machines +where the original problem was triggered. 
+ +Signed-off-by: Jan Kara +Cc: Martin Schwidefsky +Cc: Mel Gorman +Cc: Hugh Dickins +Cc: Heiko Carstens +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/rmap.c | 20 +++++++++++++++----- + 1 file changed, 15 insertions(+), 5 deletions(-) + +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -56,6 +56,7 @@ + #include + #include + #include ++#include + + #include + +@@ -977,11 +978,8 @@ int page_mkclean(struct page *page) + + if (page_mapped(page)) { + struct address_space *mapping = page_mapping(page); +- if (mapping) { ++ if (mapping) + ret = page_mkclean_file(mapping, page); +- if (page_test_and_clear_dirty(page_to_pfn(page), 1)) +- ret = 1; +- } + } + + return ret; +@@ -1167,6 +1165,7 @@ void page_add_file_rmap(struct page *pag + */ + void page_remove_rmap(struct page *page) + { ++ struct address_space *mapping = page_mapping(page); + bool anon = PageAnon(page); + bool locked; + unsigned long flags; +@@ -1189,8 +1188,19 @@ void page_remove_rmap(struct page *page) + * this if the page is anon, so about to be freed; but perhaps + * not if it's in swapcache - there might be another pte slot + * containing the swap entry, but page not yet written to swap. ++ * ++ * And we can skip it on file pages, so long as the filesystem ++ * participates in dirty tracking; but need to catch shm and tmpfs ++ * and ramfs pages which have been modified since creation by read ++ * fault. ++ * ++ * Note that mapping must be decided above, before decrementing ++ * mapcount (which luckily provides a barrier): once page is unmapped, ++ * it could be truncated and page->mapping reset to NULL at any moment. ++ * Note also that we are relying on page_mapping(page) to set mapping ++ * to &swapper_space when PageSwapCache(page). 
+ */ +- if ((!anon || PageSwapCache(page)) && ++ if (mapping && !mapping_cap_account_dirty(mapping) && + page_test_and_clear_dirty(page_to_pfn(page), 1)) + set_page_dirty(page); + /* diff --git a/queue-3.4/revert-sunrpc-ensure-we-close-the-socket-on-epipe-errors-too.patch b/queue-3.4/revert-sunrpc-ensure-we-close-the-socket-on-epipe-errors-too.patch new file mode 100644 index 00000000000..19f834fe19f --- /dev/null +++ b/queue-3.4/revert-sunrpc-ensure-we-close-the-socket-on-epipe-errors-too.patch @@ -0,0 +1,38 @@ +From b9d2bb2ee537424a7f855e1f93eed44eb9ee0854 Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Tue, 23 Oct 2012 11:40:02 -0400 +Subject: Revert "SUNRPC: Ensure we close the socket on EPIPE errors too..." + +From: Trond Myklebust + +commit b9d2bb2ee537424a7f855e1f93eed44eb9ee0854 upstream. + +This reverts commit 55420c24a0d4d1fce70ca713f84aa00b6b74a70e. +Now that we clear the connected flag when entering TCP_CLOSE_WAIT, +the deadlock described in this commit is no longer possible. +Instead, the resulting call to xs_tcp_shutdown() can interfere +with pending reconnection attempts. 
+ +Reported-by: Chris Perl +Signed-off-by: Trond Myklebust +Tested-by: Chris Perl +Signed-off-by: Greg Kroah-Hartman + +--- + net/sunrpc/xprtsock.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sunrpc/xprtsock.c ++++ b/net/sunrpc/xprtsock.c +@@ -736,10 +736,10 @@ static int xs_tcp_send_request(struct rp + dprintk("RPC: sendmsg returned unrecognized error %d\n", + -status); + case -ECONNRESET: +- case -EPIPE: + xs_tcp_shutdown(xprt); + case -ECONNREFUSED: + case -ENOTCONN: ++ case -EPIPE: + clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); + } + diff --git a/queue-3.4/series b/queue-3.4/series index c69d8f99dd4..a60076de1f6 100644 --- a/queue-3.4/series +++ b/queue-3.4/series @@ -6,3 +6,9 @@ fs-compat_ioctl.c-video_set_spu_palette-missing-error-check.patch drivers-rtc-rtc-imxdi.c-add-missing-spin-lock-initialization.patch genalloc-stop-crashing-the-system-when-destroying-a-pool.patch arm-7559-1-smp-switch-away-from-the-idmap-before-updating-init_mm.mm_count.patch +x86-mm-trim-memory-in-memblock-to-be-page-aligned.patch +mm-fix-xfs-oops-due-to-dirty-pages-without-buffers-on-s390.patch +sunrpc-get-rid-of-the-xs_error_report-socket-callback.patch +sunrpc-clear-the-connect-flag-when-socket-state-is-tcp_close_wait.patch +revert-sunrpc-ensure-we-close-the-socket-on-epipe-errors-too.patch +sunrpc-prevent-races-in-xs_abort_connection.patch diff --git a/queue-3.4/sunrpc-clear-the-connect-flag-when-socket-state-is-tcp_close_wait.patch b/queue-3.4/sunrpc-clear-the-connect-flag-when-socket-state-is-tcp_close_wait.patch new file mode 100644 index 00000000000..548e1b6e3be --- /dev/null +++ b/queue-3.4/sunrpc-clear-the-connect-flag-when-socket-state-is-tcp_close_wait.patch @@ -0,0 +1,30 @@ +From d0bea455dd48da1ecbd04fedf00eb89437455fdc Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Tue, 23 Oct 2012 11:35:47 -0400 +Subject: SUNRPC: Clear the connect flag when socket state is TCP_CLOSE_WAIT + +From: Trond Myklebust + +commit 
d0bea455dd48da1ecbd04fedf00eb89437455fdc upstream. + +This is needed to ensure that we call xprt_connect() upon the next +call to call_connect(). + +Signed-off-by: Trond Myklebust +Tested-by: Chris Perl +Signed-off-by: Greg Kroah-Hartman + +--- + net/sunrpc/xprtsock.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/sunrpc/xprtsock.c ++++ b/net/sunrpc/xprtsock.c +@@ -1525,6 +1525,7 @@ static void xs_tcp_state_change(struct s + case TCP_CLOSE_WAIT: + /* The server initiated a shutdown of the socket */ + xprt->connect_cookie++; ++ clear_bit(XPRT_CONNECTED, &xprt->state); + xs_tcp_force_close(xprt); + case TCP_CLOSING: + /* diff --git a/queue-3.4/sunrpc-get-rid-of-the-xs_error_report-socket-callback.patch b/queue-3.4/sunrpc-get-rid-of-the-xs_error_report-socket-callback.patch new file mode 100644 index 00000000000..b055dfc0111 --- /dev/null +++ b/queue-3.4/sunrpc-get-rid-of-the-xs_error_report-socket-callback.patch @@ -0,0 +1,105 @@ +From f878b657ce8e7d3673afe48110ec208a29e38c4a Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Mon, 22 Oct 2012 17:14:36 -0400 +Subject: SUNRPC: Get rid of the xs_error_report socket callback + +From: Trond Myklebust + +commit f878b657ce8e7d3673afe48110ec208a29e38c4a upstream. + +Chris Perl reports that we're seeing races between the wakeup call in +xs_error_report and the connect attempts. Basically, Chris has shown +that in certain circumstances, the call to xs_error_report causes the +rpc_task that is responsible for reconnecting to wake up early, thus +triggering a disconnect and retry. + +Since the sk->sk_error_report() calls in the socket layer are always +followed by a tcp_done() in the cases where we care about waking up +the rpc_tasks, just let the state_change callbacks take responsibility +for those wake ups. 
+ +Reported-by: Chris Perl +Signed-off-by: Trond Myklebust +Tested-by: Chris Perl +Signed-off-by: Greg Kroah-Hartman + +--- + net/sunrpc/xprtsock.c | 25 ------------------------- + 1 file changed, 25 deletions(-) + +--- a/net/sunrpc/xprtsock.c ++++ b/net/sunrpc/xprtsock.c +@@ -254,7 +254,6 @@ struct sock_xprt { + void (*old_data_ready)(struct sock *, int); + void (*old_state_change)(struct sock *); + void (*old_write_space)(struct sock *); +- void (*old_error_report)(struct sock *); + }; + + /* +@@ -781,7 +780,6 @@ static void xs_save_old_callbacks(struct + transport->old_data_ready = sk->sk_data_ready; + transport->old_state_change = sk->sk_state_change; + transport->old_write_space = sk->sk_write_space; +- transport->old_error_report = sk->sk_error_report; + } + + static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *sk) +@@ -789,7 +787,6 @@ static void xs_restore_old_callbacks(str + sk->sk_data_ready = transport->old_data_ready; + sk->sk_state_change = transport->old_state_change; + sk->sk_write_space = transport->old_write_space; +- sk->sk_error_report = transport->old_error_report; + } + + static void xs_reset_transport(struct sock_xprt *transport) +@@ -1552,25 +1549,6 @@ static void xs_tcp_state_change(struct s + read_unlock_bh(&sk->sk_callback_lock); + } + +-/** +- * xs_error_report - callback mainly for catching socket errors +- * @sk: socket +- */ +-static void xs_error_report(struct sock *sk) +-{ +- struct rpc_xprt *xprt; +- +- read_lock_bh(&sk->sk_callback_lock); +- if (!(xprt = xprt_from_sock(sk))) +- goto out; +- dprintk("RPC: %s client %p...\n" +- "RPC: error %d\n", +- __func__, xprt, sk->sk_err); +- xprt_wake_pending_tasks(xprt, -EAGAIN); +-out: +- read_unlock_bh(&sk->sk_callback_lock); +-} +- + static void xs_write_space(struct sock *sk) + { + struct socket *sock; +@@ -1870,7 +1848,6 @@ static int xs_local_finish_connecting(st + sk->sk_user_data = xprt; + sk->sk_data_ready = xs_local_data_ready; + sk->sk_write_space = 
xs_udp_write_space; +- sk->sk_error_report = xs_error_report; + sk->sk_allocation = GFP_ATOMIC; + + xprt_clear_connected(xprt); +@@ -1959,7 +1936,6 @@ static void xs_udp_finish_connecting(str + sk->sk_user_data = xprt; + sk->sk_data_ready = xs_udp_data_ready; + sk->sk_write_space = xs_udp_write_space; +- sk->sk_error_report = xs_error_report; + sk->sk_no_check = UDP_CSUM_NORCV; + sk->sk_allocation = GFP_ATOMIC; + +@@ -2075,7 +2051,6 @@ static int xs_tcp_finish_connecting(stru + sk->sk_data_ready = xs_tcp_data_ready; + sk->sk_state_change = xs_tcp_state_change; + sk->sk_write_space = xs_tcp_write_space; +- sk->sk_error_report = xs_error_report; + sk->sk_allocation = GFP_ATOMIC; + + /* socket options */ diff --git a/queue-3.4/sunrpc-prevent-races-in-xs_abort_connection.patch b/queue-3.4/sunrpc-prevent-races-in-xs_abort_connection.patch new file mode 100644 index 00000000000..5bef4a824e9 --- /dev/null +++ b/queue-3.4/sunrpc-prevent-races-in-xs_abort_connection.patch @@ -0,0 +1,63 @@ +From 4bc1e68ed6a8b59be8a79eb719be515a55c7bc68 Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Tue, 23 Oct 2012 17:50:07 -0400 +Subject: SUNRPC: Prevent races in xs_abort_connection() + +From: Trond Myklebust + +commit 4bc1e68ed6a8b59be8a79eb719be515a55c7bc68 upstream. + +The call to xprt_disconnect_done() that is triggered by a successful +connection reset will trigger another automatic wakeup of all tasks +on the xprt->pending rpc_wait_queue. In particular it will cause an +early wake up of the task that called xprt_connect(). 
+ +All we really want to do here is clear all the socket-specific state +flags, so we split that functionality out of xs_sock_mark_closed() +into a helper that can be called by xs_abort_connection() + +Reported-by: Chris Perl +Signed-off-by: Trond Myklebust +Tested-by: Chris Perl +Signed-off-by: Greg Kroah-Hartman + +--- + net/sunrpc/xprtsock.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +--- a/net/sunrpc/xprtsock.c ++++ b/net/sunrpc/xprtsock.c +@@ -1462,7 +1462,7 @@ static void xs_tcp_cancel_linger_timeout + xprt_clear_connecting(xprt); + } + +-static void xs_sock_mark_closed(struct rpc_xprt *xprt) ++static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt) + { + smp_mb__before_clear_bit(); + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); +@@ -1470,6 +1470,11 @@ static void xs_sock_mark_closed(struct r + clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + clear_bit(XPRT_CLOSING, &xprt->state); + smp_mb__after_clear_bit(); ++} ++ ++static void xs_sock_mark_closed(struct rpc_xprt *xprt) ++{ ++ xs_sock_reset_connection_flags(xprt); + /* Mark transport as closed and wake up all pending tasks */ + xprt_disconnect_done(xprt); + } +@@ -2004,10 +2009,8 @@ static void xs_abort_connection(struct s + any.sa_family = AF_UNSPEC; + result = kernel_connect(transport->sock, &any, sizeof(any), 0); + if (!result) +- xs_sock_mark_closed(&transport->xprt); +- else +- dprintk("RPC: AF_UNSPEC connect return code %d\n", +- result); ++ xs_sock_reset_connection_flags(&transport->xprt); ++ dprintk("RPC: AF_UNSPEC connect return code %d\n", result); + } + + static void xs_tcp_reuse_connection(struct sock_xprt *transport) diff --git a/queue-3.4/x86-mm-trim-memory-in-memblock-to-be-page-aligned.patch b/queue-3.4/x86-mm-trim-memory-in-memblock-to-be-page-aligned.patch new file mode 100644 index 00000000000..c28845d5c85 --- /dev/null +++ b/queue-3.4/x86-mm-trim-memory-in-memblock-to-be-page-aligned.patch @@ -0,0 +1,82 @@ +From 6ede1fd3cb404c0016de6ac529df46d561bd558b 
Mon Sep 17 00:00:00 2001 +From: Yinghai Lu +Date: Mon, 22 Oct 2012 16:35:18 -0700 +Subject: x86, mm: Trim memory in memblock to be page aligned + +From: Yinghai Lu + +commit 6ede1fd3cb404c0016de6ac529df46d561bd558b upstream. + +We will not map partial pages, so need to make sure memblock +allocation will not allocate those bytes out. + +Also we will use for_each_mem_pfn_range() to loop to map memory +range to keep them consistent. + +Signed-off-by: Yinghai Lu +Link: http://lkml.kernel.org/r/CAE9FiQVZirvaBMFYRfXMmWEcHbKSicQEHz4VAwUv0xFCk51ZNw@mail.gmail.com +Acked-by: Jacob Shin +Signed-off-by: H. Peter Anvin +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/e820.c | 3 +++ + include/linux/memblock.h | 1 + + mm/memblock.c | 24 ++++++++++++++++++++++++ + 3 files changed, 28 insertions(+) + +--- a/arch/x86/kernel/e820.c ++++ b/arch/x86/kernel/e820.c +@@ -1076,6 +1076,9 @@ void __init memblock_x86_fill(void) + memblock_add(ei->addr, ei->size); + } + ++ /* throw away partial pages */ ++ memblock_trim_memory(PAGE_SIZE); ++ + memblock_dump_all(); + } + +--- a/include/linux/memblock.h ++++ b/include/linux/memblock.h +@@ -57,6 +57,7 @@ int memblock_add(phys_addr_t base, phys_ + int memblock_remove(phys_addr_t base, phys_addr_t size); + int memblock_free(phys_addr_t base, phys_addr_t size); + int memblock_reserve(phys_addr_t base, phys_addr_t size); ++void memblock_trim_memory(phys_addr_t align); + + #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP + void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, +--- a/mm/memblock.c ++++ b/mm/memblock.c +@@ -908,6 +908,30 @@ int __init_memblock memblock_is_region_r + return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; + } + ++void __init_memblock memblock_trim_memory(phys_addr_t align) ++{ ++ int i; ++ phys_addr_t start, end, orig_start, orig_end; ++ struct memblock_type *mem = &memblock.memory; ++ ++ for (i = 0; i < mem->cnt; i++) { ++ orig_start = mem->regions[i].base; ++ orig_end = 
mem->regions[i].base + mem->regions[i].size; ++ start = round_up(orig_start, align); ++ end = round_down(orig_end, align); ++ ++ if (start == orig_start && end == orig_end) ++ continue; ++ ++ if (start < end) { ++ mem->regions[i].base = start; ++ mem->regions[i].size = end - start; ++ } else { ++ memblock_remove_region(mem, i); ++ i--; ++ } ++ } ++} + + void __init_memblock memblock_set_current_limit(phys_addr_t limit) + {