From: Greg Kroah-Hartman Date: Fri, 26 Oct 2012 19:36:48 +0000 (-0700) Subject: 3.6-stable patches X-Git-Tag: v3.0.49~15 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=cbd1fe432ad48567b66e51fb596e23e297edbb8c;p=thirdparty%2Fkernel%2Fstable-queue.git 3.6-stable patches added patches: lockd-clear-ln-nsm_clnt-only-when-ln-nsm_users-is-zero.patch lockd-fix-races-in-nsm_client_get.patch mm-fix-xfs-oops-due-to-dirty-pages-without-buffers-on-s390.patch revert-sunrpc-ensure-we-close-the-socket-on-epipe-errors-too.patch revert-x86-mm-fix-the-size-calculation-of-mapping-tables.patch sunrpc-clear-the-connect-flag-when-socket-state-is-tcp_close_wait.patch sunrpc-get-rid-of-the-xs_error_report-socket-callback.patch sunrpc-prevent-races-in-xs_abort_connection.patch x86-64-fix-page-table-accounting.patch x86-mm-trim-memory-in-memblock-to-be-page-aligned.patch --- diff --git a/queue-3.6/lockd-clear-ln-nsm_clnt-only-when-ln-nsm_users-is-zero.patch b/queue-3.6/lockd-clear-ln-nsm_clnt-only-when-ln-nsm_users-is-zero.patch new file mode 100644 index 00000000000..77b0afaf999 --- /dev/null +++ b/queue-3.6/lockd-clear-ln-nsm_clnt-only-when-ln-nsm_users-is-zero.patch @@ -0,0 +1,46 @@ +From e498daa81295d02f7359af313c2b7f87e1062207 Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Wed, 24 Oct 2012 08:53:35 -0400 +Subject: LOCKD: Clear ln->nsm_clnt only when ln->nsm_users is zero + +From: Trond Myklebust + +commit e498daa81295d02f7359af313c2b7f87e1062207 upstream. + +The current code is clearing it in all cases _except_ when zero. 
+ +Reported-by: Stanislav Kinsbursky +Signed-off-by: Trond Myklebust +Signed-off-by: Greg Kroah-Hartman + +--- + fs/lockd/mon.c | 14 ++++++-------- + 1 file changed, 6 insertions(+), 8 deletions(-) + +--- a/fs/lockd/mon.c ++++ b/fs/lockd/mon.c +@@ -124,18 +124,16 @@ out: + static void nsm_client_put(struct net *net) + { + struct lockd_net *ln = net_generic(net, lockd_net_id); +- struct rpc_clnt *clnt = ln->nsm_clnt; +- int shutdown = 0; ++ struct rpc_clnt *clnt = NULL; + + spin_lock(&ln->nsm_clnt_lock); +- if (ln->nsm_users) { +- if (--ln->nsm_users) +- ln->nsm_clnt = NULL; +- shutdown = !ln->nsm_users; ++ ln->nsm_users--; ++ if (ln->nsm_users == 0) { ++ clnt = ln->nsm_clnt; ++ ln->nsm_clnt = NULL; + } + spin_unlock(&ln->nsm_clnt_lock); +- +- if (shutdown) ++ if (clnt != NULL) + rpc_shutdown_client(clnt); + } + diff --git a/queue-3.6/lockd-fix-races-in-nsm_client_get.patch b/queue-3.6/lockd-fix-races-in-nsm_client_get.patch new file mode 100644 index 00000000000..e1f537df302 --- /dev/null +++ b/queue-3.6/lockd-fix-races-in-nsm_client_get.patch @@ -0,0 +1,81 @@ +From a4ee8d978e47e79d536226dccb48991f70091168 Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Tue, 23 Oct 2012 13:51:58 -0400 +Subject: LOCKD: fix races in nsm_client_get + +From: Trond Myklebust + +commit a4ee8d978e47e79d536226dccb48991f70091168 upstream. + +Commit e9406db20fecbfcab646bad157b4cfdc7cadddfb (lockd: per-net +NSM client creation and destruction helpers introduced) contains +a nasty race on initialisation of the per-net NSM client because +it doesn't check whether or not the client is set after grabbing +the nsm_create_mutex. 
+ +Reported-by: Nix +Signed-off-by: Trond Myklebust +Signed-off-by: Greg Kroah-Hartman + +--- + fs/lockd/mon.c | 43 ++++++++++++++++++++++++++----------------- + 1 file changed, 26 insertions(+), 17 deletions(-) + +--- a/fs/lockd/mon.c ++++ b/fs/lockd/mon.c +@@ -85,29 +85,38 @@ static struct rpc_clnt *nsm_create(struc + return rpc_create(&args); + } + ++static struct rpc_clnt *nsm_client_set(struct lockd_net *ln, ++ struct rpc_clnt *clnt) ++{ ++ spin_lock(&ln->nsm_clnt_lock); ++ if (ln->nsm_users == 0) { ++ if (clnt == NULL) ++ goto out; ++ ln->nsm_clnt = clnt; ++ } ++ clnt = ln->nsm_clnt; ++ ln->nsm_users++; ++out: ++ spin_unlock(&ln->nsm_clnt_lock); ++ return clnt; ++} ++ + static struct rpc_clnt *nsm_client_get(struct net *net) + { +- static DEFINE_MUTEX(nsm_create_mutex); +- struct rpc_clnt *clnt; ++ struct rpc_clnt *clnt, *new; + struct lockd_net *ln = net_generic(net, lockd_net_id); + +- spin_lock(&ln->nsm_clnt_lock); +- if (ln->nsm_users) { +- ln->nsm_users++; +- clnt = ln->nsm_clnt; +- spin_unlock(&ln->nsm_clnt_lock); ++ clnt = nsm_client_set(ln, NULL); ++ if (clnt != NULL) + goto out; +- } +- spin_unlock(&ln->nsm_clnt_lock); + +- mutex_lock(&nsm_create_mutex); +- clnt = nsm_create(net); +- if (!IS_ERR(clnt)) { +- ln->nsm_clnt = clnt; +- smp_wmb(); +- ln->nsm_users = 1; +- } +- mutex_unlock(&nsm_create_mutex); ++ clnt = new = nsm_create(net); ++ if (IS_ERR(clnt)) ++ goto out; ++ ++ clnt = nsm_client_set(ln, new); ++ if (clnt != new) ++ rpc_shutdown_client(new); + out: + return clnt; + } diff --git a/queue-3.6/mm-fix-xfs-oops-due-to-dirty-pages-without-buffers-on-s390.patch b/queue-3.6/mm-fix-xfs-oops-due-to-dirty-pages-without-buffers-on-s390.patch new file mode 100644 index 00000000000..0b137c20b28 --- /dev/null +++ b/queue-3.6/mm-fix-xfs-oops-due-to-dirty-pages-without-buffers-on-s390.patch @@ -0,0 +1,107 @@ +From ef5d437f71afdf4afdbab99213add99f4b1318fd Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Thu, 25 Oct 2012 13:37:31 -0700 +Subject: mm: fix XFS 
oops due to dirty pages without buffers on s390 + +From: Jan Kara + +commit ef5d437f71afdf4afdbab99213add99f4b1318fd upstream. + +On s390 any write to a page (even from kernel itself) sets architecture +specific page dirty bit.  Thus when a page is written to via buffered +write, HW dirty bit gets set and when we later map and unmap the page, +page_remove_rmap() finds the dirty bit and calls set_page_dirty(). + +Dirtying of a page which shouldn't be dirty can cause all sorts of +problems to filesystems.  The bug we observed in practice is that +buffers from the page get freed, so when the page gets later marked as +dirty and writeback writes it, XFS crashes due to an assertion +BUG_ON(!PagePrivate(page)) in page_buffers() called from +xfs_count_page_state(). + +Similar problem can also happen when zero_user_segment() call from +xfs_vm_writepage() (or block_write_full_page() for that matter) set the +hardware dirty bit during writeback, later buffers get freed, and then +page unmapped. + +Fix the issue by ignoring s390 HW dirty bit for page cache pages of +mappings with mapping_cap_account_dirty().  This is safe because for +such mappings when a page gets marked as writeable in PTE it is also +marked dirty in do_wp_page() or do_page_fault().  When the dirty bit is +cleared by clear_page_dirty_for_io(), the page gets writeprotected in +page_mkclean().  So pagecache page is writeable if and only if it is +dirty. + +Thanks to Hugh Dickins for pointing out mapping has to have +mapping_cap_account_dirty() for things to work and proposing a cleaned +up variant of the patch. + +The patch has survived about two hours of running fsx-linux on tmpfs +while heavily swapping and several days of running on our build machines +where the original problem was triggered. 
+ +Signed-off-by: Jan Kara +Cc: Martin Schwidefsky +Cc: Mel Gorman +Cc: Hugh Dickins +Cc: Heiko Carstens +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/rmap.c | 20 +++++++++++++++----- + 1 file changed, 15 insertions(+), 5 deletions(-) + +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -56,6 +56,7 @@ + #include + #include + #include ++#include + + #include + +@@ -971,11 +972,8 @@ int page_mkclean(struct page *page) + + if (page_mapped(page)) { + struct address_space *mapping = page_mapping(page); +- if (mapping) { ++ if (mapping) + ret = page_mkclean_file(mapping, page); +- if (page_test_and_clear_dirty(page_to_pfn(page), 1)) +- ret = 1; +- } + } + + return ret; +@@ -1161,6 +1159,7 @@ void page_add_file_rmap(struct page *pag + */ + void page_remove_rmap(struct page *page) + { ++ struct address_space *mapping = page_mapping(page); + bool anon = PageAnon(page); + bool locked; + unsigned long flags; +@@ -1183,8 +1182,19 @@ void page_remove_rmap(struct page *page) + * this if the page is anon, so about to be freed; but perhaps + * not if it's in swapcache - there might be another pte slot + * containing the swap entry, but page not yet written to swap. ++ * ++ * And we can skip it on file pages, so long as the filesystem ++ * participates in dirty tracking; but need to catch shm and tmpfs ++ * and ramfs pages which have been modified since creation by read ++ * fault. ++ * ++ * Note that mapping must be decided above, before decrementing ++ * mapcount (which luckily provides a barrier): once page is unmapped, ++ * it could be truncated and page->mapping reset to NULL at any moment. ++ * Note also that we are relying on page_mapping(page) to set mapping ++ * to &swapper_space when PageSwapCache(page). 
+ */ +- if ((!anon || PageSwapCache(page)) && ++ if (mapping && !mapping_cap_account_dirty(mapping) && + page_test_and_clear_dirty(page_to_pfn(page), 1)) + set_page_dirty(page); + /* diff --git a/queue-3.6/revert-sunrpc-ensure-we-close-the-socket-on-epipe-errors-too.patch b/queue-3.6/revert-sunrpc-ensure-we-close-the-socket-on-epipe-errors-too.patch new file mode 100644 index 00000000000..19f834fe19f --- /dev/null +++ b/queue-3.6/revert-sunrpc-ensure-we-close-the-socket-on-epipe-errors-too.patch @@ -0,0 +1,38 @@ +From b9d2bb2ee537424a7f855e1f93eed44eb9ee0854 Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Tue, 23 Oct 2012 11:40:02 -0400 +Subject: Revert "SUNRPC: Ensure we close the socket on EPIPE errors too..." + +From: Trond Myklebust + +commit b9d2bb2ee537424a7f855e1f93eed44eb9ee0854 upstream. + +This reverts commit 55420c24a0d4d1fce70ca713f84aa00b6b74a70e. +Now that we clear the connected flag when entering TCP_CLOSE_WAIT, +the deadlock described in this commit is no longer possible. +Instead, the resulting call to xs_tcp_shutdown() can interfere +with pending reconnection attempts. 
+ +Reported-by: Chris Perl +Signed-off-by: Trond Myklebust +Tested-by: Chris Perl +Signed-off-by: Greg Kroah-Hartman + +--- + net/sunrpc/xprtsock.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sunrpc/xprtsock.c ++++ b/net/sunrpc/xprtsock.c +@@ -736,10 +736,10 @@ static int xs_tcp_send_request(struct rp + dprintk("RPC: sendmsg returned unrecognized error %d\n", + -status); + case -ECONNRESET: +- case -EPIPE: + xs_tcp_shutdown(xprt); + case -ECONNREFUSED: + case -ENOTCONN: ++ case -EPIPE: + clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); + } + diff --git a/queue-3.6/revert-x86-mm-fix-the-size-calculation-of-mapping-tables.patch b/queue-3.6/revert-x86-mm-fix-the-size-calculation-of-mapping-tables.patch new file mode 100644 index 00000000000..372008e8363 --- /dev/null +++ b/queue-3.6/revert-x86-mm-fix-the-size-calculation-of-mapping-tables.patch @@ -0,0 +1,102 @@ +From 7b16bbf97375d9fb7fc107b3f80afeb94a204e44 Mon Sep 17 00:00:00 2001 +From: Dave Young +Date: Thu, 18 Oct 2012 14:33:23 +0800 +Subject: Revert "x86/mm: Fix the size calculation of mapping tables" + +From: Dave Young + +commit 7b16bbf97375d9fb7fc107b3f80afeb94a204e44 upstream. + +Commit: + + 722bc6b16771 x86/mm: Fix the size calculation of mapping tables + +Tried to address the issue that the first 2/4M should use 4k pages +if PSE enabled, but extra counts should only be valid for x86_32. + +This commit caused a kdump regression: the kdump kernel hangs. + +Work is in progress to fundamentally fix the various page table +initialization issues that we have, via the design suggested +by H. Peter Anvin, but it's not ready yet to be merged. + +So, to get a working kdump revert to the last known working version, +which is the revert of this commit and of a followup fix (which was +incomplete): + + bd2753b2dda7 x86/mm: Only add extra pages count for the first memory range during pre-allocation + +Tested kdump on physical and virtual machines. 
+ +Signed-off-by: Dave Young +Acked-by: Yinghai Lu +Acked-by: Cong Wang +Acked-by: Flavio Leitner +Tested-by: Flavio Leitner +Cc: Dan Carpenter +Cc: Cong Wang +Cc: Flavio Leitner +Cc: Tejun Heo +Cc: ianfang.cn@gmail.com +Cc: Vivek Goyal +Cc: Linus Torvalds +Cc: Andrew Morton +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/mm/init.c | 22 +++++++++------------- + 1 file changed, 9 insertions(+), 13 deletions(-) + +--- a/arch/x86/mm/init.c ++++ b/arch/x86/mm/init.c +@@ -29,14 +29,8 @@ int direct_gbpages + #endif + ; + +-struct map_range { +- unsigned long start; +- unsigned long end; +- unsigned page_size_mask; +-}; +- +-static void __init find_early_table_space(struct map_range *mr, unsigned long end, +- int use_pse, int use_gbpages) ++static void __init find_early_table_space(unsigned long end, int use_pse, ++ int use_gbpages) + { + unsigned long puds, pmds, ptes, tables, start = 0, good_end = end; + phys_addr_t base; +@@ -61,10 +55,6 @@ static void __init find_early_table_spac + #ifdef CONFIG_X86_32 + extra += PMD_SIZE; + #endif +- /* The first 2/4M doesn't use large pages. */ +- if (mr->start < PMD_SIZE) +- extra += mr->end - mr->start; +- + ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; + } else + ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; +@@ -95,6 +85,12 @@ void __init native_pagetable_reserve(u64 + memblock_reserve(start, end - start); + } + ++struct map_range { ++ unsigned long start; ++ unsigned long end; ++ unsigned page_size_mask; ++}; ++ + #ifdef CONFIG_X86_32 + #define NR_RANGE_MR 3 + #else /* CONFIG_X86_64 */ +@@ -267,7 +263,7 @@ unsigned long __init_refok init_memory_m + * nodes are discovered. 
+ */ + if (!after_bootmem) +- find_early_table_space(&mr[0], end, use_pse, use_gbpages); ++ find_early_table_space(end, use_pse, use_gbpages); + + for (i = 0; i < nr_range; i++) + ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, diff --git a/queue-3.6/series b/queue-3.6/series index 762eb44a96b..6cbbd4e244b 100644 --- a/queue-3.6/series +++ b/queue-3.6/series @@ -8,3 +8,13 @@ fs-compat_ioctl.c-video_set_spu_palette-missing-error-check.patch drivers-rtc-rtc-imxdi.c-add-missing-spin-lock-initialization.patch genalloc-stop-crashing-the-system-when-destroying-a-pool.patch arm-7559-1-smp-switch-away-from-the-idmap-before-updating-init_mm.mm_count.patch +x86-64-fix-page-table-accounting.patch +revert-x86-mm-fix-the-size-calculation-of-mapping-tables.patch +x86-mm-trim-memory-in-memblock-to-be-page-aligned.patch +mm-fix-xfs-oops-due-to-dirty-pages-without-buffers-on-s390.patch +sunrpc-get-rid-of-the-xs_error_report-socket-callback.patch +sunrpc-clear-the-connect-flag-when-socket-state-is-tcp_close_wait.patch +revert-sunrpc-ensure-we-close-the-socket-on-epipe-errors-too.patch +lockd-fix-races-in-nsm_client_get.patch +sunrpc-prevent-races-in-xs_abort_connection.patch +lockd-clear-ln-nsm_clnt-only-when-ln-nsm_users-is-zero.patch diff --git a/queue-3.6/sunrpc-clear-the-connect-flag-when-socket-state-is-tcp_close_wait.patch b/queue-3.6/sunrpc-clear-the-connect-flag-when-socket-state-is-tcp_close_wait.patch new file mode 100644 index 00000000000..118ef7c9837 --- /dev/null +++ b/queue-3.6/sunrpc-clear-the-connect-flag-when-socket-state-is-tcp_close_wait.patch @@ -0,0 +1,30 @@ +From d0bea455dd48da1ecbd04fedf00eb89437455fdc Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Tue, 23 Oct 2012 11:35:47 -0400 +Subject: SUNRPC: Clear the connect flag when socket state is TCP_CLOSE_WAIT + +From: Trond Myklebust + +commit d0bea455dd48da1ecbd04fedf00eb89437455fdc upstream. + +This is needed to ensure that we call xprt_connect() upon the next +call to call_connect(). 
+ +Signed-off-by: Trond Myklebust +Tested-by: Chris Perl +Signed-off-by: Greg Kroah-Hartman + +--- + net/sunrpc/xprtsock.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/sunrpc/xprtsock.c ++++ b/net/sunrpc/xprtsock.c +@@ -1522,6 +1522,7 @@ static void xs_tcp_state_change(struct s + case TCP_CLOSE_WAIT: + /* The server initiated a shutdown of the socket */ + xprt->connect_cookie++; ++ clear_bit(XPRT_CONNECTED, &xprt->state); + xs_tcp_force_close(xprt); + case TCP_CLOSING: + /* diff --git a/queue-3.6/sunrpc-get-rid-of-the-xs_error_report-socket-callback.patch b/queue-3.6/sunrpc-get-rid-of-the-xs_error_report-socket-callback.patch new file mode 100644 index 00000000000..c43b858e5cf --- /dev/null +++ b/queue-3.6/sunrpc-get-rid-of-the-xs_error_report-socket-callback.patch @@ -0,0 +1,105 @@ +From f878b657ce8e7d3673afe48110ec208a29e38c4a Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Mon, 22 Oct 2012 17:14:36 -0400 +Subject: SUNRPC: Get rid of the xs_error_report socket callback + +From: Trond Myklebust + +commit f878b657ce8e7d3673afe48110ec208a29e38c4a upstream. + +Chris Perl reports that we're seeing races between the wakeup call in +xs_error_report and the connect attempts. Basically, Chris has shown +that in certain circumstances, the call to xs_error_report causes the +rpc_task that is responsible for reconnecting to wake up early, thus +triggering a disconnect and retry. + +Since the sk->sk_error_report() calls in the socket layer are always +followed by a tcp_done() in the cases where we care about waking up +the rpc_tasks, just let the state_change callbacks take responsibility +for those wake ups. 
+ +Reported-by: Chris Perl +Signed-off-by: Trond Myklebust +Tested-by: Chris Perl +Signed-off-by: Greg Kroah-Hartman + +--- + net/sunrpc/xprtsock.c | 25 ------------------------- + 1 file changed, 25 deletions(-) + +--- a/net/sunrpc/xprtsock.c ++++ b/net/sunrpc/xprtsock.c +@@ -254,7 +254,6 @@ struct sock_xprt { + void (*old_data_ready)(struct sock *, int); + void (*old_state_change)(struct sock *); + void (*old_write_space)(struct sock *); +- void (*old_error_report)(struct sock *); + }; + + /* +@@ -781,7 +780,6 @@ static void xs_save_old_callbacks(struct + transport->old_data_ready = sk->sk_data_ready; + transport->old_state_change = sk->sk_state_change; + transport->old_write_space = sk->sk_write_space; +- transport->old_error_report = sk->sk_error_report; + } + + static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *sk) +@@ -789,7 +787,6 @@ static void xs_restore_old_callbacks(str + sk->sk_data_ready = transport->old_data_ready; + sk->sk_state_change = transport->old_state_change; + sk->sk_write_space = transport->old_write_space; +- sk->sk_error_report = transport->old_error_report; + } + + static void xs_reset_transport(struct sock_xprt *transport) +@@ -1549,25 +1546,6 @@ static void xs_tcp_state_change(struct s + read_unlock_bh(&sk->sk_callback_lock); + } + +-/** +- * xs_error_report - callback mainly for catching socket errors +- * @sk: socket +- */ +-static void xs_error_report(struct sock *sk) +-{ +- struct rpc_xprt *xprt; +- +- read_lock_bh(&sk->sk_callback_lock); +- if (!(xprt = xprt_from_sock(sk))) +- goto out; +- dprintk("RPC: %s client %p...\n" +- "RPC: error %d\n", +- __func__, xprt, sk->sk_err); +- xprt_wake_pending_tasks(xprt, -EAGAIN); +-out: +- read_unlock_bh(&sk->sk_callback_lock); +-} +- + static void xs_write_space(struct sock *sk) + { + struct socket *sock; +@@ -1867,7 +1845,6 @@ static int xs_local_finish_connecting(st + sk->sk_user_data = xprt; + sk->sk_data_ready = xs_local_data_ready; + sk->sk_write_space = 
xs_udp_write_space; +- sk->sk_error_report = xs_error_report; + sk->sk_allocation = GFP_ATOMIC; + + xprt_clear_connected(xprt); +@@ -1995,7 +1972,6 @@ static void xs_udp_finish_connecting(str + sk->sk_user_data = xprt; + sk->sk_data_ready = xs_udp_data_ready; + sk->sk_write_space = xs_udp_write_space; +- sk->sk_error_report = xs_error_report; + sk->sk_no_check = UDP_CSUM_NORCV; + sk->sk_allocation = GFP_ATOMIC; + +@@ -2113,7 +2089,6 @@ static int xs_tcp_finish_connecting(stru + sk->sk_data_ready = xs_tcp_data_ready; + sk->sk_state_change = xs_tcp_state_change; + sk->sk_write_space = xs_tcp_write_space; +- sk->sk_error_report = xs_error_report; + sk->sk_allocation = GFP_ATOMIC; + + /* socket options */ diff --git a/queue-3.6/sunrpc-prevent-races-in-xs_abort_connection.patch b/queue-3.6/sunrpc-prevent-races-in-xs_abort_connection.patch new file mode 100644 index 00000000000..6b45031d9ad --- /dev/null +++ b/queue-3.6/sunrpc-prevent-races-in-xs_abort_connection.patch @@ -0,0 +1,63 @@ +From 4bc1e68ed6a8b59be8a79eb719be515a55c7bc68 Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Tue, 23 Oct 2012 17:50:07 -0400 +Subject: SUNRPC: Prevent races in xs_abort_connection() + +From: Trond Myklebust + +commit 4bc1e68ed6a8b59be8a79eb719be515a55c7bc68 upstream. + +The call to xprt_disconnect_done() that is triggered by a successful +connection reset will trigger another automatic wakeup of all tasks +on the xprt->pending rpc_wait_queue. In particular it will cause an +early wake up of the task that called xprt_connect(). 
+ +All we really want to do here is clear all the socket-specific state +flags, so we split that functionality out of xs_sock_mark_closed() +into a helper that can be called by xs_abort_connection() + +Reported-by: Chris Perl +Signed-off-by: Trond Myklebust +Tested-by: Chris Perl +Signed-off-by: Greg Kroah-Hartman + +--- + net/sunrpc/xprtsock.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +--- a/net/sunrpc/xprtsock.c ++++ b/net/sunrpc/xprtsock.c +@@ -1459,7 +1459,7 @@ static void xs_tcp_cancel_linger_timeout + xprt_clear_connecting(xprt); + } + +-static void xs_sock_mark_closed(struct rpc_xprt *xprt) ++static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt) + { + smp_mb__before_clear_bit(); + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); +@@ -1467,6 +1467,11 @@ static void xs_sock_mark_closed(struct r + clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + clear_bit(XPRT_CLOSING, &xprt->state); + smp_mb__after_clear_bit(); ++} ++ ++static void xs_sock_mark_closed(struct rpc_xprt *xprt) ++{ ++ xs_sock_reset_connection_flags(xprt); + /* Mark transport as closed and wake up all pending tasks */ + xprt_disconnect_done(xprt); + } +@@ -2042,10 +2047,8 @@ static void xs_abort_connection(struct s + any.sa_family = AF_UNSPEC; + result = kernel_connect(transport->sock, &any, sizeof(any), 0); + if (!result) +- xs_sock_mark_closed(&transport->xprt); +- else +- dprintk("RPC: AF_UNSPEC connect return code %d\n", +- result); ++ xs_sock_reset_connection_flags(&transport->xprt); ++ dprintk("RPC: AF_UNSPEC connect return code %d\n", result); + } + + static void xs_tcp_reuse_connection(struct sock_xprt *transport) diff --git a/queue-3.6/x86-64-fix-page-table-accounting.patch b/queue-3.6/x86-64-fix-page-table-accounting.patch new file mode 100644 index 00000000000..af2c07a1868 --- /dev/null +++ b/queue-3.6/x86-64-fix-page-table-accounting.patch @@ -0,0 +1,59 @@ +From 876ee61aadf01aa0db981b5d249cbdd53dc28b5e Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: 
Thu, 4 Oct 2012 14:48:10 +0100 +Subject: x86-64: Fix page table accounting + +From: Jan Beulich + +commit 876ee61aadf01aa0db981b5d249cbdd53dc28b5e upstream. + +Commit 20167d3421a089a1bf1bd680b150dc69c9506810 ("x86-64: Fix +accounting in kernel_physical_mapping_init()") went a little too +far by entirely removing the counting of pre-populated page +tables: this should be done at boot time (to cover the page +tables set up in early boot code), but shouldn't be done during +memory hot add. + +Hence, re-add the removed increments of "pages", but make them +and the one in phys_pte_init() conditional upon !after_bootmem. + +Reported-Acked-and-Tested-by: Hugh Dickins +Signed-off-by: Jan Beulich +Link: http://lkml.kernel.org/r/506DAFBA020000780009FA8C@nat28.tlf.novell.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/mm/init_64.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/arch/x86/mm/init_64.c ++++ b/arch/x86/mm/init_64.c +@@ -386,7 +386,8 @@ phys_pte_init(pte_t *pte_page, unsigned + * these mappings are more intelligent. + */ + if (pte_val(*pte)) { +- pages++; ++ if (!after_bootmem) ++ pages++; + continue; + } + +@@ -451,6 +452,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned + * attributes. + */ + if (page_size_mask & (1 << PG_LEVEL_2M)) { ++ if (!after_bootmem) ++ pages++; + last_map_addr = next; + continue; + } +@@ -526,6 +529,8 @@ phys_pud_init(pud_t *pud_page, unsigned + * attributes. 
+ */ + if (page_size_mask & (1 << PG_LEVEL_1G)) { ++ if (!after_bootmem) ++ pages++; + last_map_addr = next; + continue; + } diff --git a/queue-3.6/x86-mm-trim-memory-in-memblock-to-be-page-aligned.patch b/queue-3.6/x86-mm-trim-memory-in-memblock-to-be-page-aligned.patch new file mode 100644 index 00000000000..ebc2f2fcaf4 --- /dev/null +++ b/queue-3.6/x86-mm-trim-memory-in-memblock-to-be-page-aligned.patch @@ -0,0 +1,82 @@ +From 6ede1fd3cb404c0016de6ac529df46d561bd558b Mon Sep 17 00:00:00 2001 +From: Yinghai Lu +Date: Mon, 22 Oct 2012 16:35:18 -0700 +Subject: x86, mm: Trim memory in memblock to be page aligned + +From: Yinghai Lu + +commit 6ede1fd3cb404c0016de6ac529df46d561bd558b upstream. + +We will not map partial pages, so need to make sure memblock +allocation will not allocate those bytes out. + +Also we will use for_each_mem_pfn_range() to loop to map memory +range to keep them consistent. + +Signed-off-by: Yinghai Lu +Link: http://lkml.kernel.org/r/CAE9FiQVZirvaBMFYRfXMmWEcHbKSicQEHz4VAwUv0xFCk51ZNw@mail.gmail.com +Acked-by: Jacob Shin +Signed-off-by: H. 
Peter Anvin +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/e820.c | 3 +++ + include/linux/memblock.h | 1 + + mm/memblock.c | 24 ++++++++++++++++++++++++ + 3 files changed, 28 insertions(+) + +--- a/arch/x86/kernel/e820.c ++++ b/arch/x86/kernel/e820.c +@@ -1077,6 +1077,9 @@ void __init memblock_x86_fill(void) + memblock_add(ei->addr, ei->size); + } + ++ /* throw away partial pages */ ++ memblock_trim_memory(PAGE_SIZE); ++ + memblock_dump_all(); + } + +--- a/include/linux/memblock.h ++++ b/include/linux/memblock.h +@@ -57,6 +57,7 @@ int memblock_add(phys_addr_t base, phys_ + int memblock_remove(phys_addr_t base, phys_addr_t size); + int memblock_free(phys_addr_t base, phys_addr_t size); + int memblock_reserve(phys_addr_t base, phys_addr_t size); ++void memblock_trim_memory(phys_addr_t align); + + #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP + void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, +--- a/mm/memblock.c ++++ b/mm/memblock.c +@@ -929,6 +929,30 @@ int __init_memblock memblock_is_region_r + return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; + } + ++void __init_memblock memblock_trim_memory(phys_addr_t align) ++{ ++ int i; ++ phys_addr_t start, end, orig_start, orig_end; ++ struct memblock_type *mem = &memblock.memory; ++ ++ for (i = 0; i < mem->cnt; i++) { ++ orig_start = mem->regions[i].base; ++ orig_end = mem->regions[i].base + mem->regions[i].size; ++ start = round_up(orig_start, align); ++ end = round_down(orig_end, align); ++ ++ if (start == orig_start && end == orig_end) ++ continue; ++ ++ if (start < end) { ++ mem->regions[i].base = start; ++ mem->regions[i].size = end - start; ++ } else { ++ memblock_remove_region(mem, i); ++ i--; ++ } ++ } ++} + + void __init_memblock memblock_set_current_limit(phys_addr_t limit) + {