From 97d56ae0ceffaed082b389f2cc88ceb3702c597f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 22 Oct 2015 17:47:50 -0700 Subject: [PATCH] 4.2-stable patches added patches: act_mirred-clear-sender-cpu-before-sending-to-tx.patch af_unix-convert-the-unix_sk-macro-to-an-inline-function-for-type-safety.patch af_unix-return-data-from-multiple-skbs-on-recv-with-msg_peek-flag.patch bpf-clear-sender_cpu-before-xmit.patch bpf-fix-panic-in-so_get_filter-with-native-ebpf-programs.patch ethtool-use-kcalloc-instead-of-kmalloc-for-ethtool_get_strings.patch inet-fix-race-in-reqsk_queue_unlink.patch inet-fix-races-in-reqsk_queue_hash_req.patch ipv6-don-t-call-with-rt6_uncached_list_flush_dev.patch l2tp-protect-tunnel-del_work-by-ref_count.patch net-add-pfmemalloc-check-in-sk_add_backlog.patch net-dsa-fix-preparation-of-a-port-stp-update.patch net-ibm-emac-bump-version-numbers-for-correct-work-with-ethtool.patch net-unix-fix-logic-about-sk_peek_offset.patch netlink-trim-skb-to-alloc-size-to-avoid-msg_trunc.patch ovs-do-not-allocate-memory-from-offline-numa-node.patch ppp-don-t-override-sk-sk_state-in-pppoe_flush_dev.patch skbuff-fix-skb-checksum-flag-on-skb-pull.patch skbuff-fix-skb-checksum-partial-check.patch tipc-move-fragment-importance-field-to-new-header-position.patch --- ...lear-sender-cpu-before-sending-to-tx.patch | 35 ++++++ ...o-an-inline-function-for-type-safety.patch | 35 ++++++ ...iple-skbs-on-recv-with-msg_peek-flag.patch | 60 ++++++++++ .../bpf-clear-sender_cpu-before-xmit.patch | 31 +++++ ...get_filter-with-native-ebpf-programs.patch | 65 +++++++++++ ...d-of-kmalloc-for-ethtool_get_strings.patch | 33 ++++++ .../inet-fix-race-in-reqsk_queue_unlink.patch | 64 +++++++++++ ...et-fix-races-in-reqsk_queue_hash_req.patch | 52 +++++++++ ...all-with-rt6_uncached_list_flush_dev.patch | 72 ++++++++++++ ...protect-tunnel-del_work-by-ref_count.patch | 55 +++++++++ ...d-pfmemalloc-check-in-sk_add_backlog.patch | 49 ++++++++ ...fix-preparation-of-a-port-stp-update.patch | 47 ++++++++ ...umbers-for-correct-work-with-ethtool.patch | 40 +++++++ ...-unix-fix-logic-about-sk_peek_offset.patch | 67 +++++++++++ ...skb-to-alloc-size-to-avoid-msg_trunc.patch | 107 ++++++++++++++++++ ...locate-memory-from-offline-numa-node.patch | 35 ++++++ ...rride-sk-sk_state-in-pppoe_flush_dev.patch | 76 +++++++++++++ ...ff-fix-skb-checksum-flag-on-skb-pull.patch | 68 +++++++++++ ...kbuff-fix-skb-checksum-partial-check.patch | 58 ++++++++++ ...ortance-field-to-new-header-position.patch | 57 ++++++++++ 20 files changed, 1106 insertions(+) create mode 100644 queue-4.2/act_mirred-clear-sender-cpu-before-sending-to-tx.patch create mode 100644 queue-4.2/af_unix-convert-the-unix_sk-macro-to-an-inline-function-for-type-safety.patch create mode 100644 queue-4.2/af_unix-return-data-from-multiple-skbs-on-recv-with-msg_peek-flag.patch create mode 100644 queue-4.2/bpf-clear-sender_cpu-before-xmit.patch create mode 100644 queue-4.2/bpf-fix-panic-in-so_get_filter-with-native-ebpf-programs.patch create mode 100644 queue-4.2/ethtool-use-kcalloc-instead-of-kmalloc-for-ethtool_get_strings.patch create mode 100644 queue-4.2/inet-fix-race-in-reqsk_queue_unlink.patch create mode 100644 queue-4.2/inet-fix-races-in-reqsk_queue_hash_req.patch create mode 100644 queue-4.2/ipv6-don-t-call-with-rt6_uncached_list_flush_dev.patch create mode 100644 queue-4.2/l2tp-protect-tunnel-del_work-by-ref_count.patch create mode 100644 queue-4.2/net-add-pfmemalloc-check-in-sk_add_backlog.patch create mode 100644 
queue-4.2/net-dsa-fix-preparation-of-a-port-stp-update.patch create mode 100644 queue-4.2/net-ibm-emac-bump-version-numbers-for-correct-work-with-ethtool.patch create mode 100644 queue-4.2/net-unix-fix-logic-about-sk_peek_offset.patch create mode 100644 queue-4.2/netlink-trim-skb-to-alloc-size-to-avoid-msg_trunc.patch create mode 100644 queue-4.2/ovs-do-not-allocate-memory-from-offline-numa-node.patch create mode 100644 queue-4.2/ppp-don-t-override-sk-sk_state-in-pppoe_flush_dev.patch create mode 100644 queue-4.2/skbuff-fix-skb-checksum-flag-on-skb-pull.patch create mode 100644 queue-4.2/skbuff-fix-skb-checksum-partial-check.patch create mode 100644 queue-4.2/tipc-move-fragment-importance-field-to-new-header-position.patch diff --git a/queue-4.2/act_mirred-clear-sender-cpu-before-sending-to-tx.patch b/queue-4.2/act_mirred-clear-sender-cpu-before-sending-to-tx.patch new file mode 100644 index 00000000000..e27630868ae --- /dev/null +++ b/queue-4.2/act_mirred-clear-sender-cpu-before-sending-to-tx.patch @@ -0,0 +1,35 @@ +From foo@baz Thu Oct 22 17:25:24 PDT 2015 +From: WANG Cong +Date: Tue, 6 Oct 2015 17:23:47 -0700 +Subject: act_mirred: clear sender cpu before sending to tx + +From: WANG Cong + +[ Upstream commit d40496a56430eac0d330378816954619899fe303 ] + +Similar to commit c29390c6dfee ("xps: must clear sender_cpu before forwarding") +the skb->sender_cpu needs to be cleared when moving from Rx +to Tx, otherwise kernel could crash. + +Fixes: 2bd82484bb4c ("xps: fix xps for stacked devices") +Cc: Eric Dumazet +Cc: Jamal Hadi Salim +Signed-off-by: Cong Wang +Signed-off-by: Cong Wang +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/act_mirred.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/sched/act_mirred.c ++++ b/net/sched/act_mirred.c +@@ -168,6 +168,7 @@ static int tcf_mirred(struct sk_buff *sk + + skb2->skb_iif = skb->dev->ifindex; + skb2->dev = dev; ++ skb_sender_cpu_clear(skb2); + err = dev_queue_xmit(skb2); + + out: diff --git a/queue-4.2/af_unix-convert-the-unix_sk-macro-to-an-inline-function-for-type-safety.patch b/queue-4.2/af_unix-convert-the-unix_sk-macro-to-an-inline-function-for-type-safety.patch new file mode 100644 index 00000000000..943f5cd682e --- /dev/null +++ b/queue-4.2/af_unix-convert-the-unix_sk-macro-to-an-inline-function-for-type-safety.patch @@ -0,0 +1,35 @@ +From foo@baz Thu Oct 22 17:25:24 PDT 2015 +From: Aaron Conole +Date: Sat, 26 Sep 2015 18:50:42 -0400 +Subject: af_unix: Convert the unix_sk macro to an inline function for type safety + +From: Aaron Conole + +[ Upstream commit 4613012db1d911f80897f9446a49de817b2c4c47 ] + +As suggested by Eric Dumazet this change replaces the +#define with a static inline function to enjoy +complaints by the compiler when misusing the API. + +Signed-off-by: Aaron Conole +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/af_unix.h | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/include/net/af_unix.h ++++ b/include/net/af_unix.h +@@ -63,7 +63,11 @@ struct unix_sock { + #define UNIX_GC_MAYBE_CYCLE 1 + struct socket_wq peer_wq; + }; +-#define unix_sk(__sk) ((struct unix_sock *)__sk) ++ ++static inline struct unix_sock *unix_sk(struct sock *sk) ++{ ++ return (struct unix_sock *)sk; ++} + + #define peer_wait peer_wq.wait + diff --git a/queue-4.2/af_unix-return-data-from-multiple-skbs-on-recv-with-msg_peek-flag.patch b/queue-4.2/af_unix-return-data-from-multiple-skbs-on-recv-with-msg_peek-flag.patch new file mode 100644 index 00000000000..c7ed0119148 --- /dev/null +++ b/queue-4.2/af_unix-return-data-from-multiple-skbs-on-recv-with-msg_peek-flag.patch @@ -0,0 +1,60 @@ +From foo@baz Thu Oct 22 17:25:24 PDT 2015 +From: Aaron Conole +Date: Sat, 26 Sep 2015 18:50:43 -0400 +Subject: af_unix: return data from multiple SKBs on recv() with MSG_PEEK flag + +From: Aaron Conole + +[ Upstream commit 9f389e35674f5b086edd70ed524ca0f287259725 ] + +AF_UNIX sockets now return multiple skbs from recv() when MSG_PEEK flag +is set. + +This is referenced in kernel bugzilla #12323 @ +https://bugzilla.kernel.org/show_bug.cgi?id=12323 + +As described both in the BZ and lkml thread @ +http://lkml.org/lkml/2008/1/8/444 calling recv() with MSG_PEEK on an +AF_UNIX socket only reads a single skb, where the desired effect is +to return as much skb data has been queued, until hitting the recv +buffer size (whichever comes first). + +The modified MSG_PEEK path will now move to the next skb in the tree +and jump to the again: label, rather than following the natural loop +structure. This requires duplicating some of the loop head actions. + +This was tested using the python socketpair python code attached to +the bugzilla issue. + +Signed-off-by: Aaron Conole +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/unix/af_unix.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -2179,8 +2179,21 @@ unlock: + if (UNIXCB(skb).fp) + scm.fp = scm_fp_dup(UNIXCB(skb).fp); + +- sk_peek_offset_fwd(sk, chunk); ++ if (skip) { ++ sk_peek_offset_fwd(sk, chunk); ++ skip -= chunk; ++ } + ++ if (UNIXCB(skb).fp) ++ break; ++ ++ last = skb; ++ last_len = skb->len; ++ unix_state_lock(sk); ++ skb = skb_peek_next(skb, &sk->sk_receive_queue); ++ if (skb) ++ goto again; ++ unix_state_unlock(sk); + break; + } + } while (size); diff --git a/queue-4.2/bpf-clear-sender_cpu-before-xmit.patch b/queue-4.2/bpf-clear-sender_cpu-before-xmit.patch new file mode 100644 index 00000000000..1981b7c0153 --- /dev/null +++ b/queue-4.2/bpf-clear-sender_cpu-before-xmit.patch @@ -0,0 +1,31 @@ +From foo@baz Thu Oct 22 17:25:24 PDT 2015 +From: Alexei Starovoitov +Date: Tue, 6 Oct 2015 20:46:07 -0700 +Subject: bpf: clear sender_cpu before xmit + +From: Alexei Starovoitov + +[ Upstream commit 6bf0577374cfb6c2301dbf4934a4f23ad3d72763 ] + +Similar to commit c29390c6dfee ("xps: must clear sender_cpu before forwarding") +the skb->sender_cpu needs to be cleared before xmit. + +Fixes: 3896d655f4d4 ("bpf: introduce bpf_clone_redirect() helper") +Signed-off-by: Alexei Starovoitov +Acked-by: Daniel Borkmann +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/filter.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -1412,6 +1412,7 @@ static u64 bpf_clone_redirect(u64 r1, u6 + return dev_forward_skb(dev, skb2); + + skb2->dev = dev; ++ skb_sender_cpu_clear(skb2); + return dev_queue_xmit(skb2); + } + diff --git a/queue-4.2/bpf-fix-panic-in-so_get_filter-with-native-ebpf-programs.patch b/queue-4.2/bpf-fix-panic-in-so_get_filter-with-native-ebpf-programs.patch new file mode 100644 index 00000000000..0a66e3b173f --- /dev/null +++ b/queue-4.2/bpf-fix-panic-in-so_get_filter-with-native-ebpf-programs.patch @@ -0,0 +1,65 @@ +From foo@baz Thu Oct 22 17:25:24 PDT 2015 +From: Daniel Borkmann +Date: Fri, 2 Oct 2015 12:06:03 +0200 +Subject: bpf: fix panic in SO_GET_FILTER with native ebpf programs + +From: Daniel Borkmann + +[ Upstream commit 93d08b6966cf730ea669d4d98f43627597077153 ] + +When sockets have a native eBPF program attached through +setsockopt(sk, SOL_SOCKET, SO_ATTACH_BPF, ...), and then try to +dump these over getsockopt(sk, SOL_SOCKET, SO_GET_FILTER, ...), +the following panic appears: + + [49904.178642] BUG: unable to handle kernel NULL pointer dereference at (null) + [49904.178762] IP: [] sk_get_filter+0x39/0x90 + [49904.182000] PGD 86fc9067 PUD 531a1067 PMD 0 + [49904.185196] Oops: 0000 [#1] SMP + [...] + [49904.224677] Call Trace: + [49904.226090] [] sock_getsockopt+0x319/0x740 + [49904.227535] [] ? sock_has_perm+0x63/0x70 + [49904.228953] [] ? release_sock+0x108/0x150 + [49904.230380] [] ? selinux_socket_getsockopt+0x23/0x30 + [49904.231788] [] SyS_getsockopt+0xa6/0xc0 + [49904.233267] [] entry_SYSCALL_64_fastpath+0x12/0x71 + +The underlying issue is the very same as in commit b382c0865600 +("sock, diag: fix panic in sock_diag_put_filterinfo"), that is, +native eBPF programs don't store an original program since this +is only needed in cBPF ones. + +However, sk_get_filter() wasn't updated to test for this at the +time when eBPF could be attached. Just throw an error to the user +to indicate that eBPF cannot be dumped over this interface. +That way, it can also be known that a program _is_ attached (as +opposed to just return 0), and a different (future) method needs +to be consulted for a dump. + +Fixes: 89aa075832b0 ("net: sock: allow eBPF programs to be attached to sockets") +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/filter.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -1701,9 +1701,13 @@ int sk_get_filter(struct sock *sk, struc + goto out; + + /* We're copying the filter that has been originally attached, +- * so no conversion/decode needed anymore. ++ * so no conversion/decode needed anymore. eBPF programs that ++ * have no original program cannot be dumped through this. 
+ */ ++ ret = -EACCES; + fprog = filter->prog->orig_prog; ++ if (!fprog) ++ goto out; + + ret = fprog->len; + if (!len) diff --git a/queue-4.2/ethtool-use-kcalloc-instead-of-kmalloc-for-ethtool_get_strings.patch b/queue-4.2/ethtool-use-kcalloc-instead-of-kmalloc-for-ethtool_get_strings.patch new file mode 100644 index 00000000000..766c3c9a9f6 --- /dev/null +++ b/queue-4.2/ethtool-use-kcalloc-instead-of-kmalloc-for-ethtool_get_strings.patch @@ -0,0 +1,33 @@ +From foo@baz Thu Oct 22 17:25:24 PDT 2015 +From: Joe Perches +Date: Wed, 14 Oct 2015 01:09:40 -0700 +Subject: ethtool: Use kcalloc instead of kmalloc for ethtool_get_strings + +From: Joe Perches + +[ Upstream commit 077cb37fcf6f00a45f375161200b5ee0cd4e937b ] + +It seems that kernel memory can leak into userspace by a +kmalloc, ethtool_get_strings, then copy_to_user sequence. + +Avoid this by using kcalloc to zero fill the copied buffer. + +Signed-off-by: Joe Perches +Acked-by: Ben Hutchings +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/ethtool.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/core/ethtool.c ++++ b/net/core/ethtool.c +@@ -1284,7 +1284,7 @@ static int ethtool_get_strings(struct ne + + gstrings.len = ret; + +- data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); ++ data = kcalloc(gstrings.len, ETH_GSTRING_LEN, GFP_USER); + if (!data) + return -ENOMEM; + diff --git a/queue-4.2/inet-fix-race-in-reqsk_queue_unlink.patch b/queue-4.2/inet-fix-race-in-reqsk_queue_unlink.patch new file mode 100644 index 00000000000..7c98de1b298 --- /dev/null +++ b/queue-4.2/inet-fix-race-in-reqsk_queue_unlink.patch @@ -0,0 +1,64 @@ +From foo@baz Thu Oct 22 17:25:24 PDT 2015 +From: Eric Dumazet +Date: Thu, 1 Oct 2015 05:39:26 -0700 +Subject: inet: fix race in reqsk_queue_unlink() + +From: Eric Dumazet + +[ Upstream commit 2306c704ce280c97a60d1f45333b822b40281dea ] + +reqsk_timer_handler() tests if icsk_accept_queue.listen_opt +is NULL at its beginning. + +By the time it calls inet_csk_reqsk_queue_drop() and +reqsk_queue_unlink(), listener might have been closed and +inet_csk_listen_stop() had called reqsk_queue_yank_acceptq() +which sets icsk_accept_queue.listen_opt to NULL + +We therefore need to correctly check listen_opt being NULL +after holding syn_wait_lock for proper synchronization. + +Fixes: fa76ce7328b2 ("inet: get rid of central tcp/dccp listener timer") +Fixes: b357a364c57c ("inet: fix possible panic in reqsk_queue_unlink()") +Signed-off-by: Eric Dumazet +Cc: Yuchung Cheng +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/inet_connection_sock.c | 19 ++++++++++--------- + 1 file changed, 10 insertions(+), 9 deletions(-) + +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -577,21 +577,22 @@ EXPORT_SYMBOL(inet_rtx_syn_ack); + static bool reqsk_queue_unlink(struct request_sock_queue *queue, + struct request_sock *req) + { +- struct listen_sock *lopt = queue->listen_opt; + struct request_sock **prev; ++ struct listen_sock *lopt; + bool found = false; + + spin_lock(&queue->syn_wait_lock); +- +- for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL; +- prev = &(*prev)->dl_next) { +- if (*prev == req) { +- *prev = req->dl_next; +- found = true; +- break; ++ lopt = queue->listen_opt; ++ if (lopt) { ++ for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL; ++ prev = &(*prev)->dl_next) { ++ if (*prev == req) { ++ *prev = req->dl_next; ++ found = true; ++ break; ++ } + } + } +- + spin_unlock(&queue->syn_wait_lock); + if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer)) + reqsk_put(req); diff --git a/queue-4.2/inet-fix-races-in-reqsk_queue_hash_req.patch b/queue-4.2/inet-fix-races-in-reqsk_queue_hash_req.patch new file mode 100644 index 00000000000..840bdaf11e4 --- /dev/null +++ b/queue-4.2/inet-fix-races-in-reqsk_queue_hash_req.patch @@ -0,0 +1,52 @@ +From foo@baz Thu Oct 22 17:25:24 PDT 2015 +From: Eric Dumazet +Date: Sat, 19 Sep 2015 09:48:04 -0700 +Subject: inet: fix races in reqsk_queue_hash_req() + +From: Eric Dumazet + +[ Upstream commit 29c6852602e259d2c1882f320b29d5c3fec0de04 ] + +Before allowing lockless LISTEN processing, we need to make +sure to arm the SYN_RECV timer before the req socket is visible +in hash tables. + +Also, req->rsk_hash should be written before we set rsk_refcnt +to a non zero value. + +Fixes: fa76ce7328b2 ("inet: get rid of central tcp/dccp listener timer") +Signed-off-by: Eric Dumazet +Cc: Ying Cai +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/inet_connection_sock.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -685,20 +685,20 @@ void reqsk_queue_hash_req(struct request + req->num_timeout = 0; + req->sk = NULL; + ++ setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req); ++ mod_timer_pinned(&req->rsk_timer, jiffies + timeout); ++ req->rsk_hash = hash; ++ + /* before letting lookups find us, make sure all req fields + * are committed to memory and refcnt initialized. + */ + smp_wmb(); + atomic_set(&req->rsk_refcnt, 2); +- setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req); +- req->rsk_hash = hash; + + spin_lock(&queue->syn_wait_lock); + req->dl_next = lopt->syn_table[hash]; + lopt->syn_table[hash] = req; + spin_unlock(&queue->syn_wait_lock); +- +- mod_timer_pinned(&req->rsk_timer, jiffies + timeout); + } + EXPORT_SYMBOL(reqsk_queue_hash_req); + diff --git a/queue-4.2/ipv6-don-t-call-with-rt6_uncached_list_flush_dev.patch b/queue-4.2/ipv6-don-t-call-with-rt6_uncached_list_flush_dev.patch new file mode 100644 index 00000000000..675967a59a5 --- /dev/null +++ b/queue-4.2/ipv6-don-t-call-with-rt6_uncached_list_flush_dev.patch @@ -0,0 +1,72 @@ +From foo@baz Thu Oct 22 17:25:24 PDT 2015 +From: "Eric W. Biederman" +Date: Mon, 12 Oct 2015 11:02:08 -0500 +Subject: ipv6: Don't call with rt6_uncached_list_flush_dev + +From: "Eric W. 
Biederman" + +[ Upstream commit e332bc67cf5e5e5b71a1aec9750d0791aac65183 ] + +As originally written rt6_uncached_list_flush_dev makes no sense when +called with dev == NULL as it attempts to flush all uncached routes +regardless of network namespace when dev == NULL. Which is simply +incorrect behavior. + +Furthermore at the point rt6_ifdown is called with dev == NULL no more +network devices exist in the network namespace so even if the code in +rt6_uncached_list_flush_dev were to attempt something sensible it +would be meaningless. + +Therefore remove support in rt6_uncached_list_flush_dev for handling +network devices where dev == NULL, and only call rt6_uncached_list_flush_dev + when rt6_ifdown is called with a network device. + +Fixes: 8d0b94afdca8 ("ipv6: Keep track of DST_NOCACHE routes in case of iface down/unregister") +Signed-off-by: "Eric W. Biederman" +Reviewed-by: Martin KaFai Lau +Tested-by: Martin KaFai Lau +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/route.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -139,6 +139,9 @@ static void rt6_uncached_list_flush_dev( + struct net_device *loopback_dev = net->loopback_dev; + int cpu; + ++ if (dev == loopback_dev) ++ return; ++ + for_each_possible_cpu(cpu) { + struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); + struct rt6_info *rt; +@@ -148,14 +151,12 @@ static void rt6_uncached_list_flush_dev( + struct inet6_dev *rt_idev = rt->rt6i_idev; + struct net_device *rt_dev = rt->dst.dev; + +- if (rt_idev && (rt_idev->dev == dev || !dev) && +- rt_idev->dev != loopback_dev) { ++ if (rt_idev->dev == dev) { + rt->rt6i_idev = in6_dev_get(loopback_dev); + in6_dev_put(rt_idev); + } + +- if (rt_dev && (rt_dev == dev || !dev) && +- rt_dev != loopback_dev) { ++ if (rt_dev == dev) { + rt->dst.dev = loopback_dev; + dev_hold(rt->dst.dev); + dev_put(rt_dev); +@@ -2577,7 +2578,8 @@ void rt6_ifdown(struct net *net, struct + + fib6_clean_all(net, fib6_ifdown, &adn); + icmp6_clean_all(fib6_ifdown, &adn); +- rt6_uncached_list_flush_dev(net, dev); ++ if (dev) ++ rt6_uncached_list_flush_dev(net, dev); + } + + struct rt6_mtu_change_arg { diff --git a/queue-4.2/l2tp-protect-tunnel-del_work-by-ref_count.patch b/queue-4.2/l2tp-protect-tunnel-del_work-by-ref_count.patch new file mode 100644 index 00000000000..78d75c2a283 --- /dev/null +++ b/queue-4.2/l2tp-protect-tunnel-del_work-by-ref_count.patch @@ -0,0 +1,55 @@ +From foo@baz Thu Oct 22 17:25:24 PDT 2015 +From: Alexander Couzens +Date: Mon, 28 Sep 2015 11:32:42 +0200 +Subject: l2tp: protect tunnel->del_work by ref_count + +From: Alexander Couzens + +[ Upstream commit 06a15f51cf3618e32a73871ee6a547ef7fd902b5 ] + +There is a small chance that tunnel_free() is called before tunnel->del_work scheduled +resulting in a zero pointer dereference. + +Signed-off-by: Alexander Couzens +Acked-by: James Chapman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_core.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +--- a/net/l2tp/l2tp_core.c ++++ b/net/l2tp/l2tp_core.c +@@ -1319,7 +1319,7 @@ static void l2tp_tunnel_del_work(struct + tunnel = container_of(work, struct l2tp_tunnel, del_work); + sk = l2tp_tunnel_sock_lookup(tunnel); + if (!sk) +- return; ++ goto out; + + sock = sk->sk_socket; + +@@ -1341,6 +1341,8 @@ static void l2tp_tunnel_del_work(struct + } + + l2tp_tunnel_sock_put(sk); ++out: ++ l2tp_tunnel_dec_refcount(tunnel); + } + + /* Create a socket for the tunnel, if one isn't set up by +@@ -1636,8 +1638,13 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create); + */ + int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) + { ++ l2tp_tunnel_inc_refcount(tunnel); + l2tp_tunnel_closeall(tunnel); +- return (false == queue_work(l2tp_wq, &tunnel->del_work)); ++ if (false == queue_work(l2tp_wq, &tunnel->del_work)) { ++ l2tp_tunnel_dec_refcount(tunnel); ++ return 1; ++ } ++ return 0; + } + EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); + diff --git a/queue-4.2/net-add-pfmemalloc-check-in-sk_add_backlog.patch b/queue-4.2/net-add-pfmemalloc-check-in-sk_add_backlog.patch new file mode 100644 index 00000000000..2dfdab758a0 --- /dev/null +++ b/queue-4.2/net-add-pfmemalloc-check-in-sk_add_backlog.patch @@ -0,0 +1,49 @@ +From foo@baz Thu Oct 22 17:25:24 PDT 2015 +From: Eric Dumazet +Date: Tue, 29 Sep 2015 18:52:25 -0700 +Subject: net: add pfmemalloc check in sk_add_backlog() + +From: Eric Dumazet + +[ Upstream commit c7c49b8fde26b74277188bdc6c9dca38db6fa35b ] + +Greg reported crashes hitting the following check in __sk_backlog_rcv() + + BUG_ON(!sock_flag(sk, SOCK_MEMALLOC)); + +The pfmemalloc bit is currently checked in sk_filter(). + +This works correctly for TCP, because sk_filter() is ran in +tcp_v[46]_rcv() before hitting the prequeue or backlog checks. + +For UDP or other protocols, this does not work, because the sk_filter() +is ran from sock_queue_rcv_skb(), which might be called _after_ backlog +queuing if socket is owned by user by the time packet is processed by +softirq handler. + +Fixes: b4b9e35585089 ("netvm: set PF_MEMALLOC as appropriate during SKB processing") +Signed-off-by: Eric Dumazet +Reported-by: Greg Thelen +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/sock.h | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -826,6 +826,14 @@ static inline __must_check int sk_add_ba + if (sk_rcvqueues_full(sk, limit)) + return -ENOBUFS; + ++ /* ++ * If the skb was allocated from pfmemalloc reserves, only ++ * allow SOCK_MEMALLOC sockets to use it as this socket is ++ * helping free memory ++ */ ++ if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) ++ return -ENOMEM; ++ + __sk_add_backlog(sk, skb); + sk->sk_backlog.len += skb->truesize; + return 0; diff --git a/queue-4.2/net-dsa-fix-preparation-of-a-port-stp-update.patch b/queue-4.2/net-dsa-fix-preparation-of-a-port-stp-update.patch new file mode 100644 index 00000000000..5e0635c357d --- /dev/null +++ b/queue-4.2/net-dsa-fix-preparation-of-a-port-stp-update.patch @@ -0,0 +1,47 @@ +From foo@baz Thu Oct 22 17:25:24 PDT 2015 +From: Vivien Didelot +Date: Tue, 29 Sep 2015 14:17:54 -0400 +Subject: net: dsa: fix preparation of a port STP update + +From: Vivien Didelot + +[ Upstream commit 57a47532c4312159935c98b7f1cf0e62296b9171 ] + +Because of the default 0 value of ret in dsa_slave_port_attr_set, a +driver may return -EOPNOTSUPP from the commit phase of a STP state, +which triggers a WARN() from switchdev. + +This happened on a 6185 switch which does not support hardware bridging. + +Fixes: 3563606258cf ("switchdev: convert STP update to switchdev attr set") +Reported-by: Andrew Lunn +Signed-off-by: Vivien Didelot +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dsa/slave.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +--- a/net/dsa/slave.c ++++ b/net/dsa/slave.c +@@ -348,12 +348,17 @@ static int dsa_slave_stp_update(struct n + static int dsa_slave_port_attr_set(struct net_device *dev, + struct switchdev_attr *attr) + { +- int ret = 0; ++ struct dsa_slave_priv *p = netdev_priv(dev); ++ struct dsa_switch *ds = p->parent; ++ int ret; + + switch (attr->id) { + case SWITCHDEV_ATTR_PORT_STP_STATE: +- if (attr->trans == SWITCHDEV_TRANS_COMMIT) +- ret = dsa_slave_stp_update(dev, attr->u.stp_state); ++ if (attr->trans == SWITCHDEV_TRANS_PREPARE) ++ ret = ds->drv->port_stp_update ? 0 : -EOPNOTSUPP; ++ else ++ ret = ds->drv->port_stp_update(ds, p->port, ++ attr->u.stp_state); + break; + default: + ret = -EOPNOTSUPP; diff --git a/queue-4.2/net-ibm-emac-bump-version-numbers-for-correct-work-with-ethtool.patch b/queue-4.2/net-ibm-emac-bump-version-numbers-for-correct-work-with-ethtool.patch new file mode 100644 index 00000000000..17d64561d9a --- /dev/null +++ b/queue-4.2/net-ibm-emac-bump-version-numbers-for-correct-work-with-ethtool.patch @@ -0,0 +1,40 @@ +From foo@baz Thu Oct 22 17:25:24 PDT 2015 +From: Ivan Mikhaylov +Date: Fri, 25 Sep 2015 11:52:27 +0400 +Subject: net/ibm/emac: bump version numbers for correct work with ethtool + +From: Ivan Mikhaylov + +[ Upstream commit 661dfc65f7981481ba2e31aaa702371e82336e56 ] + +The size of the MAC register dump used to be the size specified by the +reg property in the device tree. Userland has no good way of finding +out that size, and it was not specified consistently for each MAC type, +so ethtool would end up printing junk at the end of the register dump +if the device tree didn't match the size it assumed. + +Using the new version numbers indicates unambiguously that the size of +the MAC register dump is dependent only on the MAC type. 
+ +Fixes: 5369c71f7ca2 ("net/ibm/emac: fix size of emac dump memory areas") +Signed-off-by: Ivan Mikhaylov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/ibm/emac/core.h | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/ibm/emac/core.h ++++ b/drivers/net/ethernet/ibm/emac/core.h +@@ -460,8 +460,8 @@ struct emac_ethtool_regs_subhdr { + u32 index; + }; + +-#define EMAC_ETHTOOL_REGS_VER 0 +-#define EMAC4_ETHTOOL_REGS_VER 1 +-#define EMAC4SYNC_ETHTOOL_REGS_VER 2 ++#define EMAC_ETHTOOL_REGS_VER 3 ++#define EMAC4_ETHTOOL_REGS_VER 4 ++#define EMAC4SYNC_ETHTOOL_REGS_VER 5 + + #endif /* __IBM_NEWEMAC_CORE_H */ diff --git a/queue-4.2/net-unix-fix-logic-about-sk_peek_offset.patch b/queue-4.2/net-unix-fix-logic-about-sk_peek_offset.patch new file mode 100644 index 00000000000..caf341d6a5f --- /dev/null +++ b/queue-4.2/net-unix-fix-logic-about-sk_peek_offset.patch @@ -0,0 +1,67 @@ +From foo@baz Thu Oct 22 17:25:24 PDT 2015 +From: Andrey Vagin +Date: Fri, 2 Oct 2015 00:05:36 +0300 +Subject: net/unix: fix logic about sk_peek_offset + +From: Andrey Vagin + +[ Upstream commit e9193d60d363e4dff75ff6d43a48f22be26d59c7 ] + +Now send with MSG_PEEK can return data from multiple SKBs. + +Unfortunately we take into account the peek offset for each skb, +that is wrong. We need to apply the peek offset only once. + +In addition, the peek offset should be used only if MSG_PEEK is set. + +Cc: "David S. Miller" (maintainer:NETWORKING +Cc: Eric Dumazet (commit_signer:1/14=7%) +Cc: Aaron Conole +Fixes: 9f389e35674f ("af_unix: return data from multiple SKBs on recv() with MSG_PEEK flag") +Signed-off-by: Andrey Vagin +Tested-by: Aaron Conole +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/unix/af_unix.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -2064,6 +2064,11 @@ static int unix_stream_read_generic(stru + goto out; + } + ++ if (flags & MSG_PEEK) ++ skip = sk_peek_offset(sk, flags); ++ else ++ skip = 0; ++ + do { + int chunk; + struct sk_buff *skb, *last; +@@ -2112,7 +2117,6 @@ unlock: + break; + } + +- skip = sk_peek_offset(sk, flags); + while (skip >= unix_skb_len(skb)) { + skip -= unix_skb_len(skb); + last = skb; +@@ -2179,14 +2183,12 @@ unlock: + if (UNIXCB(skb).fp) + scm.fp = scm_fp_dup(UNIXCB(skb).fp); + +- if (skip) { +- sk_peek_offset_fwd(sk, chunk); +- skip -= chunk; +- } ++ sk_peek_offset_fwd(sk, chunk); + + if (UNIXCB(skb).fp) + break; + ++ skip = 0; + last = skb; + last_len = skb->len; + unix_state_lock(sk); diff --git a/queue-4.2/netlink-trim-skb-to-alloc-size-to-avoid-msg_trunc.patch b/queue-4.2/netlink-trim-skb-to-alloc-size-to-avoid-msg_trunc.patch new file mode 100644 index 00000000000..dc17779e47c --- /dev/null +++ b/queue-4.2/netlink-trim-skb-to-alloc-size-to-avoid-msg_trunc.patch @@ -0,0 +1,107 @@ +From foo@baz Thu Oct 22 17:25:24 PDT 2015 +From: "Arad, Ronen" +Date: Thu, 15 Oct 2015 01:55:17 -0700 +Subject: netlink: Trim skb to alloc size to avoid MSG_TRUNC + +From: "Arad, Ronen" + +[ Upstream commit db65a3aaf29ecce2e34271d52e8d2336b97bd9fe ] + +netlink_dump() allocates skb based on the calculated min_dump_alloc or +a per socket max_recvmsg_len. +min_alloc_size is maximum space required for any single netdev +attributes as calculated by rtnl_calcit(). +max_recvmsg_len tracks the user provided buffer to netlink_recvmsg. +It is capped at 16KiB. 
+The intention is to avoid small allocations and to minimize the number +of calls required to obtain dump information for all net devices. + +netlink_dump packs as many small messages as could fit within an skb +that was sized for the largest single netdev information. The actual +space available within an skb is larger than what is requested. It could +be much larger and up to near 2x with align to next power of 2 approach. + +Allowing netlink_dump to use all the space available within the +allocated skb increases the buffer size a user has to provide to avoid +truncation (i.e. MSG_TRUNC flag set). + +It was observed that with many VLANs configured on at least one netdev, +a larger buffer of near 64KiB was necessary to avoid "Message truncated" +error in "ip link" or "bridge [-c[ompressvlans]] vlan show" when +min_alloc_size was only a little over 32KiB. + +This patch trims skb to allocated size in order to allow the user to +avoid truncation with more reasonable buffer size. + +Signed-off-by: Ronen Arad +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/netlink/af_netlink.c | 34 ++++++++++++++++++++++------------ + 1 file changed, 22 insertions(+), 12 deletions(-) + +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -2750,6 +2750,7 @@ static int netlink_dump(struct sock *sk) + struct sk_buff *skb = NULL; + struct nlmsghdr *nlh; + int len, err = -ENOBUFS; ++ int alloc_min_size; + int alloc_size; + + mutex_lock(nlk->cb_mutex); +@@ -2758,9 +2759,6 @@ static int netlink_dump(struct sock *sk) + goto errout_skb; + } + +- cb = &nlk->cb; +- alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); +- + if (!netlink_rx_is_mmaped(sk) && + atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) + goto errout_skb; +@@ -2770,23 +2768,35 @@ static int netlink_dump(struct sock *sk) + * to reduce number of system calls on dump operations, if user + * ever provided a big enough buffer. + */ +- if (alloc_size < nlk->max_recvmsg_len) { +- skb = netlink_alloc_skb(sk, +- nlk->max_recvmsg_len, +- nlk->portid, ++ cb = &nlk->cb; ++ alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); ++ ++ if (alloc_min_size < nlk->max_recvmsg_len) { ++ alloc_size = nlk->max_recvmsg_len; ++ skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, + GFP_KERNEL | + __GFP_NOWARN | + __GFP_NORETRY); +- /* available room should be exact amount to avoid MSG_TRUNC */ +- if (skb) +- skb_reserve(skb, skb_tailroom(skb) - +- nlk->max_recvmsg_len); + } +- if (!skb) ++ if (!skb) { ++ alloc_size = alloc_min_size; + skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, + GFP_KERNEL); ++ } + if (!skb) + goto errout_skb; ++ ++ /* Trim skb to allocated size. User is expected to provide buffer as ++ * large as max(min_dump_alloc, 16KiB (mac_recvmsg_len capped at ++ * netlink_recvmsg())). dump will pack as many smaller messages as ++ * could fit within the allocated skb. skb is typically allocated ++ * with larger space than required (could be as much as near 2x the ++ * requested size with align to next power of 2 approach). Allowing ++ * dump to use the excess space makes it difficult for a user to have a ++ * reasonable static buffer based on the expected largest dump of a ++ * single netdev. The outcome is MSG_TRUNC error.
++ */ ++ skb_reserve(skb, skb_tailroom(skb) - alloc_size); + netlink_skb_set_owner_r(skb, sk); + + len = cb->dump(skb, cb); diff --git a/queue-4.2/ovs-do-not-allocate-memory-from-offline-numa-node.patch b/queue-4.2/ovs-do-not-allocate-memory-from-offline-numa-node.patch new file mode 100644 index 00000000000..d63543432cc --- /dev/null +++ b/queue-4.2/ovs-do-not-allocate-memory-from-offline-numa-node.patch @@ -0,0 +1,35 @@ +From foo@baz Thu Oct 22 17:25:24 PDT 2015 +From: Konstantin Khlebnikov +Date: Fri, 2 Oct 2015 13:18:22 +0300 +Subject: ovs: do not allocate memory from offline numa node + +From: Konstantin Khlebnikov + +[ Upstream commit 598c12d0ba6de9060f04999746eb1e015774044b ] + +When openvswitch tries allocate memory from offline numa node 0: +stats = kmem_cache_alloc_node(flow_stats_cache, GFP_KERNEL | __GFP_ZERO, 0) +It catches VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES || !node_online(nid)) +[ replaced with VM_WARN_ON(!node_online(nid)) recently ] in linux/gfp.h +This patch disables numa affinity in this case. + +Signed-off-by: Konstantin Khlebnikov +Acked-by: Pravin B Shelar +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/openvswitch/flow_table.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/openvswitch/flow_table.c ++++ b/net/openvswitch/flow_table.c +@@ -92,7 +92,8 @@ struct sw_flow *ovs_flow_alloc(void) + + /* Initialize the default stat node. */ + stats = kmem_cache_alloc_node(flow_stats_cache, +- GFP_KERNEL | __GFP_ZERO, 0); ++ GFP_KERNEL | __GFP_ZERO, ++ node_online(0) ? 0 : NUMA_NO_NODE); + if (!stats) + goto err; + diff --git a/queue-4.2/ppp-don-t-override-sk-sk_state-in-pppoe_flush_dev.patch b/queue-4.2/ppp-don-t-override-sk-sk_state-in-pppoe_flush_dev.patch new file mode 100644 index 00000000000..0234e75fdc8 --- /dev/null +++ b/queue-4.2/ppp-don-t-override-sk-sk_state-in-pppoe_flush_dev.patch @@ -0,0 +1,76 @@ +From foo@baz Thu Oct 22 17:25:24 PDT 2015 +From: Guillaume Nault +Date: Wed, 30 Sep 2015 11:45:33 +0200 +Subject: ppp: don't override sk->sk_state in pppoe_flush_dev() + +From: Guillaume Nault + +[ Upstream commit e6740165b8f7f06d8caee0fceab3fb9d790a6fed ] + +Since commit 2b018d57ff18 ("pppoe: drop PPPOX_ZOMBIEs in pppoe_release"), +pppoe_release() calls dev_put(po->pppoe_dev) if sk is in the +PPPOX_ZOMBIE state. But pppoe_flush_dev() can set sk->sk_state to +PPPOX_ZOMBIE _and_ reset po->pppoe_dev to NULL. 
This leads to the +following oops: + +[ 570.140800] BUG: unable to handle kernel NULL pointer dereference at 00000000000004e0 +[ 570.142931] IP: [] pppoe_release+0x50/0x101 [pppoe] +[ 570.144601] PGD 3d119067 PUD 3dbc1067 PMD 0 +[ 570.144601] Oops: 0000 [#1] SMP +[ 570.144601] Modules linked in: l2tp_ppp l2tp_netlink l2tp_core ip6_udp_tunnel udp_tunnel pppoe pppox ppp_generic slhc loop crc32c_intel ghash_clmulni_intel jitterentropy_rng sha256_generic hmac drbg ansi_cprng aesni_intel aes_x86_64 ablk_helper cryptd lrw gf128mul glue_helper acpi_cpufreq evdev serio_raw processor button ext4 crc16 mbcache jbd2 virtio_net virtio_blk virtio_pci virtio_ring virtio +[ 570.144601] CPU: 1 PID: 15738 Comm: ppp-apitest Not tainted 4.2.0 #1 +[ 570.144601] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Debian-1.8.2-1 04/01/2014 +[ 570.144601] task: ffff88003d30d600 ti: ffff880036b60000 task.ti: ffff880036b60000 +[ 570.144601] RIP: 0010:[] [] pppoe_release+0x50/0x101 [pppoe] +[ 570.144601] RSP: 0018:ffff880036b63e08 EFLAGS: 00010202 +[ 570.144601] RAX: 0000000000000000 RBX: ffff880034340000 RCX: 0000000000000206 +[ 570.144601] RDX: 0000000000000006 RSI: ffff88003d30dd20 RDI: ffff88003d30dd20 +[ 570.144601] RBP: ffff880036b63e28 R08: 0000000000000001 R09: 0000000000000000 +[ 570.144601] R10: 00007ffee9b50420 R11: ffff880034340078 R12: ffff8800387ec780 +[ 570.144601] R13: ffff8800387ec7b0 R14: ffff88003e222aa0 R15: ffff8800387ec7b0 +[ 570.144601] FS: 00007f5672f48700(0000) GS:ffff88003fc80000(0000) knlGS:0000000000000000 +[ 570.144601] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 570.144601] CR2: 00000000000004e0 CR3: 0000000037f7e000 CR4: 00000000000406a0 +[ 570.144601] Stack: +[ 570.144601] ffffffffa018f240 ffff8800387ec780 ffffffffa018f240 ffff8800387ec7b0 +[ 570.144601] ffff880036b63e48 ffffffff812caabe ffff880039e4e000 0000000000000008 +[ 570.144601] ffff880036b63e58 ffffffff812cabad ffff880036b63ea8 ffffffff811347f5 +[ 570.144601] Call Trace: +[ 570.144601] [] sock_release+0x1a/0x75 +[ 570.144601] [] sock_close+0xd/0x11 +[ 570.144601] [] __fput+0xff/0x1a5 +[ 570.144601] [] ____fput+0x9/0xb +[ 570.144601] [] task_work_run+0x66/0x90 +[ 570.144601] [] prepare_exit_to_usermode+0x8c/0xa7 +[ 570.144601] [] syscall_return_slowpath+0x16d/0x19b +[ 570.144601] [] int_ret_from_sys_call+0x25/0x9f +[ 570.144601] Code: 48 8b 83 c8 01 00 00 a8 01 74 12 48 89 df e8 8b 27 14 e1 b8 f7 ff ff ff e9 b7 00 00 00 8a 43 12 a8 0b 74 1c 48 8b 83 a8 04 00 00 <48> 8b 80 e0 04 00 00 65 ff 08 48 c7 83 a8 04 00 00 00 00 00 00 +[ 570.144601] RIP [] pppoe_release+0x50/0x101 [pppoe] +[ 570.144601] RSP +[ 570.144601] CR2: 00000000000004e0 +[ 570.200518] ---[ end trace 46956baf17349563 ]--- + +pppoe_flush_dev() has no reason to override sk->sk_state with +PPPOX_ZOMBIE. pppox_unbind_sock() already sets sk->sk_state to +PPPOX_DEAD, which is the correct state given that sk is unbound and +po->pppoe_dev is NULL. + +Fixes: 2b018d57ff18 ("pppoe: drop PPPOX_ZOMBIEs in pppoe_release") +Tested-by: Oleksii Berezhniak +Signed-off-by: Guillaume Nault +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ppp/pppoe.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/drivers/net/ppp/pppoe.c ++++ b/drivers/net/ppp/pppoe.c +@@ -313,7 +313,6 @@ static void pppoe_flush_dev(struct net_d + if (po->pppoe_dev == dev && + sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND | PPPOX_ZOMBIE)) { + pppox_unbind_sock(sk); +- sk->sk_state = PPPOX_ZOMBIE; + sk->sk_state_change(sk); + po->pppoe_dev = NULL; + dev_put(dev); diff --git a/queue-4.2/skbuff-fix-skb-checksum-flag-on-skb-pull.patch b/queue-4.2/skbuff-fix-skb-checksum-flag-on-skb-pull.patch new file mode 100644 index 00000000000..4075874240b --- /dev/null +++ b/queue-4.2/skbuff-fix-skb-checksum-flag-on-skb-pull.patch @@ -0,0 +1,68 @@ +From foo@baz Thu Oct 22 17:25:24 PDT 2015 +From: Pravin B Shelar +Date: Tue, 22 Sep 2015 12:57:53 -0700 +Subject: skbuff: Fix skb checksum flag on skb pull + +From: Pravin B Shelar + +[ Upstream commit 6ae459bdaaeebc632b16e54dcbabb490c6931d61 ] + +VXLAN device can receive skb with checksum partial. But the checksum +offset could be in outer header which is pulled on receive. This results +in negative checksum offset for the skb. Such skb can cause the assert +failure in skb_checksum_help(). Following patch fixes the bug by setting +checksum-none while pulling outer header. + +Following is the kernel panic msg from old kernel hitting the bug. + +------------[ cut here ]------------ +kernel BUG at net/core/dev.c:1906! +RIP: 0010:[] skb_checksum_help+0x144/0x150 +Call Trace: + +[] queue_userspace_packet+0x408/0x470 [openvswitch] +[] ovs_dp_upcall+0x5d/0x60 [openvswitch] +[] ovs_dp_process_packet_with_key+0xe6/0x100 [openvswitch] +[] ovs_dp_process_received_packet+0x4b/0x80 [openvswitch] +[] ovs_vport_receive+0x2a/0x30 [openvswitch] +[] vxlan_rcv+0x53/0x60 [openvswitch] +[] vxlan_udp_encap_recv+0x8b/0xf0 [openvswitch] +[] udp_queue_rcv_skb+0x2dc/0x3b0 +[] __udp4_lib_rcv+0x1cf/0x6c0 +[] udp_rcv+0x1a/0x20 +[] ip_local_deliver_finish+0xdd/0x280 +[] ip_local_deliver+0x88/0x90 +[] ip_rcv_finish+0x10d/0x370 +[] ip_rcv+0x235/0x300 +[] __netif_receive_skb+0x55d/0x620 +[] netif_receive_skb+0x80/0x90 +[] virtnet_poll+0x555/0x6f0 +[] net_rx_action+0x134/0x290 +[] __do_softirq+0xa8/0x210 +[] call_softirq+0x1c/0x30 +[] do_softirq+0x65/0xa0 +[] irq_exit+0x8e/0xb0 +[] do_IRQ+0x63/0xe0 +[] common_interrupt+0x6e/0x6e + +Reported-by: Anupam Chanda +Signed-off-by: Pravin B Shelar +Acked-by: Tom Herbert +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/skbuff.h | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -2601,6 +2601,9 @@ static inline void skb_postpull_rcsum(st + { + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); ++ else if (skb->ip_summed == CHECKSUM_PARTIAL && ++ skb_checksum_start_offset(skb) <= len) ++ skb->ip_summed = CHECKSUM_NONE; + } + + unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); diff --git a/queue-4.2/skbuff-fix-skb-checksum-partial-check.patch b/queue-4.2/skbuff-fix-skb-checksum-partial-check.patch new file mode 100644 index 00000000000..4d3fed25233 --- /dev/null +++ b/queue-4.2/skbuff-fix-skb-checksum-partial-check.patch @@ -0,0 +1,58 @@ +From foo@baz Thu Oct 22 17:25:24 PDT 2015 +From: Pravin B Shelar +Date: Mon, 28 Sep 2015 17:24:25 -0700 +Subject: skbuff: Fix skb checksum partial check. 
+ +From: Pravin B Shelar + +[ Upstream commit 31b33dfb0a144469dd805514c9e63f4993729a48 ] + +Earlier patch 6ae459bda tried to detect void checksum partial +skb by comparing pull length to checksum offset. But it does +not work for all cases since checksum-offset depends on +updates to skb->data. + +Following patch fixes it by validating checksum start offset +after skb->data pointer is updated. Negative value of checksum +offset start means there is no need to checksum. + +Fixes: 6ae459bda ("skbuff: Fix skb checksum flag on skb pull") +Reported-by: Andrew Vagin +Signed-off-by: Pravin B Shelar +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/skbuff.h | 2 +- + net/core/skbuff.c | 9 +++++---- + 2 files changed, 6 insertions(+), 5 deletions(-) + +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -2602,7 +2602,7 @@ static inline void skb_postpull_rcsum(st + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); + else if (skb->ip_summed == CHECKSUM_PARTIAL && +- skb_checksum_start_offset(skb) <= len) ++ skb_checksum_start_offset(skb) < 0) + skb->ip_summed = CHECKSUM_NONE; + } + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -2958,11 +2958,12 @@ EXPORT_SYMBOL_GPL(skb_append_pagefrags); + */ + unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) + { ++ unsigned char *data = skb->data; ++ + BUG_ON(len > skb->len); +- skb->len -= len; +- BUG_ON(skb->len < skb->data_len); +- skb_postpull_rcsum(skb, skb->data, len); +- return skb->data += len; ++ __skb_pull(skb, len); ++ skb_postpull_rcsum(skb, data, len); ++ return skb->data; + } + EXPORT_SYMBOL_GPL(skb_pull_rcsum); + diff --git a/queue-4.2/tipc-move-fragment-importance-field-to-new-header-position.patch b/queue-4.2/tipc-move-fragment-importance-field-to-new-header-position.patch new file mode 100644 index 00000000000..866de2e9a25 --- /dev/null +++ b/queue-4.2/tipc-move-fragment-importance-field-to-new-header-position.patch @@ -0,0 +1,57 @@ +From foo@baz Thu Oct 22 17:25:24 PDT 2015 +From: Jon Paul Maloy +Date: Wed, 14 Oct 2015 09:23:18 -0400 +Subject: tipc: move fragment importance field to new header position + +From: Jon Paul Maloy + +[ Upstream commit dde4b5ae65de659b9ec64bafdde0430459fcb495 ] + +In commit e3eea1eb47a ("tipc: clean up handling of message priorities") +we introduced a field in the packet header for keeping track of the +priority of fragments, since this value is not present in the specified +protocol header. Since the value so far only is used at the transmitting +end of the link, we have not yet officially defined it as part of the +protocol. + +Unfortunately, the field we use for keeping this value, bits 13-15 +in word 5, has turned out to be a poor choice; it is already used by the +broadcast protocol for carrying the 'network id' field of the sending +node. Since packet fragments also need to be transported across the +broadcast protocol, the risk of conflict is obvious, and we see this +happen when we use network identities larger than 2^13-1. This has +escaped our testing because we have so far only been using small network +id values. + +We now move this field to bits 0-2 in word 9, a field that is guaranteed +to be unused by all involved protocols. + +Fixes: e3eea1eb47a ("tipc: clean up handling of message priorities") +Signed-off-by: Jon Maloy +Acked-by: Ying Xue +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tipc/msg.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/tipc/msg.h ++++ b/net/tipc/msg.h +@@ -357,7 +357,7 @@ static inline u32 msg_importance(struct + if (likely((usr <= TIPC_CRITICAL_IMPORTANCE) && !msg_errcode(m))) + return usr; + if ((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER)) +- return msg_bits(m, 5, 13, 0x7); ++ return msg_bits(m, 9, 0, 0x7); + return TIPC_SYSTEM_IMPORTANCE; + } + +@@ -366,7 +366,7 @@ static inline void msg_set_importance(st + int usr = msg_user(m); + + if (likely((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER))) +- msg_set_bits(m, 5, 13, 0x7, i); ++ msg_set_bits(m, 9, 0, 0x7, i); + else if (i < TIPC_SYSTEM_IMPORTANCE) + msg_set_user(m, i); + else -- 2.47.2