From 5372e8ba7b81c5022dd01de42b9b4024bc87db86 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 26 Jan 2016 22:26:45 -0800 Subject: [PATCH] 3.14-stable patches added patches: bonding-prevent-ipv6-link-local-address-on-enslaved-devices.patch bridge-only-call-sbin-bridge-stp-for-the-initial-network-namespace.patch connector-bump-skb-users-before-callback-invocation.patch ipv6-addrlabel-fix-ip6addrlbl_get.patch ipv6-update-skb-csum-when-ce-mark-is-propagated.patch isdn_ppp-add-checks-for-allocation-failure-in-isdn_ppp_open.patch net-possible-use-after-free-in-dst_release.patch net-sctp-prevent-writes-to-cookie_hmac_alg-from-accessing-invalid-memory.patch phonet-properly-unshare-skbs-in-phonet_rcv.patch ppp-slip-validate-vj-compression-slot-parameters-completely.patch sctp-sctp-should-release-assoc-when-sctp_make_abort_user-return-null-in-sctp_close.patch tcp_yeah-don-t-set-ssthresh-below-2.patch team-replace-rcu_read_lock-with-a-mutex-in-team_vlan_rx_kill_vid.patch unix-properly-account-for-fds-passed-over-unix-sockets.patch utf-8-q-veth-20don-e2-80-99t-20modify-20ip-5fsum-utf-8-q-med-20doing-20so-20treats-20packets-20with-20bad-20checksums-utf-8-q-20as-20good.patch vxlan-fix-test-which-detect-duplicate-vxlan-iface.patch --- ...nk-local-address-on-enslaved-devices.patch | 60 ++++++++ ...tp-for-the-initial-network-namespace.patch | 48 ++++++ ...skb-users-before-callback-invocation.patch | 57 ++++++++ .../ipv6-addrlabel-fix-ip6addrlbl_get.patch | 39 +++++ ...-skb-csum-when-ce-mark-is-propagated.patch | 77 ++++++++++ ...-allocation-failure-in-isdn_ppp_open.patch | 43 ++++++ ...ssible-use-after-free-in-dst_release.patch | 42 ++++++ ...ac_alg-from-accessing-invalid-memory.patch | 37 +++++ ...-properly-unshare-skbs-in-phonet_rcv.patch | 48 ++++++ ...mpression-slot-parameters-completely.patch | 136 +++++++++++++++++ ...abort_user-return-null-in-sctp_close.patch | 72 +++++++++ queue-3.14/series | 16 ++ .../tcp_yeah-don-t-set-ssthresh-below-2.patch | 49 +++++++ ...ith-a-mutex-in-team_vlan_rx_kill_vid.patch | 42 ++++++ ...unt-for-fds-passed-over-unix-sockets.patch | 138 ++++++++++++++++++ ...0bad-20checksums-utf-8-q-20as-20good.patch | 75 ++++++++++ ...t-which-detect-duplicate-vxlan-iface.patch | 78 ++++++++++ 17 files changed, 1057 insertions(+) create mode 100644 queue-3.14/bonding-prevent-ipv6-link-local-address-on-enslaved-devices.patch create mode 100644 queue-3.14/bridge-only-call-sbin-bridge-stp-for-the-initial-network-namespace.patch create mode 100644 queue-3.14/connector-bump-skb-users-before-callback-invocation.patch create mode 100644 queue-3.14/ipv6-addrlabel-fix-ip6addrlbl_get.patch create mode 100644 queue-3.14/ipv6-update-skb-csum-when-ce-mark-is-propagated.patch create mode 100644 queue-3.14/isdn_ppp-add-checks-for-allocation-failure-in-isdn_ppp_open.patch create mode 100644 queue-3.14/net-possible-use-after-free-in-dst_release.patch create mode 100644 queue-3.14/net-sctp-prevent-writes-to-cookie_hmac_alg-from-accessing-invalid-memory.patch create mode 100644 queue-3.14/phonet-properly-unshare-skbs-in-phonet_rcv.patch create mode 100644 queue-3.14/ppp-slip-validate-vj-compression-slot-parameters-completely.patch create mode 100644 queue-3.14/sctp-sctp-should-release-assoc-when-sctp_make_abort_user-return-null-in-sctp_close.patch create mode 100644 queue-3.14/tcp_yeah-don-t-set-ssthresh-below-2.patch create mode 100644 queue-3.14/team-replace-rcu_read_lock-with-a-mutex-in-team_vlan_rx_kill_vid.patch create mode 100644 queue-3.14/unix-properly-account-for-fds-passed-over-unix-sockets.patch create mode 100644 queue-3.14/utf-8-q-veth-20don-e2-80-99t-20modify-20ip-5fsum-utf-8-q-med-20doing-20so-20treats-20packets-20with-20bad-20checksums-utf-8-q-20as-20good.patch create mode 100644 queue-3.14/vxlan-fix-test-which-detect-duplicate-vxlan-iface.patch diff --git a/queue-3.14/bonding-prevent-ipv6-link-local-address-on-enslaved-devices.patch b/queue-3.14/bonding-prevent-ipv6-link-local-address-on-enslaved-devices.patch new file mode 100644 index 00000000000..9cee1c00811 --- /dev/null +++ b/queue-3.14/bonding-prevent-ipv6-link-local-address-on-enslaved-devices.patch @@ -0,0 +1,60 @@ +From foo@baz Tue Jan 26 22:23:35 PST 2016 +From: Karl Heiss +Date: Mon, 11 Jan 2016 08:28:43 -0500 +Subject: bonding: Prevent IPv6 link local address on enslaved devices +Content-Length: 2098 +Lines: 55 + +From: Karl Heiss + +[ Upstream commit 03d84a5f83a67e692af00a3d3901e7820e3e84d5 ] + +Commit 1f718f0f4f97 ("bonding: populate neighbour's private on enslave") +undoes the fix provided by commit c2edacf80e15 ("bonding / ipv6: no addrconf +for slaves separately from master") by effectively setting the slave flag +after the slave has been opened. If the slave comes up quickly enough, it +will go through the IPv6 addrconf before the slave flag has been set and +will get a link local IPv6 address. + +In order to ensure that addrconf knows to ignore the slave devices on state +change, set IFF_SLAVE before dev_open() during bonding enslavement. + +Fixes: 1f718f0f4f97 ("bonding: populate neighbour's private on enslave") +Signed-off-by: Karl Heiss +Signed-off-by: Jay Vosburgh +Reviewed-by: Jarod Wilson +Signed-off-by: Andy Gospodarek +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -1181,7 +1181,6 @@ static int bond_master_upper_dev_link(st + err = netdev_master_upper_dev_link_private(slave_dev, bond_dev, slave); + if (err) + return err; +- slave_dev->flags |= IFF_SLAVE; + rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL); + return 0; + } +@@ -1363,6 +1362,9 @@ int bond_enslave(struct net_device *bond + } + } + ++ /* set slave flag before open to prevent IPv6 addrconf */ ++ slave_dev->flags |= IFF_SLAVE; ++ + /* open the slave since the application closed it */ + res = dev_open(slave_dev); + if (res) { +@@ -1617,6 +1619,7 @@ err_close: + dev_close(slave_dev); + + err_restore_mac: ++ slave_dev->flags &= ~IFF_SLAVE; + if (!bond->params.fail_over_mac || + bond->params.mode != BOND_MODE_ACTIVEBACKUP) { + /* XXX TODO - fom follow mode needs to change master's diff --git a/queue-3.14/bridge-only-call-sbin-bridge-stp-for-the-initial-network-namespace.patch b/queue-3.14/bridge-only-call-sbin-bridge-stp-for-the-initial-network-namespace.patch new file mode 100644 index 00000000000..0d2a2b26100 --- /dev/null +++ b/queue-3.14/bridge-only-call-sbin-bridge-stp-for-the-initial-network-namespace.patch @@ -0,0 +1,48 @@ +From foo@baz Tue Jan 26 22:23:35 PST 2016 +From: Hannes Frederic Sowa +Date: Tue, 5 Jan 2016 10:46:00 +0100 +Subject: bridge: Only call /sbin/bridge-stp for the initial network namespace +Status: RO +Content-Length: 1472 +Lines: 42 + +From: Hannes Frederic Sowa + +[ Upstream commit ff62198553e43cdffa9d539f6165d3e83f8a42bc ] + +[I stole this patch from Eric Biederman. He wrote:] + +> There is no defined mechanism to pass network namespace information +> into /sbin/bridge-stp therefore don't even try to invoke it except +> for bridge devices in the initial network namespace. +> +> It is possible for unprivileged users to cause /sbin/bridge-stp to be +> invoked for any network device name which if /sbin/bridge-stp does not +> guard against unreasonable arguments or being invoked twice on the +> same network device could cause problems. + +[Hannes: changed patch using netns_eq] + +Cc: Eric W. Biederman +Signed-off-by: Eric W. Biederman +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_stp_if.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/net/bridge/br_stp_if.c ++++ b/net/bridge/br_stp_if.c +@@ -128,7 +128,10 @@ static void br_stp_start(struct net_brid + char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL }; + char *envp[] = { NULL }; + +- r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); ++ if (net_eq(dev_net(br->dev), &init_net)) ++ r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); ++ else ++ r = -ENOENT; + + spin_lock_bh(&br->lock); + diff --git a/queue-3.14/connector-bump-skb-users-before-callback-invocation.patch b/queue-3.14/connector-bump-skb-users-before-callback-invocation.patch new file mode 100644 index 00000000000..0f2301163e1 --- /dev/null +++ b/queue-3.14/connector-bump-skb-users-before-callback-invocation.patch @@ -0,0 +1,57 @@ +From foo@baz Tue Jan 26 22:23:35 PST 2016 +From: Florian Westphal +Date: Thu, 31 Dec 2015 14:26:33 +0100 +Subject: connector: bump skb->users before callback invocation +Status: RO +Content-Length: 1407 +Lines: 51 + +From: Florian Westphal + +[ Upstream commit 55285bf09427c5abf43ee1d54e892f352092b1f1 ] + +Dmitry reports memleak with syskaller program. +Problem is that connector bumps skb usecount but might not invoke callback. + +So move skb_get to where we invoke the callback. + +Reported-by: Dmitry Vyukov +Signed-off-by: Florian Westphal +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/connector/connector.c | 11 +++-------- + 1 file changed, 3 insertions(+), 8 deletions(-) + +--- a/drivers/connector/connector.c ++++ b/drivers/connector/connector.c +@@ -154,26 +154,21 @@ static int cn_call_callback(struct sk_bu + * + * It checks skb, netlink header and msg sizes, and calls callback helper. + */ +-static void cn_rx_skb(struct sk_buff *__skb) ++static void cn_rx_skb(struct sk_buff *skb) + { + struct nlmsghdr *nlh; +- struct sk_buff *skb; + int len, err; + +- skb = skb_get(__skb); +- + if (skb->len >= NLMSG_HDRLEN) { + nlh = nlmsg_hdr(skb); + len = nlmsg_len(nlh); + + if (len < (int)sizeof(struct cn_msg) || + skb->len < nlh->nlmsg_len || +- len > CONNECTOR_MAX_MSG_SIZE) { +- kfree_skb(skb); ++ len > CONNECTOR_MAX_MSG_SIZE) + return; +- } + +- err = cn_call_callback(skb); ++ err = cn_call_callback(skb_get(skb)); + if (err < 0) + kfree_skb(skb); + } diff --git a/queue-3.14/ipv6-addrlabel-fix-ip6addrlbl_get.patch b/queue-3.14/ipv6-addrlabel-fix-ip6addrlbl_get.patch new file mode 100644 index 00000000000..034d6d1448d --- /dev/null +++ b/queue-3.14/ipv6-addrlabel-fix-ip6addrlbl_get.patch @@ -0,0 +1,39 @@ +From foo@baz Tue Jan 26 22:23:35 PST 2016 +From: Andrey Ryabinin +Date: Mon, 21 Dec 2015 12:54:45 +0300 +Subject: ipv6/addrlabel: fix ip6addrlbl_get() +Status: RO +Content-Length: 1182 +Lines: 33 + +From: Andrey Ryabinin + +[ Upstream commit e459dfeeb64008b2d23bdf600f03b3605dbb8152 ] + +ip6addrlbl_get() has never worked. If ip6addrlbl_hold() succeeded, +ip6addrlbl_get() will exit with '-ESRCH'. If ip6addrlbl_hold() failed, +ip6addrlbl_get() will use about to be free ip6addrlbl_entry pointer. + +Fix this by inverting ip6addrlbl_hold() check. + +Fixes: 2a8cc6c89039 ("[IPV6] ADDRCONF: Support RFC3484 configurable address selection policy table.") +Signed-off-by: Andrey Ryabinin +Reviewed-by: Cong Wang +Acked-by: YOSHIFUJI Hideaki +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrlabel.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv6/addrlabel.c ++++ b/net/ipv6/addrlabel.c +@@ -558,7 +558,7 @@ static int ip6addrlbl_get(struct sk_buff + + rcu_read_lock(); + p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); +- if (p && ip6addrlbl_hold(p)) ++ if (p && !ip6addrlbl_hold(p)) + p = NULL; + lseq = ip6addrlbl_table.seq; + rcu_read_unlock(); diff --git a/queue-3.14/ipv6-update-skb-csum-when-ce-mark-is-propagated.patch b/queue-3.14/ipv6-update-skb-csum-when-ce-mark-is-propagated.patch new file mode 100644 index 00000000000..9ce66b95150 --- /dev/null +++ b/queue-3.14/ipv6-update-skb-csum-when-ce-mark-is-propagated.patch @@ -0,0 +1,77 @@ +From foo@baz Tue Jan 26 22:23:35 PST 2016 +From: Eric Dumazet +Date: Fri, 15 Jan 2016 04:56:56 -0800 +Subject: ipv6: update skb->csum when CE mark is propagated +Status: RO +Content-Length: 2420 +Lines: 73 + +From: Eric Dumazet + +[ Upstream commit 34ae6a1aa0540f0f781dd265366036355fdc8930 ] + +When a tunnel decapsulates the outer header, it has to comply +with RFC 6080 and eventually propagate CE mark into inner header. + +It turns out IP6_ECN_set_ce() does not correctly update skb->csum +for CHECKSUM_COMPLETE packets, triggering infamous "hw csum failure" +messages and stack traces. + +Signed-off-by: Eric Dumazet +Acked-by: Herbert Xu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_ecn.h | 19 ++++++++++++++++--- + net/ipv6/xfrm6_mode_tunnel.c | 2 +- + 2 files changed, 17 insertions(+), 4 deletions(-) + +--- a/include/net/inet_ecn.h ++++ b/include/net/inet_ecn.h +@@ -111,11 +111,24 @@ static inline void ipv4_copy_dscp(unsign + + struct ipv6hdr; + +-static inline int IP6_ECN_set_ce(struct ipv6hdr *iph) ++/* Note: ++ * IP_ECN_set_ce() has to tweak IPV4 checksum when setting CE, ++ * meaning both changes have no effect on skb->csum if/when CHECKSUM_COMPLETE ++ * In IPv6 case, no checksum compensates the change in IPv6 header, ++ * so we have to update skb->csum. ++ */ ++static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph) + { ++ __be32 from, to; ++ + if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph))) + return 0; +- *(__be32*)iph |= htonl(INET_ECN_CE << 20); ++ ++ from = *(__be32 *)iph; ++ to = from | htonl(INET_ECN_CE << 20); ++ *(__be32 *)iph = to; ++ if (skb->ip_summed == CHECKSUM_COMPLETE) ++ skb->csum = csum_add(csum_sub(skb->csum, from), to); + return 1; + } + +@@ -142,7 +155,7 @@ static inline int INET_ECN_set_ce(struct + case cpu_to_be16(ETH_P_IPV6): + if (skb_network_header(skb) + sizeof(struct ipv6hdr) <= + skb_tail_pointer(skb)) +- return IP6_ECN_set_ce(ipv6_hdr(skb)); ++ return IP6_ECN_set_ce(skb, ipv6_hdr(skb)); + break; + } + +--- a/net/ipv6/xfrm6_mode_tunnel.c ++++ b/net/ipv6/xfrm6_mode_tunnel.c +@@ -83,7 +83,7 @@ static inline void ipip6_ecn_decapsulate + struct ipv6hdr *inner_iph = ipipv6_hdr(skb); + + if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph))) +- IP6_ECN_set_ce(inner_iph); ++ IP6_ECN_set_ce(skb, inner_iph); + } + + /* Add encapsulation header. diff --git a/queue-3.14/isdn_ppp-add-checks-for-allocation-failure-in-isdn_ppp_open.patch b/queue-3.14/isdn_ppp-add-checks-for-allocation-failure-in-isdn_ppp_open.patch new file mode 100644 index 00000000000..d487c22da14 --- /dev/null +++ b/queue-3.14/isdn_ppp-add-checks-for-allocation-failure-in-isdn_ppp_open.patch @@ -0,0 +1,43 @@ +From foo@baz Tue Jan 26 22:23:35 PST 2016 +From: Ben Hutchings +Date: Sun, 1 Nov 2015 16:21:24 +0000 +Subject: isdn_ppp: Add checks for allocation failure in isdn_ppp_open() +Status: RO +Content-Length: 996 +Lines: 37 + +From: Ben Hutchings + +[ Upstream commit 0baa57d8dc32db78369d8b5176ef56c5e2e18ab3 ] + +Compile-tested only. + +Signed-off-by: Ben Hutchings +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/isdn/i4l/isdn_ppp.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/isdn/i4l/isdn_ppp.c ++++ b/drivers/isdn/i4l/isdn_ppp.c +@@ -301,6 +301,8 @@ isdn_ppp_open(int min, struct file *file + is->compflags = 0; + + is->reset = isdn_ppp_ccp_reset_alloc(is); ++ if (!is->reset) ++ return -ENOMEM; + + is->lp = NULL; + is->mp_seqno = 0; /* MP sequence number */ +@@ -320,6 +322,10 @@ isdn_ppp_open(int min, struct file *file + * VJ header compression init + */ + is->slcomp = slhc_init(16, 16); /* not necessary for 2. link in bundle */ ++ if (!is->slcomp) { ++ isdn_ppp_ccp_reset_free(is); ++ return -ENOMEM; ++ } + #endif + #ifdef CONFIG_IPPP_FILTER + is->pass_filter = NULL; diff --git a/queue-3.14/net-possible-use-after-free-in-dst_release.patch b/queue-3.14/net-possible-use-after-free-in-dst_release.patch new file mode 100644 index 00000000000..2c6d51357af --- /dev/null +++ b/queue-3.14/net-possible-use-after-free-in-dst_release.patch @@ -0,0 +1,42 @@ +From foo@baz Tue Jan 26 22:23:35 PST 2016 +From: Francesco Ruggeri +Date: Wed, 6 Jan 2016 00:18:48 -0800 +Subject: net: possible use after free in dst_release +Status: RO +Content-Length: 1116 +Lines: 36 + +From: Francesco Ruggeri + +[ Upstream commit 07a5d38453599052aff0877b16bb9c1585f08609 ] + +dst_release should not access dst->flags after decrementing +__refcnt to 0. The dst_entry may be in dst_busy_list and +dst_gc_task may dst_destroy it before dst_release gets a chance +to access dst->flags. + +Fixes: d69bbf88c8d0 ("net: fix a race in dst_release()") +Fixes: 27b75c95f10d ("net: avoid RCU for NOCACHE dst") +Signed-off-by: Francesco Ruggeri +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dst.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/core/dst.c ++++ b/net/core/dst.c +@@ -280,10 +280,11 @@ void dst_release(struct dst_entry *dst) + { + if (dst) { + int newrefcnt; ++ unsigned short nocache = dst->flags & DST_NOCACHE; + + newrefcnt = atomic_dec_return(&dst->__refcnt); + WARN_ON(newrefcnt < 0); +- if (!newrefcnt && unlikely(dst->flags & DST_NOCACHE)) ++ if (!newrefcnt && unlikely(nocache)) + call_rcu(&dst->rcu_head, dst_destroy_rcu); + } + } diff --git a/queue-3.14/net-sctp-prevent-writes-to-cookie_hmac_alg-from-accessing-invalid-memory.patch b/queue-3.14/net-sctp-prevent-writes-to-cookie_hmac_alg-from-accessing-invalid-memory.patch new file mode 100644 index 00000000000..b7fd433db77 --- /dev/null +++ b/queue-3.14/net-sctp-prevent-writes-to-cookie_hmac_alg-from-accessing-invalid-memory.patch @@ -0,0 +1,37 @@ +From foo@baz Tue Jan 26 22:23:35 PST 2016 +From: Sasha Levin +Date: Thu, 7 Jan 2016 14:52:43 -0500 +Subject: net: sctp: prevent writes to cookie_hmac_alg from accessing invalid memory +Status: RO +Content-Length: 955 +Lines: 31 + +From: Sasha Levin + +[ Upstream commit 320f1a4a175e7cd5d3f006f92b4d4d3e2cbb7bb5 ] + +proc_dostring() needs an initialized destination string, while the one +provided in proc_sctp_do_hmac_alg() contains stack garbage. + +Thus, writing to cookie_hmac_alg would strlen() that garbage and end up +accessing invalid memory. + +Fixes: 3c68198e7 ("sctp: Make hmac algorithm selection for cookie generation dynamic") +Signed-off-by: Sasha Levin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/sysctl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sctp/sysctl.c ++++ b/net/sctp/sysctl.c +@@ -310,7 +310,7 @@ static int proc_sctp_do_hmac_alg(struct + struct ctl_table tbl; + bool changed = false; + char *none = "none"; +- char tmp[8]; ++ char tmp[8] = {0}; + int ret; + + memset(&tbl, 0, sizeof(struct ctl_table)); diff --git a/queue-3.14/phonet-properly-unshare-skbs-in-phonet_rcv.patch b/queue-3.14/phonet-properly-unshare-skbs-in-phonet_rcv.patch new file mode 100644 index 00000000000..51cd36a7031 --- /dev/null +++ b/queue-3.14/phonet-properly-unshare-skbs-in-phonet_rcv.patch @@ -0,0 +1,48 @@ +From foo@baz Tue Jan 26 22:23:35 PST 2016 +From: Eric Dumazet +Date: Tue, 12 Jan 2016 08:58:00 -0800 +Subject: phonet: properly unshare skbs in phonet_rcv() +Status: RO +Content-Length: 1319 +Lines: 42 + +From: Eric Dumazet + +[ Upstream commit 7aaed57c5c2890634cfadf725173c7c68ea4cb4f ] + +Ivaylo Dimitrov reported a regression caused by commit 7866a621043f +("dev: add per net_device packet type chains"). + +skb->dev becomes NULL and we crash in __netif_receive_skb_core(). + +Before above commit, different kind of bugs or corruptions could happen +without major crash. + +But the root cause is that phonet_rcv() can queue skb without checking +if skb is shared or not. + +Many thanks to Ivaylo Dimitrov for his help, diagnosis and tests. + +Reported-by: Ivaylo Dimitrov +Tested-by: Ivaylo Dimitrov +Signed-off-by: Eric Dumazet +Cc: Remi Denis-Courmont +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/phonet/af_phonet.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/phonet/af_phonet.c ++++ b/net/phonet/af_phonet.c +@@ -377,6 +377,10 @@ static int phonet_rcv(struct sk_buff *sk + struct sockaddr_pn sa; + u16 len; + ++ skb = skb_share_check(skb, GFP_ATOMIC); ++ if (!skb) ++ return NET_RX_DROP; ++ + /* check we have at least a full Phonet header */ + if (!pskb_pull(skb, sizeof(struct phonethdr))) + goto out; diff --git a/queue-3.14/ppp-slip-validate-vj-compression-slot-parameters-completely.patch b/queue-3.14/ppp-slip-validate-vj-compression-slot-parameters-completely.patch new file mode 100644 index 00000000000..a7d5169e44e --- /dev/null +++ b/queue-3.14/ppp-slip-validate-vj-compression-slot-parameters-completely.patch @@ -0,0 +1,136 @@ +From foo@baz Tue Jan 26 22:23:35 PST 2016 +From: Ben Hutchings +Date: Sun, 1 Nov 2015 16:22:53 +0000 +Subject: ppp, slip: Validate VJ compression slot parameters completely +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Status: RO +Content-Length: 4165 +Lines: 133 + +From: Ben Hutchings + +[ Upstream commit 4ab42d78e37a294ac7bc56901d563c642e03c4ae ] + +Currently slhc_init() treats out-of-range values of rslots and tslots +as equivalent to 0, except that if tslots is too large it will +dereference a null pointer (CVE-2015-7799). + +Add a range-check at the top of the function and make it return an +ERR_PTR() on error instead of NULL. Change the callers accordingly. + +Compile-tested only. + +Reported-by: 郭永刚 +References: http://article.gmane.org/gmane.comp.security.oss.general/17908 +Signed-off-by: Ben Hutchings +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/isdn/i4l/isdn_ppp.c | 10 ++++------ + drivers/net/ppp/ppp_generic.c | 6 ++---- + drivers/net/slip/slhc.c | 12 ++++++++---- + drivers/net/slip/slip.c | 2 +- + 4 files changed, 15 insertions(+), 15 deletions(-) + +--- a/drivers/isdn/i4l/isdn_ppp.c ++++ b/drivers/isdn/i4l/isdn_ppp.c +@@ -322,9 +322,9 @@ isdn_ppp_open(int min, struct file *file + * VJ header compression init + */ + is->slcomp = slhc_init(16, 16); /* not necessary for 2. link in bundle */ +- if (!is->slcomp) { ++ if (IS_ERR(is->slcomp)) { + isdn_ppp_ccp_reset_free(is); +- return -ENOMEM; ++ return PTR_ERR(is->slcomp); + } + #endif + #ifdef CONFIG_IPPP_FILTER +@@ -574,10 +574,8 @@ isdn_ppp_ioctl(int min, struct file *fil + is->maxcid = val; + #ifdef CONFIG_ISDN_PPP_VJ + sltmp = slhc_init(16, val); +- if (!sltmp) { +- printk(KERN_ERR "ippp, can't realloc slhc struct\n"); +- return -ENOMEM; +- } ++ if (IS_ERR(sltmp)) ++ return PTR_ERR(sltmp); + if (is->slcomp) + slhc_free(is->slcomp); + is->slcomp = sltmp; +--- a/drivers/net/ppp/ppp_generic.c ++++ b/drivers/net/ppp/ppp_generic.c +@@ -716,10 +716,8 @@ static long ppp_ioctl(struct file *file, + val &= 0xffff; + } + vj = slhc_init(val2+1, val+1); +- if (!vj) { +- netdev_err(ppp->dev, +- "PPP: no memory (VJ compressor)\n"); +- err = -ENOMEM; ++ if (IS_ERR(vj)) { ++ err = PTR_ERR(vj); + break; + } + ppp_lock(ppp); +--- a/drivers/net/slip/slhc.c ++++ b/drivers/net/slip/slhc.c +@@ -84,8 +84,9 @@ static long decode(unsigned char **cpp); + static unsigned char * put16(unsigned char *cp, unsigned short x); + static unsigned short pull16(unsigned char **cpp); + +-/* Initialize compression data structure ++/* Allocate compression data structure + * slots must be in range 0 to 255 (zero meaning no compression) ++ * Returns pointer to structure or ERR_PTR() on error. + */ + struct slcompress * + slhc_init(int rslots, int tslots) +@@ -94,11 +95,14 @@ slhc_init(int rslots, int tslots) + register struct cstate *ts; + struct slcompress *comp; + ++ if (rslots < 0 || rslots > 255 || tslots < 0 || tslots > 255) ++ return ERR_PTR(-EINVAL); ++ + comp = kzalloc(sizeof(struct slcompress), GFP_KERNEL); + if (! comp) + goto out_fail; + +- if ( rslots > 0 && rslots < 256 ) { ++ if (rslots > 0) { + size_t rsize = rslots * sizeof(struct cstate); + comp->rstate = kzalloc(rsize, GFP_KERNEL); + if (! comp->rstate) +@@ -106,7 +110,7 @@ slhc_init(int rslots, int tslots) + comp->rslot_limit = rslots - 1; + } + +- if ( tslots > 0 && tslots < 256 ) { ++ if (tslots > 0) { + size_t tsize = tslots * sizeof(struct cstate); + comp->tstate = kzalloc(tsize, GFP_KERNEL); + if (! comp->tstate) +@@ -141,7 +145,7 @@ out_free2: + out_free: + kfree(comp); + out_fail: +- return NULL; ++ return ERR_PTR(-ENOMEM); + } + + +--- a/drivers/net/slip/slip.c ++++ b/drivers/net/slip/slip.c +@@ -164,7 +164,7 @@ static int sl_alloc_bufs(struct slip *sl + if (cbuff == NULL) + goto err_exit; + slcomp = slhc_init(16, 16); +- if (slcomp == NULL) ++ if (IS_ERR(slcomp)) + goto err_exit; + #endif + spin_lock_bh(&sl->lock); diff --git a/queue-3.14/sctp-sctp-should-release-assoc-when-sctp_make_abort_user-return-null-in-sctp_close.patch b/queue-3.14/sctp-sctp-should-release-assoc-when-sctp_make_abort_user-return-null-in-sctp_close.patch new file mode 100644 index 00000000000..b7c8f2462f6 --- /dev/null +++ b/queue-3.14/sctp-sctp-should-release-assoc-when-sctp_make_abort_user-return-null-in-sctp_close.patch @@ -0,0 +1,72 @@ +From foo@baz Tue Jan 26 22:23:35 PST 2016 +From: Xin Long +Date: Tue, 29 Dec 2015 17:49:25 +0800 +Subject: sctp: sctp should release assoc when sctp_make_abort_user return NULL in sctp_close +Status: RO +Content-Length: 2604 +Lines: 68 + +From: Xin Long + +[ Upstream commit 068d8bd338e855286aea54e70d1c101569284b21 ] + +In sctp_close, sctp_make_abort_user may return NULL because of memory +allocation failure. If this happens, it will bypass any state change +and never free the assoc. The assoc has no chance to be freed and it +will be kept in memory with the state it had even after the socket is +closed by sctp_close(). + +So if sctp_make_abort_user fails to allocate memory, we should abort +the asoc via sctp_primitive_ABORT as well. Just like the annotation in +sctp_sf_cookie_wait_prm_abort and sctp_sf_do_9_1_prm_abort said, +"Even if we can't send the ABORT due to low memory delete the TCB. +This is a departure from our typical NOMEM handling". + +But then the chunk is NULL (low memory) and the SCTP_CMD_REPLY cmd would +dereference the chunk pointer, and system crash. So we should add +SCTP_CMD_REPLY cmd only when the chunk is not NULL, just like other +places where it adds SCTP_CMD_REPLY cmd. + +Signed-off-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/sm_statefuns.c | 6 ++++-- + net/sctp/socket.c | 3 +-- + 2 files changed, 5 insertions(+), 4 deletions(-) + +--- a/net/sctp/sm_statefuns.c ++++ b/net/sctp/sm_statefuns.c +@@ -4833,7 +4833,8 @@ sctp_disposition_t sctp_sf_do_9_1_prm_ab + + retval = SCTP_DISPOSITION_CONSUME; + +- sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); ++ if (abort) ++ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + + /* Even if we can't send the ABORT due to low memory delete the + * TCB. This is a departure from our typical NOMEM handling. +@@ -4970,7 +4971,8 @@ sctp_disposition_t sctp_sf_cookie_wait_p + SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); + retval = SCTP_DISPOSITION_CONSUME; + +- sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); ++ if (abort) ++ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + + sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, + SCTP_STATE(SCTP_STATE_CLOSED)); +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -1517,8 +1517,7 @@ static void sctp_close(struct sock *sk, + struct sctp_chunk *chunk; + + chunk = sctp_make_abort_user(asoc, NULL, 0); +- if (chunk) +- sctp_primitive_ABORT(net, asoc, chunk); ++ sctp_primitive_ABORT(net, asoc, chunk); + } else + sctp_primitive_SHUTDOWN(net, asoc, NULL); + } diff --git a/queue-3.14/series b/queue-3.14/series index 42b7d633ae6..307bd3c876c 100644 --- a/queue-3.14/series +++ b/queue-3.14/series @@ -28,3 +28,19 @@ usb-xhci-fix-config-fail-of-fs-hub-behind-a-hs-hub-with-mtt.patch usb-ipaq.c-fix-a-timeout-loop.patch usb-cp210x-add-id-for-elv-marble-sound-board-1.patch xhci-refuse-loading-if-nousb-is-used.patch +utf-8-q-veth-20don-e2-80-99t-20modify-20ip-5fsum-utf-8-q-med-20doing-20so-20treats-20packets-20with-20bad-20checksums-utf-8-q-20as-20good.patch +ipv6-addrlabel-fix-ip6addrlbl_get.patch +sctp-sctp-should-release-assoc-when-sctp_make_abort_user-return-null-in-sctp_close.patch +connector-bump-skb-users-before-callback-invocation.patch +unix-properly-account-for-fds-passed-over-unix-sockets.patch +bridge-only-call-sbin-bridge-stp-for-the-initial-network-namespace.patch +net-possible-use-after-free-in-dst_release.patch +vxlan-fix-test-which-detect-duplicate-vxlan-iface.patch +net-sctp-prevent-writes-to-cookie_hmac_alg-from-accessing-invalid-memory.patch +tcp_yeah-don-t-set-ssthresh-below-2.patch +bonding-prevent-ipv6-link-local-address-on-enslaved-devices.patch +phonet-properly-unshare-skbs-in-phonet_rcv.patch +ipv6-update-skb-csum-when-ce-mark-is-propagated.patch +isdn_ppp-add-checks-for-allocation-failure-in-isdn_ppp_open.patch +ppp-slip-validate-vj-compression-slot-parameters-completely.patch +team-replace-rcu_read_lock-with-a-mutex-in-team_vlan_rx_kill_vid.patch diff --git a/queue-3.14/tcp_yeah-don-t-set-ssthresh-below-2.patch b/queue-3.14/tcp_yeah-don-t-set-ssthresh-below-2.patch new file mode 100644 index 00000000000..aac302266dc --- /dev/null +++ b/queue-3.14/tcp_yeah-don-t-set-ssthresh-below-2.patch @@ -0,0 +1,49 @@ +From foo@baz Tue Jan 26 22:23:35 PST 2016 +From: Neal Cardwell +Date: Mon, 11 Jan 2016 13:42:43 -0500 +Subject: tcp_yeah: don't set ssthresh below 2 +Status: RO +Content-Length: 1587 +Lines: 43 + +From: Neal Cardwell + +[ Upstream commit 83d15e70c4d8909d722c0d64747d8fb42e38a48f ] + +For tcp_yeah, use an ssthresh floor of 2, the same floor used by Reno +and CUBIC, per RFC 5681 (equation 4). + +tcp_yeah_ssthresh() was sometimes returning a 0 or negative ssthresh +value if the intended reduction is as big or bigger than the current +cwnd. Congestion control modules should never return a zero or +negative ssthresh. A zero ssthresh generally results in a zero cwnd, +causing the connection to stall. A negative ssthresh value will be +interpreted as a u32 and will set a target cwnd for PRR near 4 +billion. + +Oleksandr Natalenko reported that a system using tcp_yeah with ECN +could see a warning about a prior_cwnd of 0 in +tcp_cwnd_reduction(). Testing verified that this was due to +tcp_yeah_ssthresh() misbehaving in this way. + +Reported-by: Oleksandr Natalenko +Signed-off-by: Neal Cardwell +Signed-off-by: Yuchung Cheng +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_yeah.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/tcp_yeah.c ++++ b/net/ipv4/tcp_yeah.c +@@ -223,7 +223,7 @@ static u32 tcp_yeah_ssthresh(struct sock + yeah->fast_count = 0; + yeah->reno_count = max(yeah->reno_count>>1, 2U); + +- return tp->snd_cwnd - reduction; ++ return max_t(int, tp->snd_cwnd - reduction, 2); + } + + static struct tcp_congestion_ops tcp_yeah __read_mostly = { diff --git a/queue-3.14/team-replace-rcu_read_lock-with-a-mutex-in-team_vlan_rx_kill_vid.patch b/queue-3.14/team-replace-rcu_read_lock-with-a-mutex-in-team_vlan_rx_kill_vid.patch new file mode 100644 index 00000000000..7db79129059 --- /dev/null +++ b/queue-3.14/team-replace-rcu_read_lock-with-a-mutex-in-team_vlan_rx_kill_vid.patch @@ -0,0 +1,42 @@ +From foo@baz Tue Jan 26 22:23:35 PST 2016 +From: Ido Schimmel +Date: Mon, 18 Jan 2016 17:30:22 +0200 +Subject: team: Replace rcu_read_lock with a mutex in team_vlan_rx_kill_vid +Status: RO +Content-Length: 1200 +Lines: 36 + +From: Ido Schimmel + +[ Upstream commit 60a6531bfe49555581ccd65f66a350cc5693fcde ] + +We can't be within an RCU read-side critical section when deleting +VLANs, as underlying drivers might sleep during the hardware operation. +Therefore, replace the RCU critical section with a mutex. This is +consistent with team_vlan_rx_add_vid. + +Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device") +Acked-by: Jiri Pirko +Signed-off-by: Ido Schimmel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/team/team.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/net/team/team.c ++++ b/drivers/net/team/team.c +@@ -1838,10 +1838,10 @@ static int team_vlan_rx_kill_vid(struct + struct team *team = netdev_priv(dev); + struct team_port *port; + +- rcu_read_lock(); +- list_for_each_entry_rcu(port, &team->port_list, list) ++ mutex_lock(&team->lock); ++ list_for_each_entry(port, &team->port_list, list) + vlan_vid_del(port->dev, proto, vid); +- rcu_read_unlock(); ++ mutex_unlock(&team->lock); + + return 0; + } diff --git a/queue-3.14/unix-properly-account-for-fds-passed-over-unix-sockets.patch b/queue-3.14/unix-properly-account-for-fds-passed-over-unix-sockets.patch new file mode 100644 index 00000000000..e12a960a5e7 --- /dev/null +++ b/queue-3.14/unix-properly-account-for-fds-passed-over-unix-sockets.patch @@ -0,0 +1,138 @@ +From foo@baz Tue Jan 26 22:23:35 PST 2016 +From: willy tarreau +Date: Sun, 10 Jan 2016 07:54:56 +0100 +Subject: unix: properly account for FDs passed over unix sockets +Status: RO +Content-Length: 4253 +Lines: 136 + +From: willy tarreau + +[ Upstream commit 712f4aad406bb1ed67f3f98d04c044191f0ff593 ] + +It is possible for a process to allocate and accumulate far more FDs than +the process' limit by sending them over a unix socket then closing them +to keep the process' fd count low. + +This change addresses this problem by keeping track of the number of FDs +in flight per user and preventing non-privileged processes from having +more FDs in flight than their configured FD limit. + +Reported-by: socketpair@gmail.com +Reported-by: Tetsuo Handa +Mitigates: CVE-2013-4312 (Linux 2.0+) +Suggested-by: Linus Torvalds +Acked-by: Hannes Frederic Sowa +Signed-off-by: Willy Tarreau +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/sched.h | 1 + + net/unix/af_unix.c | 24 ++++++++++++++++++++---- + net/unix/garbage.c | 16 ++++++++++++---- + 3 files changed, 33 insertions(+), 8 deletions(-) + +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -755,6 +755,7 @@ struct user_struct { + unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ + #endif + unsigned long locked_shm; /* How many pages of mlocked shm ? */ ++ unsigned long unix_inflight; /* How many files in flight in unix sockets */ + + #ifdef CONFIG_KEYS + struct key *uid_keyring; /* UID specific keyring */ +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -1486,6 +1486,21 @@ static void unix_destruct_scm(struct sk_ + sock_wfree(skb); + } + ++/* ++ * The "user->unix_inflight" variable is protected by the garbage ++ * collection lock, and we just read it locklessly here. If you go ++ * over the limit, there might be a tiny race in actually noticing ++ * it across threads. Tough. ++ */ ++static inline bool too_many_unix_fds(struct task_struct *p) ++{ ++ struct user_struct *user = current_user(); ++ ++ if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE))) ++ return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); ++ return false; ++} ++ + #define MAX_RECURSION_LEVEL 4 + + static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) +@@ -1494,6 +1509,9 @@ static int unix_attach_fds(struct scm_co + unsigned char max_level = 0; + int unix_sock_count = 0; + ++ if (too_many_unix_fds(current)) ++ return -ETOOMANYREFS; ++ + for (i = scm->fp->count - 1; i >= 0; i--) { + struct sock *sk = unix_get_socket(scm->fp->fp[i]); + +@@ -1515,10 +1533,8 @@ static int unix_attach_fds(struct scm_co + if (!UNIXCB(skb).fp) + return -ENOMEM; + +- if (unix_sock_count) { +- for (i = scm->fp->count - 1; i >= 0; i--) +- unix_inflight(scm->fp->fp[i]); +- } ++ for (i = scm->fp->count - 1; i >= 0; i--) ++ unix_inflight(scm->fp->fp[i]); + return max_level; + } + +--- a/net/unix/garbage.c ++++ b/net/unix/garbage.c +@@ -125,9 +125,12 @@ struct sock *unix_get_socket(struct file + void unix_inflight(struct file *fp) + { + struct sock *s = unix_get_socket(fp); ++ ++ spin_lock(&unix_gc_lock); ++ + if (s) { + struct unix_sock *u = unix_sk(s); +- spin_lock(&unix_gc_lock); ++ + if (atomic_long_inc_return(&u->inflight) == 1) { + BUG_ON(!list_empty(&u->link)); + list_add_tail(&u->link, &gc_inflight_list); +@@ -135,22 +138,27 @@ void unix_inflight(struct file *fp) + BUG_ON(list_empty(&u->link)); + } + unix_tot_inflight++; +- spin_unlock(&unix_gc_lock); + } ++ fp->f_cred->user->unix_inflight++; ++ spin_unlock(&unix_gc_lock); + } + + void unix_notinflight(struct file *fp) + { + struct sock *s = unix_get_socket(fp); ++ ++ spin_lock(&unix_gc_lock); ++ + if (s) { + struct unix_sock *u = unix_sk(s); +- spin_lock(&unix_gc_lock); ++ + BUG_ON(list_empty(&u->link)); + if (atomic_long_dec_and_test(&u->inflight)) + list_del_init(&u->link); + unix_tot_inflight--; +- spin_unlock(&unix_gc_lock); + } ++ fp->f_cred->user->unix_inflight--; ++ spin_unlock(&unix_gc_lock); + } + + static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), diff --git a/queue-3.14/utf-8-q-veth-20don-e2-80-99t-20modify-20ip-5fsum-utf-8-q-med-20doing-20so-20treats-20packets-20with-20bad-20checksums-utf-8-q-20as-20good.patch b/queue-3.14/utf-8-q-veth-20don-e2-80-99t-20modify-20ip-5fsum-utf-8-q-med-20doing-20so-20treats-20packets-20with-20bad-20checksums-utf-8-q-20as-20good.patch new file mode 100644 index 00000000000..148e9e4835b --- /dev/null +++ b/queue-3.14/utf-8-q-veth-20don-e2-80-99t-20modify-20ip-5fsum-utf-8-q-med-20doing-20so-20treats-20packets-20with-20bad-20checksums-utf-8-q-20as-20good.patch @@ -0,0 +1,75 @@ +From foo@baz Tue Jan 26 22:23:35 PST 2016 +From: Vijay Pandurangan +Date: Fri, 18 Dec 2015 14:34:59 -0500 +Subject: =?UTF-8?q?veth:=20don=E2=80=99t=20modify=20ip=5Fsum?= =?UTF-8?q?med;=20doing=20so=20treats=20packets=20with=20bad=20checksums?= =?UTF-8?q?=20as=20good.?= +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Status: RO +Content-Length: 2983 +Lines: 66 + +From: Vijay Pandurangan + +[ Upstream commit ce8c839b74e3017996fad4e1b7ba2e2625ede82f ] + +Packets that arrive from real hardware devices have ip_summed == +CHECKSUM_UNNECESSARY if the hardware verified the checksums, or +CHECKSUM_NONE if the packet is bad or it was unable to verify it. The +current version of veth will replace CHECKSUM_NONE with +CHECKSUM_UNNECESSARY, which causes corrupt packets routed from hardware to +a veth device to be delivered to the application. This caused applications +at Twitter to receive corrupt data when network hardware was corrupting +packets. + +We believe this was added as an optimization to skip computing and +verifying checksums for communication between containers. However, locally +generated packets have ip_summed == CHECKSUM_PARTIAL, so the code as +written does nothing for them. As far as we can tell, after removing this +code, these packets are transmitted from one stack to another unmodified +(tcpdump shows invalid checksums on both sides, as expected), and they are +delivered correctly to applications. We didn’t test every possible network +configuration, but we tried a few common ones such as bridging containers, +using NAT between the host and a container, and routing from hardware +devices to containers. We have effectively deployed this in production at +Twitter (by disabling RX checksum offloading on veth devices). + +This code dates back to the first version of the driver, commit + ("[NET]: Virtual ethernet device driver"), so I +suspect this bug occurred mostly because the driver API has evolved +significantly since then. Commit <0b7967503dc97864f283a> ("net/veth: Fix +packet checksumming") (in December 2010) fixed this for packets that get +created locally and sent to hardware devices, by not changing +CHECKSUM_PARTIAL. However, the same issue still occurs for packets coming +in from hardware devices. + +Co-authored-by: Evan Jones +Signed-off-by: Evan Jones +Cc: Nicolas Dichtel +Cc: Phil Sutter +Cc: Toshiaki Makita +Cc: netdev@vger.kernel.org +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Vijay Pandurangan +Acked-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/veth.c | 6 ------ + 1 file changed, 6 deletions(-) + +--- a/drivers/net/veth.c ++++ b/drivers/net/veth.c +@@ -116,12 +116,6 @@ static netdev_tx_t veth_xmit(struct sk_b + kfree_skb(skb); + goto drop; + } +- /* don't change ip_summed == CHECKSUM_PARTIAL, as that +- * will cause bad checksum on forwarded packets +- */ +- if (skb->ip_summed == CHECKSUM_NONE && +- rcv->features & NETIF_F_RXCSUM) +- skb->ip_summed = CHECKSUM_UNNECESSARY; + + if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) { + struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats); diff --git a/queue-3.14/vxlan-fix-test-which-detect-duplicate-vxlan-iface.patch b/queue-3.14/vxlan-fix-test-which-detect-duplicate-vxlan-iface.patch new file mode 100644 index 00000000000..c793f381a37 --- /dev/null +++ b/queue-3.14/vxlan-fix-test-which-detect-duplicate-vxlan-iface.patch @@ -0,0 +1,78 @@ +From foo@baz Tue Jan 26 22:23:35 PST 2016 +From: Nicolas Dichtel +Date: Thu, 7 Jan 2016 11:26:53 +0100 +Subject: vxlan: fix test which detect duplicate vxlan iface +Content-Length: 2895 +Lines: 75 + +From: Nicolas Dichtel + +[ Upstream commit 07b9b37c227cb8d88d478b4a9c5634fee514ede1 ] + +When a vxlan interface is created, the driver checks that there is not +another vxlan interface with the same properties. To do this, it checks +the existing vxlan udp socket. Since commit 1c51a9159dde, the creation of +the vxlan socket is done only when the interface is set up, thus it breaks +that test. + +Example: +$ ip l a vxlan10 type vxlan id 10 group 239.0.0.10 dev eth0 dstport 0 +$ ip l a vxlan11 type vxlan id 10 group 239.0.0.10 dev eth0 dstport 0 +$ ip -br l | grep vxlan +vxlan10 DOWN f2:55:1c:6a:fb:00 +vxlan11 DOWN 7a:cb:b9:38:59:0d + +Instead of checking sockets, let's loop over the vxlan iface list. + +Fixes: 1c51a9159dde ("vxlan: fix race caused by dropping rtnl_unlock") +Reported-by: Thomas Faivre +Signed-off-by: Nicolas Dichtel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vxlan.c | 12 ++++++++---- + include/net/vxlan.h | 5 +++++ + 2 files changed, 13 insertions(+), 4 deletions(-) + +--- a/drivers/net/vxlan.c ++++ b/drivers/net/vxlan.c +@@ -2607,7 +2607,7 @@ static int vxlan_newlink(struct net *net + struct nlattr *tb[], struct nlattr *data[]) + { + struct vxlan_net *vn = net_generic(net, vxlan_net_id); +- struct vxlan_dev *vxlan = netdev_priv(dev); ++ struct vxlan_dev *vxlan = netdev_priv(dev), *tmp; + struct vxlan_rdst *dst = &vxlan->default_dst; + __u32 vni; + int err; +@@ -2715,9 +2715,13 @@ static int vxlan_newlink(struct net *net + if (data[IFLA_VXLAN_PORT]) + vxlan->dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]); + +- if (vxlan_find_vni(net, vni, use_ipv6 ? AF_INET6 : AF_INET, +- vxlan->dst_port)) { +- pr_info("duplicate VNI %u\n", vni); ++ list_for_each_entry(tmp, &vn->vxlan_list, next) { ++ if (tmp->default_dst.remote_vni == vni && ++ (tmp->default_dst.remote_ip.sa.sa_family == AF_INET6 || ++ tmp->saddr.sa.sa_family == AF_INET6) == use_ipv6 && ++ tmp->dst_port == vxlan->dst_port && ++ (tmp->flags & VXLAN_F_RCV_FLAGS) == ++ (vxlan->flags & VXLAN_F_RCV_FLAGS)) + return -EEXIST; + } + +--- a/include/net/vxlan.h ++++ b/include/net/vxlan.h +@@ -24,6 +24,11 @@ struct vxlan_sock { + struct udp_offload udp_offloads; + }; + ++/* Flags that are used in the receive path. These flags must match in ++ * order for a socket to be shareable ++ */ ++#define VXLAN_F_RCV_FLAGS VXLAN_F_UDP_ZERO_CSUM6_RX ++ + struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, + vxlan_rcv_t *rcv, void *data, + bool no_share, bool ipv6); -- 2.47.3