From: Greg Kroah-Hartman Date: Sat, 26 Jul 2014 17:06:09 +0000 (-0700) Subject: 3.10-stable patches X-Git-Tag: v3.4.100~15 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=45b3cddb5e378a326c39a346b89e57c5158c3ba8;p=thirdparty%2Fkernel%2Fstable-queue.git 3.10-stable patches added patches: 8021q-fix-a-potential-memory-leak.patch appletalk-fix-socket-referencing-in-skb.patch be2net-set-eq-db-clear-intr-bit-in-be_open.patch bnx2x-fix-possible-panic-under-memory-stress.patch dns_resolver-assure-that-dns_query-result-is-null-terminated.patch dns_resolver-null-terminate-the-right-string.patch igmp-fix-the-problem-when-mc-leave-group.patch ip_tunnel-fix-ip_tunnel_lookup.patch ipv4-fix-buffer-overflow-in-ip_options_compile.patch ipv4-fix-dst-race-in-sk_dst_get.patch ipv4-icmp-fix-pmtu-handling-for-rare-case.patch ipv4-irq-safe-sk_dst_set-and-ipv4_sk_update_pmtu-fix.patch net-fix-sparse-warning-in-sk_dst_set.patch net-mvneta-fix-big-endian-issue-in-mvneta_txq_desc_csum.patch net-mvneta-fix-operation-in-10-mbit-s-mode.patch net-pppoe-use-correct-channel-mtu-when-using-multilink-ppp.patch net-qmi_wwan-add-id-for-telewell-tw-lte-4g-v2.patch net-qmi_wwan-add-two-sierra-wireless-netgear-devices.patch net-sctp-check-proc_dointvec-result-in-proc_sctp_do_auth.patch net-sctp-fix-information-leaks-in-ulpevent-layer.patch netlink-fix-handling-of-error-from-netlink_dump.patch sunvnet-clean-up-objects-created-in-vnet_new-on-vnet_exit.patch tcp-fix-divide-by-zero-when-pushing-during-tcp-repair.patch tcp-fix-false-undo-corner-cases.patch tcp-fix-tcp_match_skb_to_sack-for-unaligned-sack-at-end-of-an-skb.patch tipc-clear-next-pointer-of-message-fragments-before-reassembly.patch --- diff --git a/queue-3.10/8021q-fix-a-potential-memory-leak.patch b/queue-3.10/8021q-fix-a-potential-memory-leak.patch new file mode 100644 index 00000000000..387522e0abf --- /dev/null +++ b/queue-3.10/8021q-fix-a-potential-memory-leak.patch @@ -0,0 +1,36 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: Li 
RongQing +Date: Wed, 18 Jun 2014 13:46:02 +0800 +Subject: 8021q: fix a potential memory leak + +From: Li RongQing + +[ Upstream commit 916c1689a09bc1ca81f2d7a34876f8d35aadd11b ] + +skb_cow called in vlan_reorder_header does not free the skb when it fails, +and vlan_reorder_header returns NULL to reset original skb when it is called +in vlan_untag, leading to a memory leak. + +Signed-off-by: Li RongQing +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/8021q/vlan_core.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/net/8021q/vlan_core.c ++++ b/net/8021q/vlan_core.c +@@ -103,8 +103,11 @@ EXPORT_SYMBOL(vlan_dev_vlan_id); + + static struct sk_buff *vlan_reorder_header(struct sk_buff *skb) + { +- if (skb_cow(skb, skb_headroom(skb)) < 0) ++ if (skb_cow(skb, skb_headroom(skb)) < 0) { ++ kfree_skb(skb); + return NULL; ++ } ++ + memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); + skb->mac_header += VLAN_HLEN; + return skb; diff --git a/queue-3.10/appletalk-fix-socket-referencing-in-skb.patch b/queue-3.10/appletalk-fix-socket-referencing-in-skb.patch new file mode 100644 index 00000000000..a50dff7c75f --- /dev/null +++ b/queue-3.10/appletalk-fix-socket-referencing-in-skb.patch @@ -0,0 +1,44 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: Andrey Utkin +Date: Mon, 7 Jul 2014 23:22:50 +0300 +Subject: appletalk: Fix socket referencing in skb + +From: Andrey Utkin + +[ Upstream commit 36beddc272c111689f3042bf3d10a64d8a805f93 ] + +Setting just skb->sk without taking its reference and setting a +destructor is invalid. However, in the places where this was done, skb +is used in a way not requiring skb->sk setting. So dropping the setting +of skb->sk. +Thanks to Eric Dumazet for correct solution. + +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=79441 +Reported-by: Ed Martin +Signed-off-by: Andrey Utkin +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/appletalk/ddp.c | 3 --- + 1 file changed, 3 deletions(-) + +--- a/net/appletalk/ddp.c ++++ b/net/appletalk/ddp.c +@@ -1489,8 +1489,6 @@ static int atalk_rcv(struct sk_buff *skb + goto drop; + + /* Queue packet (standard) */ +- skb->sk = sock; +- + if (sock_queue_rcv_skb(sock, skb) < 0) + goto drop; + +@@ -1644,7 +1642,6 @@ static int atalk_sendmsg(struct kiocb *i + if (!skb) + goto out; + +- skb->sk = sk; + skb_reserve(skb, ddp_dl->header_length); + skb_reserve(skb, dev->hard_header_len); + skb->dev = dev; diff --git a/queue-3.10/be2net-set-eq-db-clear-intr-bit-in-be_open.patch b/queue-3.10/be2net-set-eq-db-clear-intr-bit-in-be_open.patch new file mode 100644 index 00000000000..d4caacf17ec --- /dev/null +++ b/queue-3.10/be2net-set-eq-db-clear-intr-bit-in-be_open.patch @@ -0,0 +1,34 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: Suresh Reddy +Date: Fri, 11 Jul 2014 14:03:01 +0530 +Subject: be2net: set EQ DB clear-intr bit in be_open() + +From: Suresh Reddy + +[ Upstream commit 4cad9f3b61c7268fa89ab8096e23202300399b5d ] + +On BE3, if the clear-interrupt bit of the EQ doorbell is not set the first +time it is armed, occasionally we have observed that the EQ doesn't raise +any more interrupts even if it is in armed state. +This patch fixes this by setting the clear-interrupt bit when EQs are +armed for the first time in be_open(). + +Signed-off-by: Suresh Reddy +Signed-off-by: Sathya Perla +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/emulex/benet/be_main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/emulex/benet/be_main.c ++++ b/drivers/net/ethernet/emulex/benet/be_main.c +@@ -2663,7 +2663,7 @@ static int be_open(struct net_device *ne + + for_all_evt_queues(adapter, eqo, i) { + napi_enable(&eqo->napi); +- be_eq_notify(adapter, eqo->q.id, true, false, 0); ++ be_eq_notify(adapter, eqo->q.id, true, true, 0); + } + adapter->flags |= BE_FLAGS_NAPI_ENABLED; + diff --git a/queue-3.10/bnx2x-fix-possible-panic-under-memory-stress.patch b/queue-3.10/bnx2x-fix-possible-panic-under-memory-stress.patch new file mode 100644 index 00000000000..c38bb4449bd --- /dev/null +++ b/queue-3.10/bnx2x-fix-possible-panic-under-memory-stress.patch @@ -0,0 +1,37 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: Eric Dumazet +Date: Thu, 26 Jun 2014 00:44:02 -0700 +Subject: bnx2x: fix possible panic under memory stress + +From: Eric Dumazet + +[ Upstream commit 07b0f00964def8af9321cfd6c4a7e84f6362f728 ] + +While it is legal to kfree(NULL), it is not wise to use : +put_page(virt_to_head_page(NULL)) + + BUG: unable to handle kernel paging request at ffffeba400000000 + IP: [] virt_to_head_page+0x36/0x44 [bnx2x] + +Reported-by: Michel Lespinasse +Signed-off-by: Eric Dumazet +Cc: Ariel Elior +Fixes: d46d132cc021 ("bnx2x: use netdev_alloc_frag()") +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +@@ -745,7 +745,8 @@ static void bnx2x_tpa_stop(struct bnx2x + + return; + } +- bnx2x_frag_free(fp, new_data); ++ if (new_data) ++ bnx2x_frag_free(fp, new_data); + drop: + /* drop the packet and keep the buffer in the bin */ + DP(NETIF_MSG_RX_STATUS, diff --git a/queue-3.10/dns_resolver-assure-that-dns_query-result-is-null-terminated.patch b/queue-3.10/dns_resolver-assure-that-dns_query-result-is-null-terminated.patch new file mode 100644 index 00000000000..017c02744dc --- /dev/null +++ b/queue-3.10/dns_resolver-assure-that-dns_query-result-is-null-terminated.patch @@ -0,0 +1,35 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: =?UTF-8?q?Manuel=20Sch=C3=B6lling?= +Date: Sat, 7 Jun 2014 23:57:25 +0200 +Subject: dns_resolver: assure that dns_query() result is null-terminated +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: =?UTF-8?q?Manuel=20Sch=C3=B6lling?= + +[ Upstream commit 84a7c0b1db1c17d5ded8d3800228a608e1070b40 ] + +dns_query() credulously assumes that keys are null-terminated and +returns a copy of a memory block that is off by one. + +Signed-off-by: Manuel Schölling +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dns_resolver/dns_query.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/dns_resolver/dns_query.c ++++ b/net/dns_resolver/dns_query.c +@@ -150,7 +150,9 @@ int dns_query(const char *type, const ch + if (!*_result) + goto put; + +- memcpy(*_result, upayload->data, len + 1); ++ memcpy(*_result, upayload->data, len); ++ *_result[len] = '\0'; ++ + if (_expiry) + *_expiry = rkey->expiry; + diff --git a/queue-3.10/dns_resolver-null-terminate-the-right-string.patch b/queue-3.10/dns_resolver-null-terminate-the-right-string.patch new file mode 100644 index 00000000000..8f99330ed05 --- /dev/null +++ b/queue-3.10/dns_resolver-null-terminate-the-right-string.patch @@ -0,0 +1,31 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: Ben Hutchings +Date: Mon, 21 Jul 2014 00:06:48 +0100 +Subject: dns_resolver: Null-terminate the right string + +From: Ben Hutchings + +[ Upstream commit 640d7efe4c08f06c4ae5d31b79bd8740e7f6790a ] + +*_result[len] is parsed as *(_result[len]) which is not at all what we +want to touch here. + +Signed-off-by: Ben Hutchings +Fixes: 84a7c0b1db1c ("dns_resolver: assure that dns_query() result is null-terminated") +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dns_resolver/dns_query.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/dns_resolver/dns_query.c ++++ b/net/dns_resolver/dns_query.c +@@ -151,7 +151,7 @@ int dns_query(const char *type, const ch + goto put; + + memcpy(*_result, upayload->data, len); +- *_result[len] = '\0'; ++ (*_result)[len] = '\0'; + + if (_expiry) + *_expiry = rkey->expiry; diff --git a/queue-3.10/igmp-fix-the-problem-when-mc-leave-group.patch b/queue-3.10/igmp-fix-the-problem-when-mc-leave-group.patch new file mode 100644 index 00000000000..ed3707c5309 --- /dev/null +++ b/queue-3.10/igmp-fix-the-problem-when-mc-leave-group.patch @@ -0,0 +1,84 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: dingtianhong +Date: Wed, 2 Jul 2014 13:50:48 +0800 +Subject: igmp: fix the problem when mc leave group + +From: dingtianhong + +[ Upstream commit 52ad353a5344f1f700c5b777175bdfa41d3cd65a ] + +The problem was triggered by these steps: + +1) create socket, bind and then setsockopt for add mc group. + mreq.imr_multiaddr.s_addr = inet_addr("255.0.0.37"); + mreq.imr_interface.s_addr = inet_addr("192.168.1.2"); + setsockopt(sockfd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); + +2) drop the mc group for this socket. 
+ mreq.imr_multiaddr.s_addr = inet_addr("255.0.0.37"); + mreq.imr_interface.s_addr = inet_addr("0.0.0.0"); + setsockopt(sockfd, IPPROTO_IP, IP_DROP_MEMBERSHIP, &mreq, sizeof(mreq)); + +3) and then drop the socket, I found the mc group was still used by the dev: + + netstat -g + + Interface RefCnt Group + --------------- ------ --------------------- + eth2 1 255.0.0.37 + +Normally even though the IP_DROP_MEMBERSHIP return error, the mc group still need +to be released for the netdev when drop the socket, but this process was broken when +route default is NULL, the reason is that: + +The ip_mc_leave_group() will choose the in_dev by the imr_interface.s_addr, if input addr +is NULL, the default route dev will be chosen, then the ifindex is got from the dev, +then polling the inet->mc_list and return -ENODEV, but if the default route dev is NULL, +the in_dev and ifIndex is both NULL, when polling the inet->mc_list, the mc group will be +released from the mc_list, but the dev didn't dec the refcnt for this mc group, so +when dropping the socket, the mc_list is NULL and the dev still keep this group. + +v1->v2: According Hideaki's suggestion, we should align with IPv6 (RFC3493) and BSDs, + so I add the checking for the in_dev before polling the mc_list, make sure when + we remove the mc group, dec the refcnt to the real dev which was using the mc address. + The problem would never happened again. + +Signed-off-by: Ding Tianhong +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/igmp.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/net/ipv4/igmp.c ++++ b/net/ipv4/igmp.c +@@ -1874,6 +1874,10 @@ int ip_mc_leave_group(struct sock *sk, s + + rtnl_lock(); + in_dev = ip_mc_find_dev(net, imr); ++ if (!in_dev) { ++ ret = -ENODEV; ++ goto out; ++ } + ifindex = imr->imr_ifindex; + for (imlp = &inet->mc_list; + (iml = rtnl_dereference(*imlp)) != NULL; +@@ -1891,16 +1895,14 @@ int ip_mc_leave_group(struct sock *sk, s + + *imlp = iml->next_rcu; + +- if (in_dev) +- ip_mc_dec_group(in_dev, group); ++ ip_mc_dec_group(in_dev, group); + rtnl_unlock(); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); + kfree_rcu(iml, rcu); + return 0; + } +- if (!in_dev) +- ret = -ENODEV; ++out: + rtnl_unlock(); + return ret; + } diff --git a/queue-3.10/ip_tunnel-fix-ip_tunnel_lookup.patch b/queue-3.10/ip_tunnel-fix-ip_tunnel_lookup.patch new file mode 100644 index 00000000000..7ec2eecc812 --- /dev/null +++ b/queue-3.10/ip_tunnel-fix-ip_tunnel_lookup.patch @@ -0,0 +1,95 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: Dmitry Popov +Date: Sat, 5 Jul 2014 02:26:37 +0400 +Subject: ip_tunnel: fix ip_tunnel_lookup + +From: Dmitry Popov + +[ Upstream commit e0056593b61253f1a8a9941dacda22e73b963cdc ] + +This patch fixes 3 similar bugs where incoming packets might be routed into +wrong non-wildcard tunnels: + +1) Consider the following setup: + ip address add 1.1.1.1/24 dev eth0 + ip address add 1.1.1.2/24 dev eth0 + ip tunnel add ipip1 remote 2.2.2.2 local 1.1.1.1 mode ipip dev eth0 + ip link set ipip1 up + +Incoming ipip packets from 2.2.2.2 were routed into ipip1 even if it has dst = +1.1.1.2. 
Moreover even if there was wildcard tunnel like + ip tunnel add ipip0 remote 2.2.2.2 local any mode ipip dev eth0 +but it was created before explicit one (with local 1.1.1.1), incoming ipip +packets with src = 2.2.2.2 and dst = 1.1.1.2 were still routed into ipip1. + +Same issue existed with all tunnels that use ip_tunnel_lookup (gre, vti) + +2) ip address add 1.1.1.1/24 dev eth0 + ip tunnel add ipip1 remote 2.2.146.85 local 1.1.1.1 mode ipip dev eth0 + ip link set ipip1 up + +Incoming ipip packets with dst = 1.1.1.1 were routed into ipip1, no matter what +src address is. Any remote ip address which has ip_tunnel_hash = 0 raised this +issue, 2.2.146.85 is just an example, there are more than 4 million of them. +And again, wildcard tunnel like + ip tunnel add ipip0 remote any local 1.1.1.1 mode ipip dev eth0 +wouldn't be ever matched if it was created before explicit tunnel like above. + +Gre & vti tunnels had the same issue. + +3) ip address add 1.1.1.1/24 dev eth0 + ip tunnel add gre1 remote 2.2.146.84 local 1.1.1.1 key 1 mode gre dev eth0 + ip link set gre1 up + +Any incoming gre packet with key = 1 were routed into gre1, no matter what +src/dst addresses are. Any remote ip address which has ip_tunnel_hash = 0 raised +the issue, 2.2.146.84 is just an example, there are more than 4 million of them. +Wildcard tunnel like + ip tunnel add gre2 remote any local any key 1 mode gre dev eth0 +wouldn't be ever matched if it was created before explicit tunnel like above. + +All this stuff happened because while looking for a wildcard tunnel we didn't +check that matched tunnel is a wildcard one. Fixed. + +Signed-off-by: Dmitry Popov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_tunnel.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +--- a/net/ipv4/ip_tunnel.c ++++ b/net/ipv4/ip_tunnel.c +@@ -166,6 +166,7 @@ struct ip_tunnel *ip_tunnel_lookup(struc + + hlist_for_each_entry_rcu(t, head, hash_node) { + if (remote != t->parms.iph.daddr || ++ t->parms.iph.saddr != 0 || + !(t->dev->flags & IFF_UP)) + continue; + +@@ -182,10 +183,11 @@ struct ip_tunnel *ip_tunnel_lookup(struc + head = &itn->tunnels[hash]; + + hlist_for_each_entry_rcu(t, head, hash_node) { +- if ((local != t->parms.iph.saddr && +- (local != t->parms.iph.daddr || +- !ipv4_is_multicast(local))) || +- !(t->dev->flags & IFF_UP)) ++ if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) && ++ (local != t->parms.iph.daddr || !ipv4_is_multicast(local))) ++ continue; ++ ++ if (!(t->dev->flags & IFF_UP)) + continue; + + if (!ip_tunnel_key_match(&t->parms, flags, key)) +@@ -202,6 +204,8 @@ struct ip_tunnel *ip_tunnel_lookup(struc + + hlist_for_each_entry_rcu(t, head, hash_node) { + if (t->parms.i_key != key || ++ t->parms.iph.saddr != 0 || ++ t->parms.iph.daddr != 0 || + !(t->dev->flags & IFF_UP)) + continue; + diff --git a/queue-3.10/ipv4-fix-buffer-overflow-in-ip_options_compile.patch b/queue-3.10/ipv4-fix-buffer-overflow-in-ip_options_compile.patch new file mode 100644 index 00000000000..c3380113fac --- /dev/null +++ b/queue-3.10/ipv4-fix-buffer-overflow-in-ip_options_compile.patch @@ -0,0 +1,82 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: Eric Dumazet +Date: Mon, 21 Jul 2014 07:17:42 +0200 +Subject: ipv4: fix buffer overflow in ip_options_compile() + +From: Eric Dumazet + +[ Upstream commit 10ec9472f05b45c94db3c854d22581a20b97db41 ] + +There is a benign buffer overflow in ip_options_compile spotted by +AddressSanitizer[1] : + +Its benign because we always can access one extra byte in skb->head +(because header is followed by struct skb_shared_info), and in this case +this byte is not even 
used. + +[28504.910798] ================================================================== +[28504.912046] AddressSanitizer: heap-buffer-overflow in ip_options_compile +[28504.913170] Read of size 1 by thread T15843: +[28504.914026] [] ip_options_compile+0x121/0x9c0 +[28504.915394] [] ip_options_get_from_user+0xad/0x120 +[28504.916843] [] do_ip_setsockopt.isra.15+0x8df/0x1630 +[28504.918175] [] ip_setsockopt+0x30/0xa0 +[28504.919490] [] tcp_setsockopt+0x5b/0x90 +[28504.920835] [] sock_common_setsockopt+0x5f/0x70 +[28504.922208] [] SyS_setsockopt+0xa2/0x140 +[28504.923459] [] system_call_fastpath+0x16/0x1b +[28504.924722] +[28504.925106] Allocated by thread T15843: +[28504.925815] [] ip_options_get_from_user+0x35/0x120 +[28504.926884] [] do_ip_setsockopt.isra.15+0x8df/0x1630 +[28504.927975] [] ip_setsockopt+0x30/0xa0 +[28504.929175] [] tcp_setsockopt+0x5b/0x90 +[28504.930400] [] sock_common_setsockopt+0x5f/0x70 +[28504.931677] [] SyS_setsockopt+0xa2/0x140 +[28504.932851] [] system_call_fastpath+0x16/0x1b +[28504.934018] +[28504.934377] The buggy address ffff880026382828 is located 0 bytes to the right +[28504.934377] of 40-byte region [ffff880026382800, ffff880026382828) +[28504.937144] +[28504.937474] Memory state around the buggy address: +[28504.938430] ffff880026382300: ........ 
rrrrrrrr rrrrrrrr rrrrrrrr +[28504.939884] ffff880026382400: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr +[28504.941294] ffff880026382500: .....rrr rrrrrrrr rrrrrrrr rrrrrrrr +[28504.942504] ffff880026382600: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr +[28504.943483] ffff880026382700: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr +[28504.944511] >ffff880026382800: .....rrr rrrrrrrr rrrrrrrr rrrrrrrr +[28504.945573] ^ +[28504.946277] ffff880026382900: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr +[28505.094949] ffff880026382a00: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr +[28505.096114] ffff880026382b00: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr +[28505.097116] ffff880026382c00: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr +[28505.098472] ffff880026382d00: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr +[28505.099804] Legend: +[28505.100269] f - 8 freed bytes +[28505.100884] r - 8 redzone bytes +[28505.101649] . - 8 allocated bytes +[28505.102406] x=1..7 - x allocated bytes + (8-x) redzone bytes +[28505.103637] ================================================================== + +[1] https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel + +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_options.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/ipv4/ip_options.c ++++ b/net/ipv4/ip_options.c +@@ -288,6 +288,10 @@ int ip_options_compile(struct net *net, + optptr++; + continue; + } ++ if (unlikely(l < 2)) { ++ pp_ptr = optptr; ++ goto error; ++ } + optlen = optptr[1]; + if (optlen<2 || optlen>l) { + pp_ptr = optptr; diff --git a/queue-3.10/ipv4-fix-dst-race-in-sk_dst_get.patch b/queue-3.10/ipv4-fix-dst-race-in-sk_dst_get.patch new file mode 100644 index 00000000000..fbff59f9b09 --- /dev/null +++ b/queue-3.10/ipv4-fix-dst-race-in-sk_dst_get.patch @@ -0,0 +1,79 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: Eric Dumazet +Date: Tue, 24 Jun 2014 10:05:11 -0700 +Subject: ipv4: fix dst race in sk_dst_get() + +From: Eric Dumazet + +[ Upstream commit f88649721268999bdff09777847080a52004f691 ] + +When IP route cache had been removed in linux-3.6, we broke assumption +that dst entries were all freed after rcu grace period. DST_NOCACHE +dst were supposed to be freed from dst_release(). But it appears +we want to keep such dst around, either in UDP sockets or tunnels. + +In sk_dst_get() we need to make sure dst refcount is not 0 +before incrementing it, or else we might end up freeing a dst +twice. + +DST_NOCACHE set on a dst does not mean this dst can not be attached +to a socket or a tunnel. + +Then, before actual freeing, we need to observe a rcu grace period +to make sure all other cpus can catch the fact the dst is no longer +usable. + +Signed-off-by: Eric Dumazet +Reported-by: Dormando +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/sock.h | 4 ++-- + net/core/dst.c | 16 +++++++++++----- + 2 files changed, 13 insertions(+), 7 deletions(-) + +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -1727,8 +1727,8 @@ sk_dst_get(struct sock *sk) + + rcu_read_lock(); + dst = rcu_dereference(sk->sk_dst_cache); +- if (dst) +- dst_hold(dst); ++ if (dst && !atomic_inc_not_zero(&dst->__refcnt)) ++ dst = NULL; + rcu_read_unlock(); + return dst; + } +--- a/net/core/dst.c ++++ b/net/core/dst.c +@@ -267,6 +267,15 @@ again: + } + EXPORT_SYMBOL(dst_destroy); + ++static void dst_destroy_rcu(struct rcu_head *head) ++{ ++ struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); ++ ++ dst = dst_destroy(dst); ++ if (dst) ++ __dst_free(dst); ++} ++ + void dst_release(struct dst_entry *dst) + { + if (dst) { +@@ -274,11 +283,8 @@ void dst_release(struct dst_entry *dst) + + newrefcnt = atomic_dec_return(&dst->__refcnt); + WARN_ON(newrefcnt < 0); +- if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) { +- dst = dst_destroy(dst); +- if (dst) +- __dst_free(dst); +- } ++ if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) ++ call_rcu(&dst->rcu_head, dst_destroy_rcu); + } + } + EXPORT_SYMBOL(dst_release); diff --git a/queue-3.10/ipv4-icmp-fix-pmtu-handling-for-rare-case.patch b/queue-3.10/ipv4-icmp-fix-pmtu-handling-for-rare-case.patch new file mode 100644 index 00000000000..0288ee7a5f0 --- /dev/null +++ b/queue-3.10/ipv4-icmp-fix-pmtu-handling-for-rare-case.patch @@ -0,0 +1,60 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: Edward Allcutt +Date: Mon, 30 Jun 2014 16:16:02 +0100 +Subject: ipv4: icmp: Fix pMTU handling for rare case + +From: Edward Allcutt + +[ Upstream commit 68b7107b62983f2cff0948292429d5f5999df096 ] + +Some older router implementations still send Fragmentation Needed +errors with the Next-Hop MTU field set to zero. 
This is explicitly +described as an eventuality that hosts must deal with by the +standard (RFC 1191) since older standards specified that those +bits must be zero. + +Linux had a generic (for all of IPv4) implementation of the algorithm +described in the RFC for searching a list of MTU plateaus for a good +value. Commit 46517008e116 ("ipv4: Kill ip_rt_frag_needed().") +removed this as part of the changes to remove the routing cache. +Subsequently any Fragmentation Needed packet with a zero Next-Hop +MTU has been discarded without being passed to the per-protocol +handlers or notifying userspace for raw sockets. + +When there is a router which does not implement RFC 1191 on an +MTU limited path then this results in stalled connections since +large packets are discarded and the local protocols are not +notified so they never attempt to lower the pMTU. + +One example I have seen is an OpenBSD router terminating IPSec +tunnels. It's worth pointing out that this case is distinct from +the BSD 4.2 bug which incorrectly calculated the Next-Hop MTU +since the commit in question dismissed that as a valid concern. + +All of the per-protocols handlers implement the simple approach from +RFC 1191 of immediately falling back to the minimum value. Although +this is sub-optimal it is vastly preferable to connections hanging +indefinitely. + +Remove the Next-Hop MTU != 0 check and allow such packets +to follow the normal path. + +Fixes: 46517008e116 ("ipv4: Kill ip_rt_frag_needed().") +Signed-off-by: Edward Allcutt +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/icmp.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/net/ipv4/icmp.c ++++ b/net/ipv4/icmp.c +@@ -697,8 +697,6 @@ static void icmp_unreach(struct sk_buff + &iph->daddr); + } else { + info = ntohs(icmph->un.frag.mtu); +- if (!info) +- goto out; + } + break; + case ICMP_SR_FAILED: diff --git a/queue-3.10/ipv4-irq-safe-sk_dst_set-and-ipv4_sk_update_pmtu-fix.patch b/queue-3.10/ipv4-irq-safe-sk_dst_set-and-ipv4_sk_update_pmtu-fix.patch new file mode 100644 index 00000000000..ab6e85b3abe --- /dev/null +++ b/queue-3.10/ipv4-irq-safe-sk_dst_set-and-ipv4_sk_update_pmtu-fix.patch @@ -0,0 +1,118 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: Eric Dumazet +Date: Mon, 30 Jun 2014 01:26:23 -0700 +Subject: ipv4: irq safe sk_dst_[re]set() and ipv4_sk_update_pmtu() fix + +From: Eric Dumazet + +[ Upstream commit 7f502361531e9eecb396cf99bdc9e9a59f7ebd7f ] + +We have two different ways to handle changes to sk->sk_dst + +First way (used by TCP) assumes socket lock is owned by caller, and use +no extra lock : __sk_dst_set() & __sk_dst_reset() + +Another way (used by UDP) uses sk_dst_lock because socket lock is not +always taken. Note that sk_dst_lock is not softirq safe. + +These ways are not interchangeable for a given socket type. + +ipv4_sk_update_pmtu(), added in linux-3.8, added a race, as it used +the socket lock as synchronization, but users might be UDP sockets. + +Instead of converting sk_dst_lock to a softirq safe version, use xchg() +as we did for sk_rx_dst in commit e47eb5dfb296b ("udp: ipv4: do not use +sk_dst_lock from softirq context") + +In a follow up patch, we probably can remove sk_dst_lock, as it is +only used in IPv6. + +Signed-off-by: Eric Dumazet +Cc: Steffen Klassert +Fixes: 9cb3a50c5f63e ("ipv4: Invalidate the socket cached route on pmtu events if possible") +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/sock.h | 12 ++++++------ + net/ipv4/route.c | 15 ++++++++------- + 2 files changed, 14 insertions(+), 13 deletions(-) + +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -1767,9 +1767,11 @@ __sk_dst_set(struct sock *sk, struct dst + static inline void + sk_dst_set(struct sock *sk, struct dst_entry *dst) + { +- spin_lock(&sk->sk_dst_lock); +- __sk_dst_set(sk, dst); +- spin_unlock(&sk->sk_dst_lock); ++ struct dst_entry *old_dst; ++ ++ sk_tx_queue_clear(sk); ++ old_dst = xchg(&sk->sk_dst_cache, dst); ++ dst_release(old_dst); + } + + static inline void +@@ -1781,9 +1783,7 @@ __sk_dst_reset(struct sock *sk) + static inline void + sk_dst_reset(struct sock *sk) + { +- spin_lock(&sk->sk_dst_lock); +- __sk_dst_reset(sk); +- spin_unlock(&sk->sk_dst_lock); ++ sk_dst_set(sk, NULL); + } + + extern struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie); +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -985,20 +985,21 @@ void ipv4_sk_update_pmtu(struct sk_buff + const struct iphdr *iph = (const struct iphdr *) skb->data; + struct flowi4 fl4; + struct rtable *rt; +- struct dst_entry *dst; ++ struct dst_entry *odst = NULL; + bool new = false; + + bh_lock_sock(sk); +- rt = (struct rtable *) __sk_dst_get(sk); ++ odst = sk_dst_get(sk); + +- if (sock_owned_by_user(sk) || !rt) { ++ if (sock_owned_by_user(sk) || !odst) { + __ipv4_sk_update_pmtu(skb, sk, mtu); + goto out; + } + + __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + +- if (!__sk_dst_check(sk, 0)) { ++ rt = (struct rtable *)odst; ++ if (odst->obsolete && odst->ops->check(odst, 0) == NULL) { + rt = ip_route_output_flow(sock_net(sk), &fl4, sk); + if (IS_ERR(rt)) + goto out; +@@ -1008,8 +1009,7 @@ void ipv4_sk_update_pmtu(struct sk_buff + + __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu); + +- dst = dst_check(&rt->dst, 0); +- if (!dst) { ++ if (!dst_check(&rt->dst, 0)) { + if (new) + dst_release(&rt->dst); + +@@ -1021,10 +1021,11 @@ void 
ipv4_sk_update_pmtu(struct sk_buff + } + + if (new) +- __sk_dst_set(sk, &rt->dst); ++ sk_dst_set(sk, &rt->dst); + + out: + bh_unlock_sock(sk); ++ dst_release(odst); + } + EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); + diff --git a/queue-3.10/net-fix-sparse-warning-in-sk_dst_set.patch b/queue-3.10/net-fix-sparse-warning-in-sk_dst_set.patch new file mode 100644 index 00000000000..64f466406fc --- /dev/null +++ b/queue-3.10/net-fix-sparse-warning-in-sk_dst_set.patch @@ -0,0 +1,36 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: Eric Dumazet +Date: Wed, 2 Jul 2014 02:39:38 -0700 +Subject: net: fix sparse warning in sk_dst_set() + +From: Eric Dumazet + +[ Upstream commit 5925a0555bdaf0b396a84318cbc21ba085f6c0d3 ] + +sk_dst_cache has __rcu annotation, so we need a cast to avoid +following sparse error : + +include/net/sock.h:1774:19: warning: incorrect type in initializer (different address spaces) +include/net/sock.h:1774:19: expected struct dst_entry [noderef] *__ret +include/net/sock.h:1774:19: got struct dst_entry *dst + +Signed-off-by: Eric Dumazet +Reported-by: kbuild test robot +Fixes: 7f502361531e ("ipv4: irq safe sk_dst_[re]set() and ipv4_sk_update_pmtu() fix") +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/sock.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -1770,7 +1770,7 @@ sk_dst_set(struct sock *sk, struct dst_e + struct dst_entry *old_dst; + + sk_tx_queue_clear(sk); +- old_dst = xchg(&sk->sk_dst_cache, dst); ++ old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst); + dst_release(old_dst); + } + diff --git a/queue-3.10/net-mvneta-fix-big-endian-issue-in-mvneta_txq_desc_csum.patch b/queue-3.10/net-mvneta-fix-big-endian-issue-in-mvneta_txq_desc_csum.patch new file mode 100644 index 00000000000..75b1383dd2f --- /dev/null +++ b/queue-3.10/net-mvneta-fix-big-endian-issue-in-mvneta_txq_desc_csum.patch @@ -0,0 +1,35 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: Thomas Fitzsimmons +Date: Tue, 8 Jul 2014 19:44:07 -0400 +Subject: net: mvneta: Fix big endian issue in mvneta_txq_desc_csum() + +From: Thomas Fitzsimmons + +[ Upstream commit 0a1985879437d14bda8c90d0dae3455c467d7642 ] + +This commit fixes the command value generated for CSUM calculation +when running in big endian mode. The Ethernet protocol ID for IP was +being unconditionally byte-swapped in the layer 3 protocol check (with +swab16), which caused the mvneta driver to not function correctly in +big endian mode. This patch byte-swaps the ID conditionally with +htons. + +Cc: # v3.13+ +Signed-off-by: Thomas Fitzsimmons +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/mvneta.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -1145,7 +1145,7 @@ static u32 mvneta_txq_desc_csum(int l3_o + command = l3_offs << MVNETA_TX_L3_OFF_SHIFT; + command |= ip_hdr_len << MVNETA_TX_IP_HLEN_SHIFT; + +- if (l3_proto == swab16(ETH_P_IP)) ++ if (l3_proto == htons(ETH_P_IP)) + command |= MVNETA_TXD_IP_CSUM; + else + command |= MVNETA_TX_L3_IP6; diff --git a/queue-3.10/net-mvneta-fix-operation-in-10-mbit-s-mode.patch b/queue-3.10/net-mvneta-fix-operation-in-10-mbit-s-mode.patch new file mode 100644 index 00000000000..744df24155d --- /dev/null +++ b/queue-3.10/net-mvneta-fix-operation-in-10-mbit-s-mode.patch @@ -0,0 +1,45 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: Thomas Petazzoni +Date: Tue, 8 Jul 2014 10:49:43 +0200 +Subject: net: mvneta: fix operation in 10 Mbit/s mode + +From: Thomas Petazzoni + +[ Upstream commit 4d12bc63ab5e48c1d78fa13883cf6fefcea3afb1 ] + +As reported by Maggie Mae Roxas, the mvneta driver doesn't behave +properly in 10 Mbit/s mode. This is due to a misconfiguration of the +MVNETA_GMAC_AUTONEG_CONFIG register: bit MVNETA_GMAC_CONFIG_MII_SPEED +must be set for a 100 Mbit/s speed, but cleared for a 10 Mbit/s speed, +which the driver was not properly doing. This commit adjusts that by +setting the MVNETA_GMAC_CONFIG_MII_SPEED bit only in 100 Mbit/s mode, +and relying on the fact that all the speed related bits of this +register are cleared at the beginning of the mvneta_adjust_link() +function. + +This problem exists since c5aff18204da0 ("net: mvneta: driver for +Marvell Armada 370/XP network unit") which is the commit that +introduced the mvneta driver in the kernel. 
+ +Cc: # v3.8+ +Fixes: c5aff18204da0 ("net: mvneta: driver for Marvell Armada 370/XP network unit") +Reported-by: Maggie Mae Roxas +Cc: Maggie Mae Roxas +Signed-off-by: Thomas Petazzoni +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/mvneta.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -2306,7 +2306,7 @@ static void mvneta_adjust_link(struct ne + + if (phydev->speed == SPEED_1000) + val |= MVNETA_GMAC_CONFIG_GMII_SPEED; +- else ++ else if (phydev->speed == SPEED_100) + val |= MVNETA_GMAC_CONFIG_MII_SPEED; + + mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); diff --git a/queue-3.10/net-pppoe-use-correct-channel-mtu-when-using-multilink-ppp.patch b/queue-3.10/net-pppoe-use-correct-channel-mtu-when-using-multilink-ppp.patch new file mode 100644 index 00000000000..2b57bfe5bbd --- /dev/null +++ b/queue-3.10/net-pppoe-use-correct-channel-mtu-when-using-multilink-ppp.patch @@ -0,0 +1,129 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: Christoph Schulz +Date: Sun, 13 Jul 2014 00:53:15 +0200 +Subject: net: pppoe: use correct channel MTU when using Multilink PPP + +From: Christoph Schulz + +[ Upstream commit a8a3e41c67d24eb12f9ab9680cbb85e24fcd9711 ] + +The PPP channel MTU is used with Multilink PPP when ppp_mp_explode() (see +ppp_generic module) tries to determine how big a fragment might be. According +to RFC 1661, the MTU excludes the 2-byte PPP protocol field, see the +corresponding comment and code in ppp_mp_explode(): + + /* + * hdrlen includes the 2-byte PPP protocol field, but the + * MTU counts only the payload excluding the protocol field. 
+ * (RFC1661 Section 2) + */ + mtu = pch->chan->mtu - (hdrlen - 2); + +However, the pppoe module *does* include the PPP protocol field in the channel +MTU, which is wrong as it causes the PPP payload to be 1-2 bytes too big under +certain circumstances (one byte if PPP protocol compression is used, two +otherwise), causing the generated Ethernet packets to be dropped. So the pppoe +module has to subtract two bytes from the channel MTU. This error only +manifests itself when using Multilink PPP, as otherwise the channel MTU is not +used anywhere. + +In the following, I will describe how to reproduce this bug. We configure two +pppd instances for multilink PPP over two PPPoE links, say eth2 and eth3, with +a MTU of 1492 bytes for each link and a MRRU of 2976 bytes. (This MRRU is +computed by adding the two link MTUs and subtracting the MP header twice, which +is 4 bytes long.) The necessary pppd statements on both sides are "multilink +mtu 1492 mru 1492 mrru 2976". On the client side, we additionally need "plugin +rp-pppoe.so eth2" and "plugin rp-pppoe.so eth3", respectively; on the server +side, we additionally need to start two pppoe-server instances to be able to +establish two PPPoE sessions, one over eth2 and one over eth3. We set the MTU +of the PPP network interface to the MRRU (2976) on both sides of the connection +in order to make use of the higher bandwidth. (If we didn't do that, IP +fragmentation would kick in, which we want to avoid.) + +Now we send a ICMPv4 echo request with a payload of 2948 bytes from client to +server over the PPP link. This results in the following network packet: + + 2948 (echo payload) + + 8 (ICMPv4 header) + + 20 (IPv4 header) +--------------------- + 2976 (PPP payload) + +These 2976 bytes do not exceed the MTU of the PPP network interface, so the +IP packet is not fragmented. Now the multilink PPP code in ppp_mp_explode() +prepends one protocol byte (0x21 for IPv4), making the packet one byte bigger +than the negotiated MRRU. 
So this packet would have to be divided in three +fragments. But this does not happen as each link MTU is assumed to be two bytes +larger. So this packet is divided into two fragments only, one of size 1489 and +one of size 1488. Now we have for that bigger fragment: + + 1489 (PPP payload) + + 4 (MP header) + + 2 (PPP protocol field for the MP payload (0x3d)) + + 6 (PPPoE header) +-------------------------- + 1501 (Ethernet payload) + +This packet exceeds the link MTU and is discarded. + +If one configures the link MTU on the client side to 1501, one can see the +discarded Ethernet frames with tcpdump running on the client. A + +ping -s 2948 -c 1 192.168.15.254 + +leads to the smaller fragment that is correctly received on the server side: + +(tcpdump -vvvne -i eth3 pppoes and ppp proto 0x3d) +52:54:00:ad:87:fd > 52:54:00:79:5c:d0, ethertype PPPoE S (0x8864), + length 1514: PPPoE [ses 0x3] MLPPP (0x003d), length 1494: seq 0x000, + Flags [end], length 1492 + +and to the bigger fragment that is not received on the server side: + +(tcpdump -vvvne -i eth2 pppoes and ppp proto 0x3d) +52:54:00:70:9e:89 > 52:54:00:5d:6f:b0, ethertype PPPoE S (0x8864), + length 1515: PPPoE [ses 0x5] MLPPP (0x003d), length 1495: seq 0x000, + Flags [begin], length 1493 + +With the patch below, we correctly obtain three fragments: + +52:54:00:ad:87:fd > 52:54:00:79:5c:d0, ethertype PPPoE S (0x8864), + length 1514: PPPoE [ses 0x1] MLPPP (0x003d), length 1494: seq 0x000, + Flags [begin], length 1492 +52:54:00:70:9e:89 > 52:54:00:5d:6f:b0, ethertype PPPoE S (0x8864), + length 1514: PPPoE [ses 0x1] MLPPP (0x003d), length 1494: seq 0x000, + Flags [none], length 1492 +52:54:00:ad:87:fd > 52:54:00:79:5c:d0, ethertype PPPoE S (0x8864), + length 27: PPPoE [ses 0x1] MLPPP (0x003d), length 7: seq 0x000, + Flags [end], length 5 + +And the ICMPv4 echo request is successfully received at the server side: + +IP (tos 0x0, ttl 64, id 21925, offset 0, flags [DF], proto ICMP (1), + length 2976) + 192.168.222.2 
> 192.168.15.254: ICMP echo request, id 30530, seq 0, + length 2956 + +The bug was introduced in commit c9aa6895371b2a257401f59d3393c9f7ac5a8698 +("[PPPOE]: Advertise PPPoE MTU") from the very beginning. This patch applies +to 3.10 upwards but the fix can be applied (with minor modifications) to +kernels as old as 2.6.32. + +Signed-off-by: Christoph Schulz +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ppp/pppoe.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ppp/pppoe.c ++++ b/drivers/net/ppp/pppoe.c +@@ -675,7 +675,7 @@ static int pppoe_connect(struct socket * + po->chan.hdrlen = (sizeof(struct pppoe_hdr) + + dev->hard_header_len); + +- po->chan.mtu = dev->mtu - sizeof(struct pppoe_hdr); ++ po->chan.mtu = dev->mtu - sizeof(struct pppoe_hdr) - 2; + po->chan.private = sk; + po->chan.ops = &pppoe_chan_ops; + diff --git a/queue-3.10/net-qmi_wwan-add-id-for-telewell-tw-lte-4g-v2.patch b/queue-3.10/net-qmi_wwan-add-id-for-telewell-tw-lte-4g-v2.patch new file mode 100644 index 00000000000..386c8523f2f --- /dev/null +++ b/queue-3.10/net-qmi_wwan-add-id-for-telewell-tw-lte-4g-v2.patch @@ -0,0 +1,33 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: Bernd Wachter +Date: Tue, 1 Jul 2014 22:01:09 +0300 +Subject: net: qmi_wwan: Add ID for Telewell TW-LTE 4G v2 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Bernd Wachter + +[ Upstream commit 8dcb4b1526747d8431f9895e153dd478c9d16186 ] + +There's a new version of the Telewell 4G modem working with, but not +recognized by this driver. + +Signed-off-by: Bernd Wachter +Acked-by: Bjørn Mork +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/qmi_wwan.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/usb/qmi_wwan.c ++++ b/drivers/net/usb/qmi_wwan.c +@@ -721,6 +721,7 @@ static const struct usb_device_id produc + {QMI_FIXED_INTF(0x19d2, 0x1424, 2)}, + {QMI_FIXED_INTF(0x19d2, 0x1425, 2)}, + {QMI_FIXED_INTF(0x19d2, 0x1426, 2)}, /* ZTE MF91 */ ++ {QMI_FIXED_INTF(0x19d2, 0x1428, 2)}, /* Telewell TW-LTE 4G v2 */ + {QMI_FIXED_INTF(0x19d2, 0x2002, 4)}, /* ZTE (Vodafone) K3765-Z */ + {QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)}, /* Sierra Wireless MC7700 */ + {QMI_FIXED_INTF(0x114f, 0x68a2, 8)}, /* Sierra Wireless MC7750 */ diff --git a/queue-3.10/net-qmi_wwan-add-two-sierra-wireless-netgear-devices.patch b/queue-3.10/net-qmi_wwan-add-two-sierra-wireless-netgear-devices.patch new file mode 100644 index 00000000000..55d723fac2b --- /dev/null +++ b/queue-3.10/net-qmi_wwan-add-two-sierra-wireless-netgear-devices.patch @@ -0,0 +1,40 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= +Date: Thu, 17 Jul 2014 13:33:51 +0200 +Subject: net: qmi_wwan: add two Sierra Wireless/Netgear devices +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= + +[ Upstream commit 5343330010a892b76a97fd93ad3c455a4a32a7fb ] + +Add two device IDs found in an out-of-tree driver downloadable +from Netgear. + +Signed-off-by: Bjørn Mork +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/qmi_wwan.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/usb/qmi_wwan.c ++++ b/drivers/net/usb/qmi_wwan.c +@@ -647,6 +647,7 @@ static const struct usb_device_id produc + {QMI_FIXED_INTF(0x05c6, 0x9084, 4)}, + {QMI_FIXED_INTF(0x05c6, 0x920d, 0)}, + {QMI_FIXED_INTF(0x05c6, 0x920d, 5)}, ++ {QMI_FIXED_INTF(0x0846, 0x68a2, 8)}, + {QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */ + {QMI_FIXED_INTF(0x12d1, 0x14ac, 1)}, /* Huawei E1820 */ + {QMI_FIXED_INTF(0x16d8, 0x6003, 0)}, /* CMOTech 6003 */ +@@ -734,6 +735,7 @@ static const struct usb_device_id produc + {QMI_FIXED_INTF(0x1199, 0x901f, 8)}, /* Sierra Wireless EM7355 */ + {QMI_FIXED_INTF(0x1199, 0x9041, 8)}, /* Sierra Wireless MC7305/MC7355 */ + {QMI_FIXED_INTF(0x1199, 0x9051, 8)}, /* Netgear AirCard 340U */ ++ {QMI_FIXED_INTF(0x1199, 0x9057, 8)}, + {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */ + {QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */ + {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ diff --git a/queue-3.10/net-sctp-check-proc_dointvec-result-in-proc_sctp_do_auth.patch b/queue-3.10/net-sctp-check-proc_dointvec-result-in-proc_sctp_do_auth.patch new file mode 100644 index 00000000000..baf72b09851 --- /dev/null +++ b/queue-3.10/net-sctp-check-proc_dointvec-result-in-proc_sctp_do_auth.patch @@ -0,0 +1,44 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: Daniel Borkmann +Date: Wed, 18 Jun 2014 23:46:31 +0200 +Subject: net: sctp: check proc_dointvec result in proc_sctp_do_auth + +From: Daniel Borkmann + +[ Upstream commit 24599e61b7552673dd85971cf5a35369cd8c119e ] + +When writing to the sysctl field net.sctp.auth_enable, it can well +be that the user buffer we handed over to proc_dointvec() via +proc_sctp_do_auth() handler contains something other than integers. 
+ +In that case, we would set an uninitialized 4-byte value from the +stack to net->sctp.auth_enable that can be leaked back when reading +the sysctl variable, and it can unintentionally turn auth_enable +on/off based on the stack content since auth_enable is interpreted +as a boolean. + +Fix it up by making sure proc_dointvec() returned successfully. + +Fixes: b14878ccb7fa ("net: sctp: cache auth_enable per endpoint") +Reported-by: Florian Westphal +Signed-off-by: Daniel Borkmann +Acked-by: Neil Horman +Acked-by: Vlad Yasevich +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/sysctl.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/net/sctp/sysctl.c ++++ b/net/sctp/sysctl.c +@@ -368,8 +368,7 @@ static int proc_sctp_do_auth(struct ctl_ + tbl.data = &net->sctp.auth_enable; + + ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); +- +- if (write) { ++ if (write && ret == 0) { + struct sock *sk = net->sctp.ctl_sock; + + net->sctp.auth_enable = new_value; diff --git a/queue-3.10/net-sctp-fix-information-leaks-in-ulpevent-layer.patch b/queue-3.10/net-sctp-fix-information-leaks-in-ulpevent-layer.patch new file mode 100644 index 00000000000..c8ad914d514 --- /dev/null +++ b/queue-3.10/net-sctp-fix-information-leaks-in-ulpevent-layer.patch @@ -0,0 +1,256 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: Daniel Borkmann +Date: Sat, 12 Jul 2014 20:30:35 +0200 +Subject: net: sctp: fix information leaks in ulpevent layer + +From: Daniel Borkmann + +[ Upstream commit 8f2e5ae40ec193bc0a0ed99e95315c3eebca84ea ] + +While working on some other SCTP code, I noticed that some +structures shared with user space are leaking uninitialized +stack or heap buffer. In particular, struct sctp_sndrcvinfo +has a 2 bytes hole between .sinfo_flags and .sinfo_ppid that +remains unfilled by us in sctp_ulpevent_read_sndrcvinfo() when +putting this into cmsg. 
But also struct sctp_remote_error +contains a 2 bytes hole that we don't fill but place into a skb +through skb_copy_expand() via sctp_ulpevent_make_remote_error(). + +Both structures are defined by the IETF in RFC6458: + +* Section 5.3.2. SCTP Header Information Structure: + + The sctp_sndrcvinfo structure is defined below: + + struct sctp_sndrcvinfo { + uint16_t sinfo_stream; + uint16_t sinfo_ssn; + uint16_t sinfo_flags; + <-- 2 bytes hole --> + uint32_t sinfo_ppid; + uint32_t sinfo_context; + uint32_t sinfo_timetolive; + uint32_t sinfo_tsn; + uint32_t sinfo_cumtsn; + sctp_assoc_t sinfo_assoc_id; + }; + +* 6.1.3. SCTP_REMOTE_ERROR: + + A remote peer may send an Operation Error message to its peer. + This message indicates a variety of error conditions on an + association. The entire ERROR chunk as it appears on the wire + is included in an SCTP_REMOTE_ERROR event. Please refer to the + SCTP specification [RFC4960] and any extensions for a list of + possible error formats. An SCTP error notification has the + following format: + + struct sctp_remote_error { + uint16_t sre_type; + uint16_t sre_flags; + uint32_t sre_length; + uint16_t sre_error; + <-- 2 bytes hole --> + sctp_assoc_t sre_assoc_id; + uint8_t sre_data[]; + }; + +Fix this by setting both to 0 before filling them out. We also +have other structures shared between user and kernel space in +SCTP that contains holes (e.g. struct sctp_paddrthlds), but we +copy that buffer over from user space first and thus don't need +to care about it in that cases. + +While at it, we can also remove lengthy comments copied from +the draft, instead, we update the comment with the correct RFC +number where one can look it up. + +Signed-off-by: Daniel Borkmann +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/ulpevent.c | 122 ++++++---------------------------------------------- + 1 file changed, 15 insertions(+), 107 deletions(-) + +--- a/net/sctp/ulpevent.c ++++ b/net/sctp/ulpevent.c +@@ -373,9 +373,10 @@ fail: + * specification [SCTP] and any extensions for a list of possible + * error formats. + */ +-struct sctp_ulpevent *sctp_ulpevent_make_remote_error( +- const struct sctp_association *asoc, struct sctp_chunk *chunk, +- __u16 flags, gfp_t gfp) ++struct sctp_ulpevent * ++sctp_ulpevent_make_remote_error(const struct sctp_association *asoc, ++ struct sctp_chunk *chunk, __u16 flags, ++ gfp_t gfp) + { + struct sctp_ulpevent *event; + struct sctp_remote_error *sre; +@@ -394,8 +395,7 @@ struct sctp_ulpevent *sctp_ulpevent_make + /* Copy the skb to a new skb with room for us to prepend + * notification with. + */ +- skb = skb_copy_expand(chunk->skb, sizeof(struct sctp_remote_error), +- 0, gfp); ++ skb = skb_copy_expand(chunk->skb, sizeof(*sre), 0, gfp); + + /* Pull off the rest of the cause TLV from the chunk. */ + skb_pull(chunk->skb, elen); +@@ -406,62 +406,21 @@ struct sctp_ulpevent *sctp_ulpevent_make + event = sctp_skb2event(skb); + sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize); + +- sre = (struct sctp_remote_error *) +- skb_push(skb, sizeof(struct sctp_remote_error)); ++ sre = (struct sctp_remote_error *) skb_push(skb, sizeof(*sre)); + + /* Trim the buffer to the right length. */ +- skb_trim(skb, sizeof(struct sctp_remote_error) + elen); ++ skb_trim(skb, sizeof(*sre) + elen); + +- /* Socket Extensions for SCTP +- * 5.3.1.3 SCTP_REMOTE_ERROR +- * +- * sre_type: +- * It should be SCTP_REMOTE_ERROR. +- */ ++ /* RFC6458, Section 6.1.3. SCTP_REMOTE_ERROR */ ++ memset(sre, 0, sizeof(*sre)); + sre->sre_type = SCTP_REMOTE_ERROR; +- +- /* +- * Socket Extensions for SCTP +- * 5.3.1.3 SCTP_REMOTE_ERROR +- * +- * sre_flags: 16 bits (unsigned integer) +- * Currently unused. 
+- */ + sre->sre_flags = 0; +- +- /* Socket Extensions for SCTP +- * 5.3.1.3 SCTP_REMOTE_ERROR +- * +- * sre_length: sizeof (__u32) +- * +- * This field is the total length of the notification data, +- * including the notification header. +- */ + sre->sre_length = skb->len; +- +- /* Socket Extensions for SCTP +- * 5.3.1.3 SCTP_REMOTE_ERROR +- * +- * sre_error: 16 bits (unsigned integer) +- * This value represents one of the Operational Error causes defined in +- * the SCTP specification, in network byte order. +- */ + sre->sre_error = cause; +- +- /* Socket Extensions for SCTP +- * 5.3.1.3 SCTP_REMOTE_ERROR +- * +- * sre_assoc_id: sizeof (sctp_assoc_t) +- * +- * The association id field, holds the identifier for the association. +- * All notifications for a given association have the same association +- * identifier. For TCP style socket, this field is ignored. +- */ + sctp_ulpevent_set_owner(event, asoc); + sre->sre_assoc_id = sctp_assoc2id(asoc); + + return event; +- + fail: + return NULL; + } +@@ -906,7 +865,9 @@ __u16 sctp_ulpevent_get_notification_typ + return notification->sn_header.sn_type; + } + +-/* Copy out the sndrcvinfo into a msghdr. */ ++/* RFC6458, Section 5.3.2. SCTP Header Information Structure ++ * (SCTP_SNDRCV, DEPRECATED) ++ */ + void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, + struct msghdr *msghdr) + { +@@ -915,74 +876,21 @@ void sctp_ulpevent_read_sndrcvinfo(const + if (sctp_ulpevent_is_notification(event)) + return; + +- /* Sockets API Extensions for SCTP +- * Section 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV) +- * +- * sinfo_stream: 16 bits (unsigned integer) +- * +- * For recvmsg() the SCTP stack places the message's stream number in +- * this value. +- */ ++ memset(&sinfo, 0, sizeof(sinfo)); + sinfo.sinfo_stream = event->stream; +- /* sinfo_ssn: 16 bits (unsigned integer) +- * +- * For recvmsg() this value contains the stream sequence number that +- * the remote endpoint placed in the DATA chunk. 
For fragmented +- * messages this is the same number for all deliveries of the message +- * (if more than one recvmsg() is needed to read the message). +- */ + sinfo.sinfo_ssn = event->ssn; +- /* sinfo_ppid: 32 bits (unsigned integer) +- * +- * In recvmsg() this value is +- * the same information that was passed by the upper layer in the peer +- * application. Please note that byte order issues are NOT accounted +- * for and this information is passed opaquely by the SCTP stack from +- * one end to the other. +- */ + sinfo.sinfo_ppid = event->ppid; +- /* sinfo_flags: 16 bits (unsigned integer) +- * +- * This field may contain any of the following flags and is composed of +- * a bitwise OR of these values. +- * +- * recvmsg() flags: +- * +- * SCTP_UNORDERED - This flag is present when the message was sent +- * non-ordered. +- */ + sinfo.sinfo_flags = event->flags; +- /* sinfo_tsn: 32 bit (unsigned integer) +- * +- * For the receiving side, this field holds a TSN that was +- * assigned to one of the SCTP Data Chunks. +- */ + sinfo.sinfo_tsn = event->tsn; +- /* sinfo_cumtsn: 32 bit (unsigned integer) +- * +- * This field will hold the current cumulative TSN as +- * known by the underlying SCTP layer. Note this field is +- * ignored when sending and only valid for a receive +- * operation when sinfo_flags are set to SCTP_UNORDERED. +- */ + sinfo.sinfo_cumtsn = event->cumtsn; +- /* sinfo_assoc_id: sizeof (sctp_assoc_t) +- * +- * The association handle field, sinfo_assoc_id, holds the identifier +- * for the association announced in the COMMUNICATION_UP notification. +- * All notifications for a given association have the same identifier. +- * Ignored for one-to-one style sockets. +- */ + sinfo.sinfo_assoc_id = sctp_assoc2id(event->asoc); +- +- /* context value that is set via SCTP_CONTEXT socket option. */ ++ /* Context value that is set via SCTP_CONTEXT socket option. 
*/ + sinfo.sinfo_context = event->asoc->default_rcv_context; +- + /* These fields are not used while receiving. */ + sinfo.sinfo_timetolive = 0; + + put_cmsg(msghdr, IPPROTO_SCTP, SCTP_SNDRCV, +- sizeof(struct sctp_sndrcvinfo), (void *)&sinfo); ++ sizeof(sinfo), &sinfo); + } + + /* Do accounting for bytes received and hold a reference to the association diff --git a/queue-3.10/netlink-fix-handling-of-error-from-netlink_dump.patch b/queue-3.10/netlink-fix-handling-of-error-from-netlink_dump.patch new file mode 100644 index 00000000000..565e4d89ce5 --- /dev/null +++ b/queue-3.10/netlink-fix-handling-of-error-from-netlink_dump.patch @@ -0,0 +1,52 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: Ben Pfaff +Date: Wed, 9 Jul 2014 10:31:22 -0700 +Subject: netlink: Fix handling of error from netlink_dump(). + +From: Ben Pfaff + +[ Upstream commit ac30ef832e6af0505b6f0251a6659adcfa74975e ] + +netlink_dump() returns a negative errno value on error. Until now, +netlink_recvmsg() directly recorded that negative value in sk->sk_err, but +that's wrong since sk_err takes positive errno values. (This manifests as +userspace receiving a positive return value from the recv() system call, +falsely indicating success.) This bug was introduced in the commit that +started checking the netlink_dump() return value, commit b44d211 (netlink: +handle errors from netlink_dump()). + +Multithreaded Netlink dumps are one way to trigger this behavior in +practice, as described in the commit message for the userspace workaround +posted here: + http://openvswitch.org/pipermail/dev/2014-June/042339.html + +This commit also fixes the same bug in netlink_poll(), introduced in commit +cd1df525d (netlink: add flow control for memory mapped I/O). + +Signed-off-by: Ben Pfaff +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/netlink/af_netlink.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -500,7 +500,7 @@ static unsigned int netlink_poll(struct + while (nlk->cb != NULL && netlink_dump_space(nlk)) { + err = netlink_dump(sk); + if (err < 0) { +- sk->sk_err = err; ++ sk->sk_err = -err; + sk->sk_error_report(sk); + break; + } +@@ -2272,7 +2272,7 @@ static int netlink_recvmsg(struct kiocb + if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { + ret = netlink_dump(sk); + if (ret) { +- sk->sk_err = ret; ++ sk->sk_err = -ret; + sk->sk_error_report(sk); + } + } diff --git a/queue-3.10/series b/queue-3.10/series index e48af28e5f8..aa64254b8e0 100644 --- a/queue-3.10/series +++ b/queue-3.10/series @@ -13,3 +13,29 @@ iwlwifi-dvm-don-t-enable-cts-to-self.patch shmem-fix-faulting-into-a-hole-while-it-s-punched.patch shmem-fix-faulting-into-a-hole-not-taking-i_mutex.patch shmem-fix-splicing-from-a-hole-while-it-s-punched.patch +ip_tunnel-fix-ip_tunnel_lookup.patch +tcp-fix-tcp_match_skb_to_sack-for-unaligned-sack-at-end-of-an-skb.patch +net-sctp-check-proc_dointvec-result-in-proc_sctp_do_auth.patch +8021q-fix-a-potential-memory-leak.patch +ipv4-fix-dst-race-in-sk_dst_get.patch +ipv4-irq-safe-sk_dst_set-and-ipv4_sk_update_pmtu-fix.patch +net-fix-sparse-warning-in-sk_dst_set.patch +bnx2x-fix-possible-panic-under-memory-stress.patch +tcp-fix-divide-by-zero-when-pushing-during-tcp-repair.patch +ipv4-icmp-fix-pmtu-handling-for-rare-case.patch +net-qmi_wwan-add-id-for-telewell-tw-lte-4g-v2.patch +net-qmi_wwan-add-two-sierra-wireless-netgear-devices.patch +igmp-fix-the-problem-when-mc-leave-group.patch +tcp-fix-false-undo-corner-cases.patch +appletalk-fix-socket-referencing-in-skb.patch +net-mvneta-fix-operation-in-10-mbit-s-mode.patch +net-mvneta-fix-big-endian-issue-in-mvneta_txq_desc_csum.patch +netlink-fix-handling-of-error-from-netlink_dump.patch 
+be2net-set-eq-db-clear-intr-bit-in-be_open.patch +tipc-clear-next-pointer-of-message-fragments-before-reassembly.patch +net-sctp-fix-information-leaks-in-ulpevent-layer.patch +net-pppoe-use-correct-channel-mtu-when-using-multilink-ppp.patch +sunvnet-clean-up-objects-created-in-vnet_new-on-vnet_exit.patch +dns_resolver-assure-that-dns_query-result-is-null-terminated.patch +dns_resolver-null-terminate-the-right-string.patch +ipv4-fix-buffer-overflow-in-ip_options_compile.patch diff --git a/queue-3.10/sunvnet-clean-up-objects-created-in-vnet_new-on-vnet_exit.patch b/queue-3.10/sunvnet-clean-up-objects-created-in-vnet_new-on-vnet_exit.patch new file mode 100644 index 00000000000..de35c004ca7 --- /dev/null +++ b/queue-3.10/sunvnet-clean-up-objects-created-in-vnet_new-on-vnet_exit.patch @@ -0,0 +1,69 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: Sowmini Varadhan +Date: Wed, 16 Jul 2014 10:02:26 -0400 +Subject: sunvnet: clean up objects created in vnet_new() on vnet_exit() + +From: Sowmini Varadhan + +[ Upstream commit a4b70a07ed12a71131cab7adce2ce91c71b37060 ] + +Nothing cleans up the objects created by +vnet_new(), they are completely leaked. + +vnet_exit(), after doing the vio_unregister_driver() to clean +up ports, should call a helper function that iterates over vnet_list +and cleans up those objects. This includes unregister_netdevice() +as well as free_netdev(). + +Signed-off-by: Sowmini Varadhan +Acked-by: Dave Kleikamp +Reviewed-by: Karl Volz +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/sun/sunvnet.c | 20 +++++++++++++++++++- + 1 file changed, 19 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/sun/sunvnet.c ++++ b/drivers/net/ethernet/sun/sunvnet.c +@@ -1083,6 +1083,24 @@ static struct vnet *vnet_find_or_create( + return vp; + } + ++static void vnet_cleanup(void) ++{ ++ struct vnet *vp; ++ struct net_device *dev; ++ ++ mutex_lock(&vnet_list_mutex); ++ while (!list_empty(&vnet_list)) { ++ vp = list_first_entry(&vnet_list, struct vnet, list); ++ list_del(&vp->list); ++ dev = vp->dev; ++ /* vio_unregister_driver() should have cleaned up port_list */ ++ BUG_ON(!list_empty(&vp->port_list)); ++ unregister_netdev(dev); ++ free_netdev(dev); ++ } ++ mutex_unlock(&vnet_list_mutex); ++} ++ + static const char *local_mac_prop = "local-mac-address"; + + static struct vnet *vnet_find_parent(struct mdesc_handle *hp, +@@ -1240,7 +1258,6 @@ static int vnet_port_remove(struct vio_d + + kfree(port); + +- unregister_netdev(vp->dev); + } + return 0; + } +@@ -1268,6 +1285,7 @@ static int __init vnet_init(void) + static void __exit vnet_exit(void) + { + vio_unregister_driver(&vnet_port_driver); ++ vnet_cleanup(); + } + + module_init(vnet_init); diff --git a/queue-3.10/tcp-fix-divide-by-zero-when-pushing-during-tcp-repair.patch b/queue-3.10/tcp-fix-divide-by-zero-when-pushing-during-tcp-repair.patch new file mode 100644 index 00000000000..b0042042104 --- /dev/null +++ b/queue-3.10/tcp-fix-divide-by-zero-when-pushing-during-tcp-repair.patch @@ -0,0 +1,130 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: Christoph Paasch +Date: Sat, 28 Jun 2014 18:26:37 +0200 +Subject: tcp: Fix divide by zero when pushing during tcp-repair + +From: Christoph Paasch + +[ Upstream commit 5924f17a8a30c2ae18d034a86ee7581b34accef6 ] + +When in repair-mode and TCP_RECV_QUEUE is set, we end up calling +tcp_push with mss_now being 0. 
If data is in the send-queue and +tcp_set_skb_tso_segs gets called, we crash because it will divide by +mss_now: + +[ 347.151939] divide error: 0000 [#1] SMP +[ 347.152907] Modules linked in: +[ 347.152907] CPU: 1 PID: 1123 Comm: packetdrill Not tainted 3.16.0-rc2 #4 +[ 347.152907] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 +[ 347.152907] task: f5b88540 ti: f3c82000 task.ti: f3c82000 +[ 347.152907] EIP: 0060:[] EFLAGS: 00210246 CPU: 1 +[ 347.152907] EIP is at tcp_set_skb_tso_segs+0x49/0xa0 +[ 347.152907] EAX: 00000b67 EBX: f5acd080 ECX: 00000000 EDX: 00000000 +[ 347.152907] ESI: f5a28f40 EDI: f3c88f00 EBP: f3c83d10 ESP: f3c83d00 +[ 347.152907] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 +[ 347.152907] CR0: 80050033 CR2: 083158b0 CR3: 35146000 CR4: 000006b0 +[ 347.152907] Stack: +[ 347.152907] c167f9d9 f5acd080 000005b4 00000002 f3c83d20 c16013e6 f3c88f00 f5acd080 +[ 347.152907] f3c83da0 c1603b5a f3c83d38 c10a0188 00000000 00000000 f3c83d84 c10acc85 +[ 347.152907] c1ad5ec0 00000000 00000000 c1ad679c 010003e0 00000000 00000000 f3c88fc8 +[ 347.152907] Call Trace: +[ 347.152907] [] ? apic_timer_interrupt+0x2d/0x34 +[ 347.152907] [] tcp_init_tso_segs+0x36/0x50 +[ 347.152907] [] tcp_write_xmit+0x7a/0xbf0 +[ 347.152907] [] ? up+0x28/0x40 +[ 347.152907] [] ? console_unlock+0x295/0x480 +[ 347.152907] [] ? vprintk_emit+0x1ef/0x4b0 +[ 347.152907] [] __tcp_push_pending_frames+0x36/0xd0 +[ 347.152907] [] tcp_push+0xf0/0x120 +[ 347.152907] [] tcp_sendmsg+0xf1/0xbf0 +[ 347.152907] [] ? kmem_cache_free+0xf0/0x120 +[ 347.152907] [] ? __sigqueue_free+0x32/0x40 +[ 347.152907] [] ? __sigqueue_free+0x32/0x40 +[ 347.152907] [] ? do_wp_page+0x3e0/0x850 +[ 347.152907] [] inet_sendmsg+0x4a/0xb0 +[ 347.152907] [] ? 
handle_mm_fault+0x709/0xfb0 +[ 347.152907] [] sock_aio_write+0xbb/0xd0 +[ 347.152907] [] do_sync_write+0x69/0xa0 +[ 347.152907] [] vfs_write+0x123/0x160 +[ 347.152907] [] SyS_write+0x55/0xb0 +[ 347.152907] [] sysenter_do_call+0x12/0x28 + +This can easily be reproduced with the following packetdrill-script (the +"magic" with netem, sk_pacing and limit_output_bytes is done to prevent +the kernel from pushing all segments, because hitting the limit without +doing this is not so easy with packetdrill): + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + ++0 bind(3, ..., ...) = 0 ++0 listen(3, 1) = 0 + ++0 < S 0:0(0) win 32792 ++0 > S. 0:0(0) ack 1 ++0.1 < . 1:1(0) ack 1 win 65000 + ++0 accept(3, ..., ...) = 4 + +// This forces that not all segments of the snd-queue will be pushed ++0 `tc qdisc add dev tun0 root netem delay 10ms` ++0 `sysctl -w net.ipv4.tcp_limit_output_bytes=2` ++0 setsockopt(4, SOL_SOCKET, 47, [2], 4) = 0 + ++0 write(4,...,10000) = 10000 ++0 write(4,...,10000) = 10000 + +// Set tcp-repair stuff, particularly TCP_RECV_QUEUE ++0 setsockopt(4, SOL_TCP, 19, [1], 4) = 0 ++0 setsockopt(4, SOL_TCP, 20, [1], 4) = 0 + +// This now will make the write push the remaining segments ++0 setsockopt(4, SOL_SOCKET, 47, [20000], 4) = 0 ++0 `sysctl -w net.ipv4.tcp_limit_output_bytes=130000` + +// Now we will crash ++0 write(4,...,1000) = 1000 + +This happens since ec3423257508 (tcp: fix retransmission in repair +mode). Prior to that, the call to tcp_push was prevented by a check for +tp->repair. + +The patch fixes it, by adding the new goto-label out_nopush. When exiting +tcp_sendmsg and a push is not required, which is the case for tp->repair, +we go to this label. + +When repairing and calling send() with TCP_RECV_QUEUE, the data is +actually put in the receive-queue. So, no push is required because no +data has been added to the send-queue. 
+ +Cc: Andrew Vagin +Cc: Pavel Emelyanov +Fixes: ec3423257508 (tcp: fix retransmission in repair mode) +Signed-off-by: Christoph Paasch +Acked-by: Andrew Vagin +Acked-by: Pavel Emelyanov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -1065,7 +1065,7 @@ int tcp_sendmsg(struct kiocb *iocb, stru + if (unlikely(tp->repair)) { + if (tp->repair_queue == TCP_RECV_QUEUE) { + copied = tcp_send_rcvq(sk, msg, size); +- goto out; ++ goto out_nopush; + } + + err = -EINVAL; +@@ -1238,6 +1238,7 @@ wait_for_memory: + out: + if (copied) + tcp_push(sk, flags, mss_now, tp->nonagle); ++out_nopush: + release_sock(sk); + return copied + copied_syn; + diff --git a/queue-3.10/tcp-fix-false-undo-corner-cases.patch b/queue-3.10/tcp-fix-false-undo-corner-cases.patch new file mode 100644 index 00000000000..8306d7f06fa --- /dev/null +++ b/queue-3.10/tcp-fix-false-undo-corner-cases.patch @@ -0,0 +1,96 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: Yuchung Cheng +Date: Wed, 2 Jul 2014 12:07:16 -0700 +Subject: tcp: fix false undo corner cases + +From: Yuchung Cheng + +[ Upstream commit 6e08d5e3c8236e7484229e46fdf92006e1dd4c49 ] + +The undo code assumes that, upon entering loss recovery, TCP +1) always retransmit something +2) the retransmission never fails locally (e.g., qdisc drop) + +so undo_marker is set in tcp_enter_recovery() and undo_retrans is +incremented only when tcp_retransmit_skb() is successful. + +When the assumption is broken because TCP's cwnd is too small to +retransmit or the retransmit fails locally. The next (DUP)ACK +would incorrectly revert the cwnd and the congestion state in +tcp_try_undo_dsack() or tcp_may_undo(). Subsequent (DUP)ACKs +may enter the recovery state. The sender repeatedly enter and +(incorrectly) exit recovery states if the retransmits continue to +fail locally while receiving (DUP)ACKs. 
+ +The fix is to initialize undo_retrans to -1 and start counting on +the first retransmission. Always increment undo_retrans even if the +retransmissions fail locally because they couldn't cause DSACKs to +undo the cwnd reduction. + +Signed-off-by: Yuchung Cheng +Signed-off-by: Neal Cardwell +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 8 ++++---- + net/ipv4/tcp_output.c | 6 ++++-- + 2 files changed, 8 insertions(+), 6 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -1075,7 +1075,7 @@ static bool tcp_check_dsack(struct sock + } + + /* D-SACK for already forgotten data... Do dumb counting. */ +- if (dup_sack && tp->undo_marker && tp->undo_retrans && ++ if (dup_sack && tp->undo_marker && tp->undo_retrans > 0 && + !after(end_seq_0, prior_snd_una) && + after(end_seq_0, tp->undo_marker)) + tp->undo_retrans--; +@@ -1154,7 +1154,7 @@ static u8 tcp_sacktag_one(struct sock *s + + /* Account D-SACK for retransmitted packet. */ + if (dup_sack && (sacked & TCPCB_RETRANS)) { +- if (tp->undo_marker && tp->undo_retrans && ++ if (tp->undo_marker && tp->undo_retrans > 0 && + after(end_seq, tp->undo_marker)) + tp->undo_retrans--; + if (sacked & TCPCB_SACKED_ACKED) +@@ -1850,7 +1850,7 @@ static void tcp_clear_retrans_partial(st + tp->lost_out = 0; + + tp->undo_marker = 0; +- tp->undo_retrans = 0; ++ tp->undo_retrans = -1; + } + + void tcp_clear_retrans(struct tcp_sock *tp) +@@ -2700,7 +2700,7 @@ static void tcp_enter_recovery(struct so + + tp->prior_ssthresh = 0; + tp->undo_marker = tp->snd_una; +- tp->undo_retrans = tp->retrans_out; ++ tp->undo_retrans = tp->retrans_out ? 
: -1; + + if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { + if (!ece_ack) +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -2428,13 +2428,15 @@ int tcp_retransmit_skb(struct sock *sk, + if (!tp->retrans_stamp) + tp->retrans_stamp = TCP_SKB_CB(skb)->when; + +- tp->undo_retrans += tcp_skb_pcount(skb); +- + /* snd_nxt is stored to detect loss of retransmitted segment, + * see tcp_input.c tcp_sacktag_write_queue(). + */ + TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt; + } ++ ++ if (tp->undo_retrans < 0) ++ tp->undo_retrans = 0; ++ tp->undo_retrans += tcp_skb_pcount(skb); + return err; + } + diff --git a/queue-3.10/tcp-fix-tcp_match_skb_to_sack-for-unaligned-sack-at-end-of-an-skb.patch b/queue-3.10/tcp-fix-tcp_match_skb_to_sack-for-unaligned-sack-at-end-of-an-skb.patch new file mode 100644 index 00000000000..8d48fde2e46 --- /dev/null +++ b/queue-3.10/tcp-fix-tcp_match_skb_to_sack-for-unaligned-sack-at-end-of-an-skb.patch @@ -0,0 +1,65 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: Neal Cardwell +Date: Wed, 18 Jun 2014 21:15:03 -0400 +Subject: tcp: fix tcp_match_skb_to_sack() for unaligned SACK at end of an skb + +From: Neal Cardwell + +[ Upstream commit 2cd0d743b05e87445c54ca124a9916f22f16742e ] + +If there is an MSS change (or misbehaving receiver) that causes a SACK +to arrive that covers the end of an skb but is less than one MSS, then +tcp_match_skb_to_sack() was rounding up pkt_len to the full length of +the skb ("Round if necessary..."), then chopping all bytes off the skb +and creating a zero-byte skb in the write queue. + +This was visible now because the recently simplified TLP logic in +bef1909ee3ed1c ("tcp: fixing TLP's FIN recovery") could find that 0-byte +skb at the end of the write queue, and now that we do not check that +skb's length we could send it as a TLP probe. 
+ +Consider the following example scenario: + + mss: 1000 + skb: seq: 0 end_seq: 4000 len: 4000 + SACK: start_seq: 3999 end_seq: 4000 + +The tcp_match_skb_to_sack() code will compute: + + in_sack = false + pkt_len = start_seq - TCP_SKB_CB(skb)->seq = 3999 - 0 = 3999 + new_len = (pkt_len / mss) * mss = (3999/1000)*1000 = 3000 + new_len += mss = 4000 + +Previously we would find the new_len > skb->len check failing, so we +would fall through and set pkt_len = new_len = 4000 and chop off +pkt_len of 4000 from the 4000-byte skb, leaving a 0-byte segment +afterward in the write queue. + +With this new commit, we notice that the new new_len >= skb->len check +succeeds, so that we return without trying to fragment. + +Fixes: adb92db857ee ("tcp: Make SACK code to split only at mss boundaries") +Reported-by: Eric Dumazet +Signed-off-by: Neal Cardwell +Cc: Eric Dumazet +Cc: Yuchung Cheng +Cc: Ilpo Jarvinen +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -1130,7 +1130,7 @@ static int tcp_match_skb_to_sack(struct + unsigned int new_len = (pkt_len / mss) * mss; + if (!in_sack && new_len < pkt_len) { + new_len += mss; +- if (new_len > skb->len) ++ if (new_len >= skb->len) + return 0; + } + pkt_len = new_len; diff --git a/queue-3.10/tipc-clear-next-pointer-of-message-fragments-before-reassembly.patch b/queue-3.10/tipc-clear-next-pointer-of-message-fragments-before-reassembly.patch new file mode 100644 index 00000000000..c6dcc8b3f37 --- /dev/null +++ b/queue-3.10/tipc-clear-next-pointer-of-message-fragments-before-reassembly.patch @@ -0,0 +1,41 @@ +From foo@baz Sat Jul 26 10:03:51 PDT 2014 +From: Jon Paul Maloy +Date: Fri, 11 Jul 2014 08:45:27 -0400 +Subject: tipc: clear 'next'-pointer of message fragments before reassembly + +From: Jon Paul Maloy + +[ Upstream commit 
999417549c16dd0e3a382aa9f6ae61688db03181 ] + +If the 'next' pointer of the last fragment buffer in a message is not +zeroed before reassembly, we risk ending up with a corrupt message, +since the reassembly function itself isn't doing this. + +Currently, when a buffer is retrieved from the deferred queue of the +broadcast link, the next pointer is not cleared, with the result as +described above. + +This commit corrects this, and thereby fixes a bug that may occur when +long broadcast messages are transmitted across dual interfaces. The bug +has been present since 40ba3cdf542a469aaa9083fa041656e59b109b90 ("tipc: +message reassembly using fragment chain") + +This commit should be applied to both net and net-next. + +Signed-off-by: Jon Maloy +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tipc/bcast.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/tipc/bcast.c ++++ b/net/tipc/bcast.c +@@ -531,6 +531,7 @@ receive: + + buf = node->bclink.deferred_head; + node->bclink.deferred_head = buf->next; ++ buf->next = NULL; + node->bclink.deferred_size--; + goto receive; + }