From 0485f45e30cb855f2d3fdc78c8be669d07ccccaf Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 4 Feb 2014 09:18:39 -0800 Subject: [PATCH] 3.13-stable patches added patches: fib_frontend-fix-possible-null-pointer-dereference.patch ip_tunnel-clear-ipcb-in-ip_tunnel_xmit-in-case-dst_link_failure-is-called.patch net-fix-memory-leak-if-tproxy-used-with-tcp-early-demux.patch net-gre-use-icmp_hdr-to-get-inner-ip-header.patch net-vxlan-share-rx-skb-de-marking-and-checksum-checks-with-ovs.patch xen-netfront-fix-resource-leak-in-netfront.patch --- ...ix-possible-null-pointer-dereference.patch | 35 +++ ...t-in-case-dst_link_failure-is-called.patch | 36 ++++ ...-if-tproxy-used-with-tcp-early-demux.patch | 96 +++++++++ ...-use-icmp_hdr-to-get-inner-ip-header.patch | 40 ++++ ...marking-and-checksum-checks-with-ovs.patch | 59 +++++ queue-3.13/series | 6 + ...tfront-fix-resource-leak-in-netfront.patch | 204 ++++++++++++++++++ 7 files changed, 476 insertions(+) create mode 100644 queue-3.13/fib_frontend-fix-possible-null-pointer-dereference.patch create mode 100644 queue-3.13/ip_tunnel-clear-ipcb-in-ip_tunnel_xmit-in-case-dst_link_failure-is-called.patch create mode 100644 queue-3.13/net-fix-memory-leak-if-tproxy-used-with-tcp-early-demux.patch create mode 100644 queue-3.13/net-gre-use-icmp_hdr-to-get-inner-ip-header.patch create mode 100644 queue-3.13/net-vxlan-share-rx-skb-de-marking-and-checksum-checks-with-ovs.patch create mode 100644 queue-3.13/xen-netfront-fix-resource-leak-in-netfront.patch diff --git a/queue-3.13/fib_frontend-fix-possible-null-pointer-dereference.patch b/queue-3.13/fib_frontend-fix-possible-null-pointer-dereference.patch new file mode 100644 index 00000000000..c389db8bde6 --- /dev/null +++ b/queue-3.13/fib_frontend-fix-possible-null-pointer-dereference.patch @@ -0,0 +1,35 @@ +From foo@baz Tue Feb 4 09:16:27 PST 2014 +From: Oliver Hartkopp +Date: Thu, 23 Jan 2014 10:19:34 +0100 +Subject: fib_frontend: fix possible NULL pointer dereference + 
+From: Oliver Hartkopp + +[ Upstream commit a0065f266a9b5d51575535a25c15ccbeed9a9966 ] + +The two commits 0115e8e30d (net: remove delay at device dismantle) and +748e2d9396a (net: reinstate rtnl in call_netdevice_notifiers()) silently +removed a NULL pointer check for in_dev since Linux 3.7. + +This patch re-introduces this check as it causes crashing the kernel when +setting small mtu values on non-ip capable netdevices. + +Signed-off-by: Oliver Hartkopp +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_frontend.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/ipv4/fib_frontend.c ++++ b/net/ipv4/fib_frontend.c +@@ -1047,6 +1047,8 @@ static int fib_netdev_event(struct notif + } + + in_dev = __in_dev_get_rtnl(dev); ++ if (!in_dev) ++ return NOTIFY_DONE; + + switch (event) { + case NETDEV_UP: diff --git a/queue-3.13/ip_tunnel-clear-ipcb-in-ip_tunnel_xmit-in-case-dst_link_failure-is-called.patch b/queue-3.13/ip_tunnel-clear-ipcb-in-ip_tunnel_xmit-in-case-dst_link_failure-is-called.patch new file mode 100644 index 00000000000..6c1ff9f5ee3 --- /dev/null +++ b/queue-3.13/ip_tunnel-clear-ipcb-in-ip_tunnel_xmit-in-case-dst_link_failure-is-called.patch @@ -0,0 +1,36 @@ +From foo@baz Tue Feb 4 09:16:27 PST 2014 +From: Duan Jiong +Date: Thu, 23 Jan 2014 14:00:25 +0800 +Subject: ip_tunnel: clear IPCB in ip_tunnel_xmit() in case dst_link_failure() is called + +From: Duan Jiong + +[ Upstream commit 11c21a307d79ea5f6b6fc0d3dfdeda271e5e65f6 ] + +commit a622260254ee48("ip_tunnel: fix kernel panic with icmp_dest_unreach") +clear IPCB in ip_tunnel_xmit() , or else skb->cb[] may contain garbage from +GSO segmentation layer. + +But commit 0e6fbc5b6c621("ip_tunnels: extend iptunnel_xmit()") refactor codes, +and it clear IPCB behind the dst_link_failure(). + +So clear IPCB in ip_tunnel_xmit() just like commit a622260254ee48("ip_tunnel: +fix kernel panic with icmp_dest_unreach"). 
+ +Signed-off-by: Duan Jiong +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_tunnel.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv4/ip_tunnel.c ++++ b/net/ipv4/ip_tunnel.c +@@ -618,6 +618,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, + tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { + tunnel->err_count--; + ++ memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + dst_link_failure(skb); + } else + tunnel->err_count = 0; diff --git a/queue-3.13/net-fix-memory-leak-if-tproxy-used-with-tcp-early-demux.patch b/queue-3.13/net-fix-memory-leak-if-tproxy-used-with-tcp-early-demux.patch new file mode 100644 index 00000000000..663a604911a --- /dev/null +++ b/queue-3.13/net-fix-memory-leak-if-tproxy-used-with-tcp-early-demux.patch @@ -0,0 +1,96 @@ +From foo@baz Tue Feb 4 09:16:27 PST 2014 +From: Holger Eitzenberger +Date: Mon, 27 Jan 2014 10:33:18 +0100 +Subject: net: Fix memory leak if TPROXY used with TCP early demux + +From: Holger Eitzenberger + +[ Upstream commit a452ce345d63ddf92cd101e4196569f8718ad319 ] + +I see a memory leak when using a transparent HTTP proxy using TPROXY +together with TCP early demux and Kernel v3.8.13.15 (Ubuntu stable): + +unreferenced object 0xffff88008cba4a40 (size 1696): + comm "softirq", pid 0, jiffies 4294944115 (age 8907.520s) + hex dump (first 32 bytes): + 0a e0 20 6a 40 04 1b 37 92 be 32 e2 e8 b4 00 00 .. j@..7..2..... + 02 00 07 01 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
+ backtrace: + [] kmem_cache_alloc+0xad/0xb9 + [] sk_prot_alloc+0x29/0xc5 + [] sk_clone_lock+0x14/0x283 + [] inet_csk_clone_lock+0xf/0x7b + [] netlink_broadcast+0x14/0x16 + [] tcp_create_openreq_child+0x1b/0x4c3 + [] tcp_v4_syn_recv_sock+0x38/0x25d + [] tcp_check_req+0x25c/0x3d0 + [] tcp_v4_do_rcv+0x287/0x40e + [] ip_route_input_noref+0x843/0xa55 + [] tcp_v4_rcv+0x4c9/0x725 + [] ip_local_deliver_finish+0xe9/0x154 + [] __netif_receive_skb+0x4b2/0x514 + [] process_backlog+0xee/0x1c5 + [] net_rx_action+0xa7/0x200 + [] add_interrupt_randomness+0x39/0x157 + +But there are many more, resulting in the machine going OOM after some +days. + +From looking at the TPROXY code, and with help from Florian, I see +that the memory leak is introduced in tcp_v4_early_demux(): + + void tcp_v4_early_demux(struct sk_buff *skb) + { + /* ... */ + + iph = ip_hdr(skb); + th = tcp_hdr(skb); + + if (th->doff < sizeof(struct tcphdr) / 4) + return; + + sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo, + iph->saddr, th->source, + iph->daddr, ntohs(th->dest), + skb->skb_iif); + if (sk) { + skb->sk = sk; + +where the socket is assigned unconditionally to skb->sk, also bumping +the refcnt on it. This is problematic, because in our case the skb +has already a socket assigned in the TPROXY target. This then results +in the leak I see. + +The very same issue seems to be with IPv6, but haven't tested. + +Reviewed-by: Florian Westphal +Signed-off-by: Holger Eitzenberger +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_input.c | 2 +- + net/ipv6/ip6_input.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ipv4/ip_input.c ++++ b/net/ipv4/ip_input.c +@@ -314,7 +314,7 @@ static int ip_rcv_finish(struct sk_buff + const struct iphdr *iph = ip_hdr(skb); + struct rtable *rt; + +- if (sysctl_ip_early_demux && !skb_dst(skb)) { ++ if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { + const struct net_protocol *ipprot; + int protocol = iph->protocol; + +--- a/net/ipv6/ip6_input.c ++++ b/net/ipv6/ip6_input.c +@@ -49,7 +49,7 @@ + + int ip6_rcv_finish(struct sk_buff *skb) + { +- if (sysctl_ip_early_demux && !skb_dst(skb)) { ++ if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { + const struct inet6_protocol *ipprot; + + ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); diff --git a/queue-3.13/net-gre-use-icmp_hdr-to-get-inner-ip-header.patch b/queue-3.13/net-gre-use-icmp_hdr-to-get-inner-ip-header.patch new file mode 100644 index 00000000000..f357eeaef5a --- /dev/null +++ b/queue-3.13/net-gre-use-icmp_hdr-to-get-inner-ip-header.patch @@ -0,0 +1,40 @@ +From foo@baz Tue Feb 4 09:16:27 PST 2014 +From: Duan Jiong +Date: Tue, 28 Jan 2014 11:49:43 +0800 +Subject: net: gre: use icmp_hdr() to get inner ip header + +From: Duan Jiong + +[ Upstream commit c0c0c50ff7c3e331c90bab316d21f724fb9e1994 ] + +When dealing with icmp messages, the skb->data points the +ip header that triggered the sending of the icmp message. + +In gre_cisco_err(), the parse_gre_header() is called, and the +iptunnel_pull_header() is called to pull the skb at the end of +the parse_gre_header(), so the skb->data doesn't point the +inner ip header. + +Unfortunately, the ipgre_err still needs those ip addresses in +inner ip header to look up tunnel by ip_tunnel_lookup(). + +So just use icmp_hdr() to get inner ip header instead of skb->data. + +Signed-off-by: Duan Jiong +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_gre.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/ip_gre.c ++++ b/net/ipv4/ip_gre.c +@@ -178,7 +178,7 @@ static int ipgre_err(struct sk_buff *skb + else + itn = net_generic(net, ipgre_net_id); + +- iph = (const struct iphdr *)skb->data; ++ iph = (const struct iphdr *)(icmp_hdr(skb) + 1); + t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, + iph->daddr, iph->saddr, tpi->key); + diff --git a/queue-3.13/net-vxlan-share-rx-skb-de-marking-and-checksum-checks-with-ovs.patch b/queue-3.13/net-vxlan-share-rx-skb-de-marking-and-checksum-checks-with-ovs.patch new file mode 100644 index 00000000000..ee23b7c0688 --- /dev/null +++ b/queue-3.13/net-vxlan-share-rx-skb-de-marking-and-checksum-checks-with-ovs.patch @@ -0,0 +1,59 @@ +From foo@baz Tue Feb 4 09:16:27 PST 2014 +From: Or Gerlitz +Date: Thu, 23 Jan 2014 11:28:13 +0200 +Subject: net/vxlan: Share RX skb de-marking and checksum checks with ovs + +From: Or Gerlitz + +[ Upstream commit d0bc65557ad09a57b4db176e9e3ccddb26971453 ] + +Make sure the practice set by commit 0afb166 "vxlan: Add capability +of Rx checksum offload for inner packet" is applied when the skb +goes through the portion of the RX code which is shared between +vxlan netdevices and ovs vxlan port instances. + +Cc: Joseph Gasparakis +Cc: Pravin B Shelar +Signed-off-by: Or Gerlitz +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vxlan.c | 20 +++++++++----------- + 1 file changed, 9 insertions(+), 11 deletions(-) + +--- a/drivers/net/vxlan.c ++++ b/drivers/net/vxlan.c +@@ -1047,6 +1047,15 @@ static int vxlan_udp_encap_recv(struct s + if (!vs) + goto drop; + ++ /* If the NIC driver gave us an encapsulated packet ++ * with the encapsulation mark, the device checksummed it ++ * for us. Otherwise force the upper layers to verify it. 
++ */ ++ if (skb->ip_summed != CHECKSUM_UNNECESSARY || !skb->encapsulation) ++ skb->ip_summed = CHECKSUM_NONE; ++ ++ skb->encapsulation = 0; ++ + vs->rcv(vs, skb, vxh->vx_vni); + return 0; + +@@ -1105,17 +1114,6 @@ static void vxlan_rcv(struct vxlan_sock + + skb_reset_network_header(skb); + +- /* If the NIC driver gave us an encapsulated packet with +- * CHECKSUM_UNNECESSARY and Rx checksum feature is enabled, +- * leave the CHECKSUM_UNNECESSARY, the device checksummed it +- * for us. Otherwise force the upper layers to verify it. +- */ +- if (skb->ip_summed != CHECKSUM_UNNECESSARY || !skb->encapsulation || +- !(vxlan->dev->features & NETIF_F_RXCSUM)) +- skb->ip_summed = CHECKSUM_NONE; +- +- skb->encapsulation = 0; +- + if (oip6) + err = IP6_ECN_decapsulate(oip6, skb); + if (oip) diff --git a/queue-3.13/series b/queue-3.13/series index 30dce047bab..370af29a77f 100644 --- a/queue-3.13/series +++ b/queue-3.13/series @@ -110,3 +110,9 @@ i2c-mv64xxx-fix-bus-hang-on-a0-version-of-the-armada-xp-socs.patch i2c-mv64xxx-document-the-newly-introduced-armada-xp-a0-compatible.patch i2c-piix4-add-support-for-amd-ml-and-cz-smbus-changes.patch drivers-tty-ehv_bytechan-fails-to-build-as-a-module.patch +ip_tunnel-clear-ipcb-in-ip_tunnel_xmit-in-case-dst_link_failure-is-called.patch +net-vxlan-share-rx-skb-de-marking-and-checksum-checks-with-ovs.patch +fib_frontend-fix-possible-null-pointer-dereference.patch +net-fix-memory-leak-if-tproxy-used-with-tcp-early-demux.patch +xen-netfront-fix-resource-leak-in-netfront.patch +net-gre-use-icmp_hdr-to-get-inner-ip-header.patch diff --git a/queue-3.13/xen-netfront-fix-resource-leak-in-netfront.patch b/queue-3.13/xen-netfront-fix-resource-leak-in-netfront.patch new file mode 100644 index 00000000000..293ca6c7e15 --- /dev/null +++ b/queue-3.13/xen-netfront-fix-resource-leak-in-netfront.patch @@ -0,0 +1,204 @@ +From foo@baz Tue Feb 4 09:16:27 PST 2014 +From: Annie Li +Date: Tue, 28 Jan 2014 11:35:42 +0800 +Subject: xen-netfront: fix resource 
leak in netfront + +From: Annie Li + +[ Upstream commit cefe0078eea52af17411eb1248946a94afb84ca5 ] + +This patch removes grant transfer releasing code from netfront, and uses +gnttab_end_foreign_access to end grant access since +gnttab_end_foreign_access_ref may fail when the grant entry is +currently used for reading or writing. + +* clean up grant transfer code kept from old netfront(2.6.18) which grants +pages for access/map and transfer. But grant transfer is deprecated in current +netfront, so remove corresponding release code for transfer. + +* fix resource leak, release grant access (through gnttab_end_foreign_access) +and skb for tx/rx path, use get_page to ensure page is released when grant +access is completed successfully. + +Xen-blkfront/xen-tpmfront/xen-pcifront also have similar issue, but patches +for them will be created separately. + +V6: Correct subject line and commit message. + +V5: Remove unnecessary change in xennet_end_access. + +V4: Revert put_page in gnttab_end_foreign_access, and keep netfront change in +single patch. + +V3: Changes as suggestion from David Vrabel, ensure pages are not freed until +grant access is ended. + +V2: Improve patch comments. + +Signed-off-by: Annie Li +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/xen-netfront.c | 88 +++++++++++++-------------------------------- + 1 file changed, 26 insertions(+), 62 deletions(-) + +--- a/drivers/net/xen-netfront.c ++++ b/drivers/net/xen-netfront.c +@@ -117,6 +117,7 @@ struct netfront_info { + } tx_skbs[NET_TX_RING_SIZE]; + grant_ref_t gref_tx_head; + grant_ref_t grant_tx_ref[NET_TX_RING_SIZE]; ++ struct page *grant_tx_page[NET_TX_RING_SIZE]; + unsigned tx_skb_freelist; + + spinlock_t rx_lock ____cacheline_aligned_in_smp; +@@ -396,6 +397,7 @@ static void xennet_tx_buf_gc(struct net_ + gnttab_release_grant_reference( + &np->gref_tx_head, np->grant_tx_ref[id]); + np->grant_tx_ref[id] = GRANT_INVALID_REF; ++ np->grant_tx_page[id] = NULL; + add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, id); + dev_kfree_skb_irq(skb); + } +@@ -452,6 +454,7 @@ static void xennet_make_frags(struct sk_ + gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, + mfn, GNTMAP_readonly); + ++ np->grant_tx_page[id] = virt_to_page(data); + tx->gref = np->grant_tx_ref[id] = ref; + tx->offset = offset; + tx->size = len; +@@ -497,6 +500,7 @@ static void xennet_make_frags(struct sk_ + np->xbdev->otherend_id, + mfn, GNTMAP_readonly); + ++ np->grant_tx_page[id] = page; + tx->gref = np->grant_tx_ref[id] = ref; + tx->offset = offset; + tx->size = bytes; +@@ -596,6 +600,7 @@ static int xennet_start_xmit(struct sk_b + mfn = virt_to_mfn(data); + gnttab_grant_foreign_access_ref( + ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly); ++ np->grant_tx_page[id] = virt_to_page(data); + tx->gref = np->grant_tx_ref[id] = ref; + tx->offset = offset; + tx->size = len; +@@ -1122,10 +1127,11 @@ static void xennet_release_tx_bufs(struc + continue; + + skb = np->tx_skbs[i].skb; +- gnttab_end_foreign_access_ref(np->grant_tx_ref[i], +- GNTMAP_readonly); +- gnttab_release_grant_reference(&np->gref_tx_head, +- np->grant_tx_ref[i]); ++ get_page(np->grant_tx_page[i]); ++ 
gnttab_end_foreign_access(np->grant_tx_ref[i], ++ GNTMAP_readonly, ++ (unsigned long)page_address(np->grant_tx_page[i])); ++ np->grant_tx_page[i] = NULL; + np->grant_tx_ref[i] = GRANT_INVALID_REF; + add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, i); + dev_kfree_skb_irq(skb); +@@ -1134,78 +1140,35 @@ static void xennet_release_tx_bufs(struc + + static void xennet_release_rx_bufs(struct netfront_info *np) + { +- struct mmu_update *mmu = np->rx_mmu; +- struct multicall_entry *mcl = np->rx_mcl; +- struct sk_buff_head free_list; +- struct sk_buff *skb; +- unsigned long mfn; +- int xfer = 0, noxfer = 0, unused = 0; + int id, ref; + +- dev_warn(&np->netdev->dev, "%s: fix me for copying receiver.\n", +- __func__); +- return; +- +- skb_queue_head_init(&free_list); +- + spin_lock_bh(&np->rx_lock); + + for (id = 0; id < NET_RX_RING_SIZE; id++) { +- ref = np->grant_rx_ref[id]; +- if (ref == GRANT_INVALID_REF) { +- unused++; +- continue; +- } ++ struct sk_buff *skb; ++ struct page *page; + + skb = np->rx_skbs[id]; +- mfn = gnttab_end_foreign_transfer_ref(ref); +- gnttab_release_grant_reference(&np->gref_rx_head, ref); +- np->grant_rx_ref[id] = GRANT_INVALID_REF; +- +- if (0 == mfn) { +- skb_shinfo(skb)->nr_frags = 0; +- dev_kfree_skb(skb); +- noxfer++; ++ if (!skb) + continue; +- } + +- if (!xen_feature(XENFEAT_auto_translated_physmap)) { +- /* Remap the page. 
*/ +- const struct page *page = +- skb_frag_page(&skb_shinfo(skb)->frags[0]); +- unsigned long pfn = page_to_pfn(page); +- void *vaddr = page_address(page); ++ ref = np->grant_rx_ref[id]; ++ if (ref == GRANT_INVALID_REF) ++ continue; + +- MULTI_update_va_mapping(mcl, (unsigned long)vaddr, +- mfn_pte(mfn, PAGE_KERNEL), +- 0); +- mcl++; +- mmu->ptr = ((u64)mfn << PAGE_SHIFT) +- | MMU_MACHPHYS_UPDATE; +- mmu->val = pfn; +- mmu++; ++ page = skb_frag_page(&skb_shinfo(skb)->frags[0]); + +- set_phys_to_machine(pfn, mfn); +- } +- __skb_queue_tail(&free_list, skb); +- xfer++; +- } +- +- dev_info(&np->netdev->dev, "%s: %d xfer, %d noxfer, %d unused\n", +- __func__, xfer, noxfer, unused); ++ /* gnttab_end_foreign_access() needs a page ref until ++ * foreign access is ended (which may be deferred). ++ */ ++ get_page(page); ++ gnttab_end_foreign_access(ref, 0, ++ (unsigned long)page_address(page)); ++ np->grant_rx_ref[id] = GRANT_INVALID_REF; + +- if (xfer) { +- if (!xen_feature(XENFEAT_auto_translated_physmap)) { +- /* Do all the remapping work and M2P updates. */ +- MULTI_mmu_update(mcl, np->rx_mmu, mmu - np->rx_mmu, +- NULL, DOMID_SELF); +- mcl++; +- HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl); +- } ++ kfree_skb(skb); + } + +- __skb_queue_purge(&free_list); +- + spin_unlock_bh(&np->rx_lock); + } + +@@ -1358,6 +1321,7 @@ static struct net_device *xennet_create_ + for (i = 0; i < NET_RX_RING_SIZE; i++) { + np->rx_skbs[i] = NULL; + np->grant_rx_ref[i] = GRANT_INVALID_REF; ++ np->grant_tx_page[i] = NULL; + } + + /* A grant for every tx ring slot */ -- 2.47.2