From 6c7aacf6eb3956b8b3e54d4b5c00e5cab06e033a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 24 Jun 2013 14:08:33 -0700 Subject: [PATCH] 3.9-stable patches added patches: 8139cp-reset-bql-when-ring-tx-ring-cleared.patch gianfar-add-missing-iounmap-on-error-in-gianfar_ptp_probe.patch ip_tunnel-fix-kernel-panic-with-icmp_dest_unreach.patch ipv4-fix-redirect-handling-for-tcp-packets.patch ipv6-assign-rt6_info-to-inet6_ifaddr-in-init_loopback.patch ipv6-fix-possible-crashes-in-ip6_cork_release.patch l2tp-fix-ppp-header-erasure-and-memory-leak.patch l2tp-fix-sendmsg-return-value.patch macvtap-set-transport-header-before-passing-skb-to-lower-device.patch net-802-mrp-fix-lockdep-splat.patch netback-set-transport-header-before-passing-it-to-kernel.patch net-block-msg_cmsg_compat-in-send-m-msg-and-recv-m-msg.patch net-core-sock.c-add-missing-vsock-string-in-af_family_-_key_strings.patch net-fec-fix-kernel-oops-when-plug-unplug-cable-many-times.patch net-force-a-reload-of-first-item-in-hlist_nulls_for_each_entry_rcu.patch netlabel-improve-domain-mapping-validation.patch net-phy-fix-a-bug-when-verify-the-eee-support.patch net_sched-better-precise-estimation-on-packet-length-for-untrusted-packets.patch net_sched-htb-do-not-mix-1ns-and-64ns-time-units.patch net_sched-restore-overhead-xxx-handling.patch net-sctp-fix-null-pointer-dereference-in-socket-destruction.patch net-sh_eth-fix-incorrect-rx-length-error-if-r8a7740.patch packet-packet_getname_spkt-make-sure-string-is-always-0-terminated.patch packet-set-transport-header-before-doing-xmit.patch r8169-fix-offloaded-tx-checksum-for-small-packets.patch sctp-fully-initialize-sctp_outq-in-sctp_outq_init.patch tcp-bug-fix-in-proportional-rate-reduction.patch tcp-fix-tcp_md5_hash_skb_data.patch tcp-xps-fix-reordering-issues.patch team-check-return-value-of-team_get_port_by_index_rcu-for-null.patch team-move-add-to-port-list-before-port-enablement.patch tuntap-correct-the-return-value-in-tun_set_iff.patch 
tuntap-forbid-changing-mq-flag-for-persistent-device.patch tuntap-set-sock_zerocopy-flag-during-open.patch tuntap-set-transport-header-before-passing-it-to-kernel.patch udp6-fix-udp-fragmentation-for-tunnel-traffic.patch vhost_net-clear-msg.control-for-non-zerocopy-case-during-tx.patch vxlan-update-vxlan-fdb-used-field-after-each-usage.patch xfrm-properly-handle-invalid-states-as-an-error.patch --- ...-reset-bql-when-ring-tx-ring-cleared.patch | 30 ++ ...ounmap-on-error-in-gianfar_ptp_probe.patch | 29 ++ ...-kernel-panic-with-icmp_dest_unreach.patch | 72 +++++ ...ix-redirect-handling-for-tcp-packets.patch | 47 ++++ ...nfo-to-inet6_ifaddr-in-init_loopback.patch | 40 +++ ...possible-crashes-in-ip6_cork_release.patch | 36 +++ ...x-ppp-header-erasure-and-memory-leak.patch | 37 +++ queue-3.9/l2tp-fix-sendmsg-return-value.patch | 30 ++ ...r-before-passing-skb-to-lower-device.patch | 57 ++++ queue-3.9/net-802-mrp-fix-lockdep-splat.patch | 50 ++++ ..._compat-in-send-m-msg-and-recv-m-msg.patch | 261 ++++++++++++++++++ ...ck-string-in-af_family_-_key_strings.patch | 49 ++++ ...ps-when-plug-unplug-cable-many-times.patch | 174 ++++++++++++ ...em-in-hlist_nulls_for_each_entry_rcu.patch | 53 ++++ ...ix-a-bug-when-verify-the-eee-support.patch | 31 +++ ...er-dereference-in-socket-destruction.patch | 95 +++++++ ...incorrect-rx-length-error-if-r8a7740.patch | 50 ++++ ...-packet-length-for-untrusted-packets.patch | 46 +++ ...b-do-not-mix-1ns-and-64ns-time-units.patch | 131 +++++++++ ..._sched-restore-overhead-xxx-handling.patch | 174 ++++++++++++ ...t-header-before-passing-it-to-kernel.patch | 70 +++++ ...el-improve-domain-mapping-validation.patch | 116 ++++++++ ...e-sure-string-is-always-0-terminated.patch | 41 +++ ...t-transport-header-before-doing-xmit.patch | 96 +++++++ ...loaded-tx-checksum-for-small-packets.patch | 108 ++++++++ ...itialize-sctp_outq-in-sctp_outq_init.patch | 58 ++++ queue-3.9/series | 39 +++ ...g-fix-in-proportional-rate-reduction.patch | 127 +++++++++ 
queue-3.9/tcp-fix-tcp_md5_hash_skb_data.patch | 47 ++++ queue-3.9/tcp-xps-fix-reordering-issues.patch | 47 ++++ ...-team_get_port_by_index_rcu-for-null.patch | 34 +++ ...-to-port-list-before-port-enablement.patch | 38 +++ ...rect-the-return-value-in-tun_set_iff.patch | 45 +++ ...anging-mq-flag-for-persistent-device.patch | 43 +++ ...p-set-sock_zerocopy-flag-during-open.patch | 34 +++ ...t-header-before-passing-it-to-kernel.patch | 59 ++++ ...udp-fragmentation-for-tunnel-traffic.patch | 99 +++++++ ...trol-for-non-zerocopy-case-during-tx.patch | 59 ++++ ...xlan-fdb-used-field-after-each-usage.patch | 69 +++++ ...ly-handle-invalid-states-as-an-error.patch | 36 +++ 40 files changed, 2757 insertions(+) create mode 100644 queue-3.9/8139cp-reset-bql-when-ring-tx-ring-cleared.patch create mode 100644 queue-3.9/gianfar-add-missing-iounmap-on-error-in-gianfar_ptp_probe.patch create mode 100644 queue-3.9/ip_tunnel-fix-kernel-panic-with-icmp_dest_unreach.patch create mode 100644 queue-3.9/ipv4-fix-redirect-handling-for-tcp-packets.patch create mode 100644 queue-3.9/ipv6-assign-rt6_info-to-inet6_ifaddr-in-init_loopback.patch create mode 100644 queue-3.9/ipv6-fix-possible-crashes-in-ip6_cork_release.patch create mode 100644 queue-3.9/l2tp-fix-ppp-header-erasure-and-memory-leak.patch create mode 100644 queue-3.9/l2tp-fix-sendmsg-return-value.patch create mode 100644 queue-3.9/macvtap-set-transport-header-before-passing-skb-to-lower-device.patch create mode 100644 queue-3.9/net-802-mrp-fix-lockdep-splat.patch create mode 100644 queue-3.9/net-block-msg_cmsg_compat-in-send-m-msg-and-recv-m-msg.patch create mode 100644 queue-3.9/net-core-sock.c-add-missing-vsock-string-in-af_family_-_key_strings.patch create mode 100644 queue-3.9/net-fec-fix-kernel-oops-when-plug-unplug-cable-many-times.patch create mode 100644 queue-3.9/net-force-a-reload-of-first-item-in-hlist_nulls_for_each_entry_rcu.patch create mode 100644 queue-3.9/net-phy-fix-a-bug-when-verify-the-eee-support.patch create mode 
100644 queue-3.9/net-sctp-fix-null-pointer-dereference-in-socket-destruction.patch create mode 100644 queue-3.9/net-sh_eth-fix-incorrect-rx-length-error-if-r8a7740.patch create mode 100644 queue-3.9/net_sched-better-precise-estimation-on-packet-length-for-untrusted-packets.patch create mode 100644 queue-3.9/net_sched-htb-do-not-mix-1ns-and-64ns-time-units.patch create mode 100644 queue-3.9/net_sched-restore-overhead-xxx-handling.patch create mode 100644 queue-3.9/netback-set-transport-header-before-passing-it-to-kernel.patch create mode 100644 queue-3.9/netlabel-improve-domain-mapping-validation.patch create mode 100644 queue-3.9/packet-packet_getname_spkt-make-sure-string-is-always-0-terminated.patch create mode 100644 queue-3.9/packet-set-transport-header-before-doing-xmit.patch create mode 100644 queue-3.9/r8169-fix-offloaded-tx-checksum-for-small-packets.patch create mode 100644 queue-3.9/sctp-fully-initialize-sctp_outq-in-sctp_outq_init.patch create mode 100644 queue-3.9/tcp-bug-fix-in-proportional-rate-reduction.patch create mode 100644 queue-3.9/tcp-fix-tcp_md5_hash_skb_data.patch create mode 100644 queue-3.9/tcp-xps-fix-reordering-issues.patch create mode 100644 queue-3.9/team-check-return-value-of-team_get_port_by_index_rcu-for-null.patch create mode 100644 queue-3.9/team-move-add-to-port-list-before-port-enablement.patch create mode 100644 queue-3.9/tuntap-correct-the-return-value-in-tun_set_iff.patch create mode 100644 queue-3.9/tuntap-forbid-changing-mq-flag-for-persistent-device.patch create mode 100644 queue-3.9/tuntap-set-sock_zerocopy-flag-during-open.patch create mode 100644 queue-3.9/tuntap-set-transport-header-before-passing-it-to-kernel.patch create mode 100644 queue-3.9/udp6-fix-udp-fragmentation-for-tunnel-traffic.patch create mode 100644 queue-3.9/vhost_net-clear-msg.control-for-non-zerocopy-case-during-tx.patch create mode 100644 queue-3.9/vxlan-update-vxlan-fdb-used-field-after-each-usage.patch create mode 100644 
queue-3.9/xfrm-properly-handle-invalid-states-as-an-error.patch diff --git a/queue-3.9/8139cp-reset-bql-when-ring-tx-ring-cleared.patch b/queue-3.9/8139cp-reset-bql-when-ring-tx-ring-cleared.patch new file mode 100644 index 00000000000..7aa89b27fc7 --- /dev/null +++ b/queue-3.9/8139cp-reset-bql-when-ring-tx-ring-cleared.patch @@ -0,0 +1,30 @@ +From fc1467415805b7009f68aa2814201c30927f1c3f Mon Sep 17 00:00:00 2001 +From: stephen hemminger +Date: Mon, 20 May 2013 06:54:43 +0000 +Subject: 8139cp: reset BQL when ring tx ring cleared + +From: stephen hemminger + +[ Upstream commit 98962baad72fd6d393bf39dbb7c2076532c363c6 ] + +This patch cures transmit timeout's with DHCP observed +while running under KVM. When the transmit ring is cleaned out, +the Byte Queue Limit values need to be reset. + +Signed-off-by: Stephen Hemminger +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/realtek/8139cp.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/ethernet/realtek/8139cp.c ++++ b/drivers/net/ethernet/realtek/8139cp.c +@@ -1136,6 +1136,7 @@ static void cp_clean_rings (struct cp_pr + cp->dev->stats.tx_dropped++; + } + } ++ netdev_reset_queue(cp->dev); + + memset(cp->rx_ring, 0, sizeof(struct cp_desc) * CP_RX_RING_SIZE); + memset(cp->tx_ring, 0, sizeof(struct cp_desc) * CP_TX_RING_SIZE); diff --git a/queue-3.9/gianfar-add-missing-iounmap-on-error-in-gianfar_ptp_probe.patch b/queue-3.9/gianfar-add-missing-iounmap-on-error-in-gianfar_ptp_probe.patch new file mode 100644 index 00000000000..2c6c878b9d9 --- /dev/null +++ b/queue-3.9/gianfar-add-missing-iounmap-on-error-in-gianfar_ptp_probe.patch @@ -0,0 +1,29 @@ +From e22e02802afcb8f6863aabc9a3f127653b13360f Mon Sep 17 00:00:00 2001 +From: Wei Yongjun +Date: Thu, 16 May 2013 22:25:34 +0000 +Subject: gianfar: add missing iounmap() on error in gianfar_ptp_probe() + +From: Wei Yongjun + +[ Upstream commit e5f5e380e0f3bb11f04ca5bc66a551e58e0ad26e ] + +Add the missing iounmap() before 
return from gianfar_ptp_probe() +in the error handling case. + +Signed-off-by: Wei Yongjun +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/freescale/gianfar_ptp.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/ethernet/freescale/gianfar_ptp.c ++++ b/drivers/net/ethernet/freescale/gianfar_ptp.c +@@ -521,6 +521,7 @@ static int gianfar_ptp_probe(struct plat + return 0; + + no_clock: ++ iounmap(etsects->regs); + no_ioremap: + release_resource(etsects->rsrc); + no_resource: diff --git a/queue-3.9/ip_tunnel-fix-kernel-panic-with-icmp_dest_unreach.patch b/queue-3.9/ip_tunnel-fix-kernel-panic-with-icmp_dest_unreach.patch new file mode 100644 index 00000000000..bf11a26de37 --- /dev/null +++ b/queue-3.9/ip_tunnel-fix-kernel-panic-with-icmp_dest_unreach.patch @@ -0,0 +1,72 @@ +From c45feaee5a78ec4e95b52081a3ea1f840a8ce3e8 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Fri, 24 May 2013 05:49:58 +0000 +Subject: ip_tunnel: fix kernel panic with icmp_dest_unreach + +From: Eric Dumazet + +[ Upstream commit a622260254ee481747cceaaa8609985b29a31565 ] + +Daniel Petre reported crashes in icmp_dst_unreach() with following call +graph: + +Daniel found a similar problem mentioned in + http://lkml.indiana.edu/hypermail/linux/kernel/1007.0/00961.html + +And indeed this is the root cause : skb->cb[] contains data fooling IP +stack. + +We must clear IPCB in ip_tunnel_xmit() sooner in case dst_link_failure() +is called. Or else skb->cb[] might contain garbage from GSO segmentation +layer. + +A similar fix was tested on linux-3.9, but gre code was refactored in +linux-3.10. I'll send patches for stable kernels as well. + +Many thanks to Daniel for providing reports, patches and testing ! + +Reported-by: Daniel Petre +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_gre.c | 2 +- + net/ipv4/ipip.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ipv4/ip_gre.c ++++ b/net/ipv4/ip_gre.c +@@ -804,6 +804,7 @@ static netdev_tx_t ipgre_tunnel_xmit(str + tiph = &tunnel->parms.iph; + } + ++ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + if ((dst = tiph->daddr) == 0) { + /* NBMA tunnel */ + +@@ -952,7 +953,6 @@ static netdev_tx_t ipgre_tunnel_xmit(str + skb_push(skb, gre_hlen); + skb_reset_network_header(skb); + skb_set_transport_header(skb, sizeof(*iph)); +- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | + IPSKB_REROUTED); + skb_dst_drop(skb); +--- a/net/ipv4/ipip.c ++++ b/net/ipv4/ipip.c +@@ -491,6 +491,7 @@ static netdev_tx_t ipip_tunnel_xmit(stru + if (tos & 1) + tos = old_iph->tos; + ++ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + if (!dst) { + /* NBMA tunnel */ + if ((rt = skb_rtable(skb)) == NULL) { +@@ -573,7 +574,6 @@ static netdev_tx_t ipip_tunnel_xmit(stru + skb->transport_header = skb->network_header; + skb_push(skb, sizeof(struct iphdr)); + skb_reset_network_header(skb); +- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | + IPSKB_REROUTED); + skb_dst_drop(skb); diff --git a/queue-3.9/ipv4-fix-redirect-handling-for-tcp-packets.patch b/queue-3.9/ipv4-fix-redirect-handling-for-tcp-packets.patch new file mode 100644 index 00000000000..90274bfad64 --- /dev/null +++ b/queue-3.9/ipv4-fix-redirect-handling-for-tcp-packets.patch @@ -0,0 +1,47 @@ +From 463521cb1ad9682bae946fffe7ee1cb92b08200f Mon Sep 17 00:00:00 2001 +From: Michal Kubecek +Date: Tue, 28 May 2013 08:26:49 +0200 +Subject: ipv4: fix redirect handling for TCP packets + +From: Michal Kubecek + +[ Upstream commit f96ef988cc603487c03a6de07807b06cbe641829 ] + +Unlike ipv4_redirect() and ipv4_sk_redirect(), 
ip_do_redirect() +doesn't call __build_flow_key() directly but via +ip_rt_build_flow_key() wrapper. This leads to __build_flow_key() +getting pointer to IPv4 header of the ICMP redirect packet +rather than pointer to the embedded IPv4 header of the packet +initiating the redirect. + +As a result, handling of ICMP redirects initiated by TCP packets +is broken. Issue was introduced by + + 4895c771c ("ipv4: Add FIB nexthop exceptions.") + +Signed-off-by: Michal Kubecek +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -737,10 +737,15 @@ static void ip_do_redirect(struct dst_en + { + struct rtable *rt; + struct flowi4 fl4; ++ const struct iphdr *iph = (const struct iphdr *) skb->data; ++ int oif = skb->dev->ifindex; ++ u8 tos = RT_TOS(iph->tos); ++ u8 prot = iph->protocol; ++ u32 mark = skb->mark; + + rt = (struct rtable *) dst; + +- ip_rt_build_flow_key(&fl4, sk, skb); ++ __build_flow_key(&fl4, sk, iph, oif, tos, prot, mark, 0); + __ip_do_redirect(rt, skb, &fl4, true); + } + diff --git a/queue-3.9/ipv6-assign-rt6_info-to-inet6_ifaddr-in-init_loopback.patch b/queue-3.9/ipv6-assign-rt6_info-to-inet6_ifaddr-in-init_loopback.patch new file mode 100644 index 00000000000..bf348a8c609 --- /dev/null +++ b/queue-3.9/ipv6-assign-rt6_info-to-inet6_ifaddr-in-init_loopback.patch @@ -0,0 +1,40 @@ +From 28017e35af095f8af93ef3759d048c8835ffbb3e Mon Sep 17 00:00:00 2001 +From: Gao feng +Date: Sun, 2 Jun 2013 22:16:21 +0000 +Subject: ipv6: assign rt6_info to inet6_ifaddr in init_loopback + +From: Gao feng + +[ Upstream commit 534c877928a16ae5f9776436a497109639bf67dc ] + +Commit 25fb6ca4ed9cad72f14f61629b68dc03c0d9713f +"net IPv6 : Fix broken IPv6 routing table after loopback down-up" +forgot to assign rt6_info to the inet6_ifaddr. 
+When disable the net device, the rt6_info which allocated +in init_loopback will not be destroied in __ipv6_ifa_notify. + +This will trigger the waring message below +[23527.916091] unregister_netdevice: waiting for tap0 to become free. Usage count = 1 + +Reported-by: Arkadiusz Miskiewicz +Signed-off-by: Gao feng +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -2560,8 +2560,10 @@ static void init_loopback(struct net_dev + sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, 0); + + /* Failure cases are ignored */ +- if (!IS_ERR(sp_rt)) ++ if (!IS_ERR(sp_rt)) { ++ sp_ifa->rt = sp_rt; + ip6_ins_rt(sp_rt); ++ } + } + read_unlock_bh(&idev->lock); + } diff --git a/queue-3.9/ipv6-fix-possible-crashes-in-ip6_cork_release.patch b/queue-3.9/ipv6-fix-possible-crashes-in-ip6_cork_release.patch new file mode 100644 index 00000000000..4dcdb024cec --- /dev/null +++ b/queue-3.9/ipv6-fix-possible-crashes-in-ip6_cork_release.patch @@ -0,0 +1,36 @@ +From 48bcc66c3b948490dcc9e41bfe26ddeece5d5919 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Fri, 17 May 2013 04:53:13 +0000 +Subject: ipv6: fix possible crashes in ip6_cork_release() + +From: Eric Dumazet + +[ Upstream commit 284041ef21fdf2e0d216ab6b787bc9072b4eb58a ] + +commit 0178b695fd6b4 ("ipv6: Copy cork options in ip6_append_data") +added some code duplication and bad error recovery, leading to potential +crash in ip6_cork_release() as kfree() could be called with garbage. + +use kzalloc() to make sure this wont happen. + +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Cc: Herbert Xu +Cc: Hideaki YOSHIFUJI +Cc: Neal Cardwell +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_output.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -1147,7 +1147,7 @@ int ip6_append_data(struct sock *sk, int + if (WARN_ON(np->cork.opt)) + return -EINVAL; + +- np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation); ++ np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation); + if (unlikely(np->cork.opt == NULL)) + return -ENOBUFS; + diff --git a/queue-3.9/l2tp-fix-ppp-header-erasure-and-memory-leak.patch b/queue-3.9/l2tp-fix-ppp-header-erasure-and-memory-leak.patch new file mode 100644 index 00000000000..bb7647ac362 --- /dev/null +++ b/queue-3.9/l2tp-fix-ppp-header-erasure-and-memory-leak.patch @@ -0,0 +1,37 @@ +From f4d2a6a3b62f075a8b2d119cc65ba6edcb0f95ae Mon Sep 17 00:00:00 2001 +From: Guillaume Nault +Date: Wed, 12 Jun 2013 16:07:23 +0200 +Subject: l2tp: Fix PPP header erasure and memory leak + +From: Guillaume Nault + +[ Upstream commit 55b92b7a11690bc377b5d373872a6b650ae88e64 ] + +Copy user data after PPP framing header. This prevents erasure of the +added PPP header and avoids leaking two bytes of uninitialised memory +at the end of skb's data buffer. + +Signed-off-by: Guillaume Nault +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_ppp.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/l2tp/l2tp_ppp.c ++++ b/net/l2tp/l2tp_ppp.c +@@ -346,12 +346,12 @@ static int pppol2tp_sendmsg(struct kiocb + skb_put(skb, 2); + + /* Copy user data into skb */ +- error = memcpy_fromiovec(skb->data, m->msg_iov, total_len); ++ error = memcpy_fromiovec(skb_put(skb, total_len), m->msg_iov, ++ total_len); + if (error < 0) { + kfree_skb(skb); + goto error_put_sess_tun; + } +- skb_put(skb, total_len); + + l2tp_xmit_skb(session, skb, session->hdr_len); + diff --git a/queue-3.9/l2tp-fix-sendmsg-return-value.patch b/queue-3.9/l2tp-fix-sendmsg-return-value.patch new file mode 100644 index 00000000000..ad0f2e776f2 --- /dev/null +++ b/queue-3.9/l2tp-fix-sendmsg-return-value.patch @@ -0,0 +1,30 @@ +From 993e543152dc2c2d4aa461403afd0b49f087e148 Mon Sep 17 00:00:00 2001 +From: Guillaume Nault +Date: Wed, 12 Jun 2013 16:07:36 +0200 +Subject: l2tp: Fix sendmsg() return value + +From: Guillaume Nault + +[ Upstream commit a6f79d0f26704214b5b702bbac525cb72997f984 ] + +PPPoL2TP sockets should comply with the standard send*() return values +(i.e. return number of bytes sent instead of 0 upon success). + +Signed-off-by: Guillaume Nault +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_ppp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/l2tp/l2tp_ppp.c ++++ b/net/l2tp/l2tp_ppp.c +@@ -358,7 +358,7 @@ static int pppol2tp_sendmsg(struct kiocb + sock_put(ps->tunnel_sock); + sock_put(sk); + +- return error; ++ return total_len; + + error_put_sess_tun: + sock_put(ps->tunnel_sock); diff --git a/queue-3.9/macvtap-set-transport-header-before-passing-skb-to-lower-device.patch b/queue-3.9/macvtap-set-transport-header-before-passing-skb-to-lower-device.patch new file mode 100644 index 00000000000..732afd0ef7e --- /dev/null +++ b/queue-3.9/macvtap-set-transport-header-before-passing-skb-to-lower-device.patch @@ -0,0 +1,57 @@ +From 6fce35d87d22ec8e666e91f7dc2a15ee0d86deeb Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Mon, 25 Mar 2013 20:19:55 +0000 +Subject: macvtap: set transport header before passing skb to lower device + +From: Jason Wang + +[ Upstream commit 9b4d669bc06c215d64f56f1eb0d4eb96e14d689d ] + +Set the transport header for 1) some drivers (e.g ixgbe) needs l4 header 2) +precise packet length estimation (introduced in 1def9238) needs l4 header to +compute header length. + +For the packets with partial checksum, the patch just set the transport header +to csum_start. Otherwise tries to use skb_flow_dissect() to get l4 offset, if it +fails, just pretend no l4 header. + +Signed-off-by: Jason Wang +Cc: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/macvtap.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/drivers/net/macvtap.c ++++ b/drivers/net/macvtap.c +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + + /* + * A macvtap queue is the central object of this driver, it connects +@@ -645,6 +646,7 @@ static ssize_t macvtap_get_user(struct m + int vnet_hdr_len = 0; + int copylen = 0; + bool zerocopy = false; ++ struct flow_keys keys; + + if (q->flags & IFF_VNET_HDR) { + vnet_hdr_len = q->vnet_hdr_sz; +@@ -725,6 +727,13 @@ static ssize_t macvtap_get_user(struct m + goto err_kfree; + } + ++ if (skb->ip_summed == CHECKSUM_PARTIAL) ++ skb_set_transport_header(skb, skb_checksum_start_offset(skb)); ++ else if (skb_flow_dissect(skb, &keys)) ++ skb_set_transport_header(skb, keys.thoff); ++ else ++ skb_set_transport_header(skb, ETH_HLEN); ++ + rcu_read_lock_bh(); + vlan = rcu_dereference_bh(q->vlan); + /* copy skb_ubuf_info for callback when skb has no error */ diff --git a/queue-3.9/net-802-mrp-fix-lockdep-splat.patch b/queue-3.9/net-802-mrp-fix-lockdep-splat.patch new file mode 100644 index 00000000000..116ad823705 --- /dev/null +++ b/queue-3.9/net-802-mrp-fix-lockdep-splat.patch @@ -0,0 +1,50 @@ +From fb00986d4aeeacc15e60ff5bea4761549281de98 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Mon, 13 May 2013 02:24:11 +0000 +Subject: net/802/mrp: fix lockdep splat + +From: Eric Dumazet + +[ Upstream commit faff57a92ba1d7247c5e86ecea2886d2c9d54507 ] + +commit fb745e9a037895 ("net/802/mrp: fix possible race condition when +calling mrp_pdu_queue()") introduced a lockdep splat. + +[ 19.735147] ================================= +[ 19.735235] [ INFO: inconsistent lock state ] +[ 19.735324] 3.9.2-build-0063 #4 Not tainted +[ 19.735412] --------------------------------- +[ 19.735500] inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage. 
+[ 19.735592] rmmod/1840 [HC0[0]:SC0[0]:HE1:SE1] takes: +[ 19.735682] (&(&app->lock)->rlock#2){+.?...}, at: [] +mrp_uninit_applicant+0x69/0xba [mrp] + +app->lock is normally taken under softirq context, so disable BH to +avoid the splat. + +Reported-by: Denys Fedoryshchenko +Signed-off-by: Eric Dumazet +Cc: David Ward +Cc: Cong Wang +Tested-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/802/mrp.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/802/mrp.c ++++ b/net/802/mrp.c +@@ -871,10 +871,10 @@ void mrp_uninit_applicant(struct net_dev + */ + del_timer_sync(&app->join_timer); + +- spin_lock(&app->lock); ++ spin_lock_bh(&app->lock); + mrp_mad_event(app, MRP_EVENT_TX); + mrp_pdu_queue(app); +- spin_unlock(&app->lock); ++ spin_unlock_bh(&app->lock); + + mrp_queue_xmit(app); + diff --git a/queue-3.9/net-block-msg_cmsg_compat-in-send-m-msg-and-recv-m-msg.patch b/queue-3.9/net-block-msg_cmsg_compat-in-send-m-msg-and-recv-m-msg.patch new file mode 100644 index 00000000000..e405e467a2a --- /dev/null +++ b/queue-3.9/net-block-msg_cmsg_compat-in-send-m-msg-and-recv-m-msg.patch @@ -0,0 +1,261 @@ +From 6ef6200a9c65506a7649ed30b2bbefff11430a85 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Wed, 22 May 2013 14:07:44 -0700 +Subject: net: Block MSG_CMSG_COMPAT in send(m)msg and recv(m)msg + +From: Andy Lutomirski + +[ Upstream commits 1be374a0518a288147c6a7398792583200a67261 and + a7526eb5d06b0084ef12d7b168d008fcf516caab ] + +MSG_CMSG_COMPAT is (AFAIK) not intended to be part of the API -- +it's a hack that steals a bit to indicate to other networking code +that a compat entry was used. So don't allow it from a non-compat +syscall. 
+ +This prevents an oops when running this code: + +int main() +{ + int s; + struct sockaddr_in addr; + struct msghdr *hdr; + + char *highpage = mmap((void*)(TASK_SIZE_MAX - 4096), 4096, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + if (highpage == MAP_FAILED) + err(1, "mmap"); + + s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (s == -1) + err(1, "socket"); + + addr.sin_family = AF_INET; + addr.sin_port = htons(1); + addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + if (connect(s, (struct sockaddr*)&addr, sizeof(addr)) != 0) + err(1, "connect"); + + void *evil = highpage + 4096 - COMPAT_MSGHDR_SIZE; + printf("Evil address is %p\n", evil); + + if (syscall(__NR_sendmmsg, s, evil, 1, MSG_CMSG_COMPAT) < 0) + err(1, "sendmmsg"); + + return 0; +} + +Signed-off-by: Andy Lutomirski +Cc: David S. Miller +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/socket.h | 3 ++ + net/compat.c | 13 ++++++++-- + net/socket.c | 61 +++++++++++++++++++++++++++++++++---------------- + 3 files changed, 56 insertions(+), 21 deletions(-) + +--- a/include/linux/socket.h ++++ b/include/linux/socket.h +@@ -321,6 +321,9 @@ extern int put_cmsg(struct msghdr*, int + + struct timespec; + ++/* The __sys_...msg variants allow MSG_CMSG_COMPAT */ ++extern long __sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags); ++extern long __sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags); + extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, + unsigned int flags, struct timespec *timeout); + extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, +--- a/net/compat.c ++++ b/net/compat.c +@@ -734,19 +734,25 @@ static unsigned char nas[21] = { + + asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags) + { +- return sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); ++ if (flags & MSG_CMSG_COMPAT) ++ return -EINVAL; ++ 
return __sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); + } + + asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, + unsigned int vlen, unsigned int flags) + { ++ if (flags & MSG_CMSG_COMPAT) ++ return -EINVAL; + return __sys_sendmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, + flags | MSG_CMSG_COMPAT); + } + + asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags) + { +- return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); ++ if (flags & MSG_CMSG_COMPAT) ++ return -EINVAL; ++ return __sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); + } + + asmlinkage long compat_sys_recv(int fd, void __user *buf, size_t len, unsigned int flags) +@@ -768,6 +774,9 @@ asmlinkage long compat_sys_recvmmsg(int + int datagrams; + struct timespec ktspec; + ++ if (flags & MSG_CMSG_COMPAT) ++ return -EINVAL; ++ + if (COMPAT_USE_64BIT_TIME) + return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, + flags | MSG_CMSG_COMPAT, +--- a/net/socket.c ++++ b/net/socket.c +@@ -1978,7 +1978,7 @@ struct used_address { + unsigned int name_len; + }; + +-static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg, ++static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg, + struct msghdr *msg_sys, unsigned int flags, + struct used_address *used_address) + { +@@ -2093,22 +2093,30 @@ out: + * BSD sendmsg interface + */ + +-SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags) ++long __sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) + { + int fput_needed, err; + struct msghdr msg_sys; +- struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed); ++ struct socket *sock; + ++ sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (!sock) + goto out; + +- err = __sys_sendmsg(sock, msg, &msg_sys, flags, NULL); ++ err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL); + + 
fput_light(sock->file, fput_needed); + out: + return err; + } + ++SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags) ++{ ++ if (flags & MSG_CMSG_COMPAT) ++ return -EINVAL; ++ return __sys_sendmsg(fd, msg, flags); ++} ++ + /* + * Linux sendmmsg interface + */ +@@ -2139,15 +2147,16 @@ int __sys_sendmmsg(int fd, struct mmsghd + + while (datagrams < vlen) { + if (MSG_CMSG_COMPAT & flags) { +- err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry, +- &msg_sys, flags, &used_address); ++ err = ___sys_sendmsg(sock, (struct msghdr __user *)compat_entry, ++ &msg_sys, flags, &used_address); + if (err < 0) + break; + err = __put_user(err, &compat_entry->msg_len); + ++compat_entry; + } else { +- err = __sys_sendmsg(sock, (struct msghdr __user *)entry, +- &msg_sys, flags, &used_address); ++ err = ___sys_sendmsg(sock, ++ (struct msghdr __user *)entry, ++ &msg_sys, flags, &used_address); + if (err < 0) + break; + err = put_user(err, &entry->msg_len); +@@ -2171,10 +2180,12 @@ int __sys_sendmmsg(int fd, struct mmsghd + SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg, + unsigned int, vlen, unsigned int, flags) + { ++ if (flags & MSG_CMSG_COMPAT) ++ return -EINVAL; + return __sys_sendmmsg(fd, mmsg, vlen, flags); + } + +-static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, ++static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, + struct msghdr *msg_sys, unsigned int flags, int nosec) + { + struct compat_msghdr __user *msg_compat = +@@ -2266,23 +2277,31 @@ out: + * BSD recvmsg interface + */ + +-SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, +- unsigned int, flags) ++long __sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags) + { + int fput_needed, err; + struct msghdr msg_sys; +- struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed); ++ struct socket *sock; + ++ sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (!sock) + goto out; + +- err = 
__sys_recvmsg(sock, msg, &msg_sys, flags, 0); ++ err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0); + + fput_light(sock->file, fput_needed); + out: + return err; + } + ++SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, ++ unsigned int, flags) ++{ ++ if (flags & MSG_CMSG_COMPAT) ++ return -EINVAL; ++ return __sys_recvmsg(fd, msg, flags); ++} ++ + /* + * Linux recvmmsg interface + */ +@@ -2320,17 +2339,18 @@ int __sys_recvmmsg(int fd, struct mmsghd + * No need to ask LSM for more than the first datagram. + */ + if (MSG_CMSG_COMPAT & flags) { +- err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry, +- &msg_sys, flags & ~MSG_WAITFORONE, +- datagrams); ++ err = ___sys_recvmsg(sock, (struct msghdr __user *)compat_entry, ++ &msg_sys, flags & ~MSG_WAITFORONE, ++ datagrams); + if (err < 0) + break; + err = __put_user(err, &compat_entry->msg_len); + ++compat_entry; + } else { +- err = __sys_recvmsg(sock, (struct msghdr __user *)entry, +- &msg_sys, flags & ~MSG_WAITFORONE, +- datagrams); ++ err = ___sys_recvmsg(sock, ++ (struct msghdr __user *)entry, ++ &msg_sys, flags & ~MSG_WAITFORONE, ++ datagrams); + if (err < 0) + break; + err = put_user(err, &entry->msg_len); +@@ -2397,6 +2417,9 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struc + int datagrams; + struct timespec timeout_sys; + ++ if (flags & MSG_CMSG_COMPAT) ++ return -EINVAL; ++ + if (!timeout) + return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL); + diff --git a/queue-3.9/net-core-sock.c-add-missing-vsock-string-in-af_family_-_key_strings.patch b/queue-3.9/net-core-sock.c-add-missing-vsock-string-in-af_family_-_key_strings.patch new file mode 100644 index 00000000000..0261b87cc2f --- /dev/null +++ b/queue-3.9/net-core-sock.c-add-missing-vsock-string-in-af_family_-_key_strings.patch @@ -0,0 +1,49 @@ +From 2f98f8fe710005d6bacd7a145590fcbd740f2ee5 Mon Sep 17 00:00:00 2001 +From: Federico Vaga +Date: Tue, 28 May 2013 05:02:44 +0000 +Subject: net/core/sock.c: add missing VSOCK string in 
af_family_*_key_strings + +From: Federico Vaga + +[ Upstream commit 456db6a4d495f40777da6f1f32f62f13026f52db ] + +The three arrays of strings: af_family_key_strings, +af_family_slock_key_strings and af_family_clock_key_strings have not +VSOCK's string + +Signed-off-by: Federico Vaga +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/sock.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -210,7 +210,7 @@ static const char *const af_family_key_s + "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , + "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , + "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , +- "sk_lock-AF_NFC" , "sk_lock-AF_MAX" ++ "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_MAX" + }; + static const char *const af_family_slock_key_strings[AF_MAX+1] = { + "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , +@@ -226,7 +226,7 @@ static const char *const af_family_slock + "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , + "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , + "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , +- "slock-AF_NFC" , "slock-AF_MAX" ++ "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX" + }; + static const char *const af_family_clock_key_strings[AF_MAX+1] = { + "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , +@@ -242,7 +242,7 @@ static const char *const af_family_clock + "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , + "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , + "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , +- "clock-AF_NFC" , "clock-AF_MAX" ++ "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_MAX" + }; + + /* diff --git a/queue-3.9/net-fec-fix-kernel-oops-when-plug-unplug-cable-many-times.patch b/queue-3.9/net-fec-fix-kernel-oops-when-plug-unplug-cable-many-times.patch new file mode 100644 index 00000000000..6641ea405f7 --- /dev/null 
+++ b/queue-3.9/net-fec-fix-kernel-oops-when-plug-unplug-cable-many-times.patch @@ -0,0 +1,174 @@ +From 0197f9ab0a6d40f1f7d58787e2dfdb2908f4eb27 Mon Sep 17 00:00:00 2001 +From: Frank Li +Date: Tue, 7 May 2013 14:08:44 +0000 +Subject: net: fec: fix kernel oops when plug/unplug cable many times + +From: Frank Li + +[ Upstream commits 54309fa60b5f57b90c1842176f6045e665d21142 and + 3169134478a9638baf0dbb4fdca5a0718cbe8e27 ] + +reproduce steps + 1. flood ping from other machine + ping -f -s 41000 IP + 2. run below script + while [ 1 ]; do ethtool -s eth0 autoneg off; + sleep 3;ethtool -s eth0 autoneg on; sleep 4; done; + +You can see oops in one hour. + +The reason is fec_restart clear BD but NAPI may use it. +The solution is disable NAPI and stop xmit when reset BD. +disable NAPI may sleep, so fec_restart can't be call in +atomic context. + +Signed-off-by: Frank Li +Reviewed-by: Lucas Stach +Tested-by: Lucas Stach +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/freescale/fec.c | 44 ++++++++++++++++++++++++++--------- + drivers/net/ethernet/freescale/fec.h | 10 ++++--- + 2 files changed, 39 insertions(+), 15 deletions(-) + +--- a/drivers/net/ethernet/freescale/fec.c ++++ b/drivers/net/ethernet/freescale/fec.c +@@ -407,6 +407,13 @@ fec_restart(struct net_device *ndev, int + u32 rcntl = OPT_FRAME_SIZE | 0x04; + u32 ecntl = 0x2; /* ETHEREN */ + ++ if (netif_running(ndev)) { ++ netif_device_detach(ndev); ++ napi_disable(&fep->napi); ++ netif_stop_queue(ndev); ++ netif_tx_lock_bh(ndev); ++ } ++ + /* Whack a reset. We should wait for this. 
*/ + writel(1, fep->hwp + FEC_ECNTRL); + udelay(10); +@@ -559,6 +566,13 @@ fec_restart(struct net_device *ndev, int + + /* Enable interrupts we wish to service */ + writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK); ++ ++ if (netif_running(ndev)) { ++ netif_device_attach(ndev); ++ napi_enable(&fep->napi); ++ netif_wake_queue(ndev); ++ netif_tx_unlock_bh(ndev); ++ } + } + + static void +@@ -598,8 +612,22 @@ fec_timeout(struct net_device *ndev) + + ndev->stats.tx_errors++; + +- fec_restart(ndev, fep->full_duplex); +- netif_wake_queue(ndev); ++ fep->delay_work.timeout = true; ++ schedule_delayed_work(&(fep->delay_work.delay_work), 0); ++} ++ ++static void fec_enet_work(struct work_struct *work) ++{ ++ struct fec_enet_private *fep = ++ container_of(work, ++ struct fec_enet_private, ++ delay_work.delay_work.work); ++ ++ if (fep->delay_work.timeout) { ++ fep->delay_work.timeout = false; ++ fec_restart(fep->netdev, fep->full_duplex); ++ netif_wake_queue(fep->netdev); ++ } + } + + static void +@@ -970,16 +998,12 @@ static void fec_enet_adjust_link(struct + { + struct fec_enet_private *fep = netdev_priv(ndev); + struct phy_device *phy_dev = fep->phy_dev; +- unsigned long flags; +- + int status_change = 0; + +- spin_lock_irqsave(&fep->hw_lock, flags); +- + /* Prevent a state halted on mii error */ + if (fep->mii_timeout && phy_dev->state == PHY_HALTED) { + phy_dev->state = PHY_RESUMING; +- goto spin_unlock; ++ return; + } + + if (phy_dev->link) { +@@ -1007,9 +1031,6 @@ static void fec_enet_adjust_link(struct + } + } + +-spin_unlock: +- spin_unlock_irqrestore(&fep->hw_lock, flags); +- + if (status_change) + phy_print_status(phy_dev); + } +@@ -1656,7 +1677,6 @@ static int fec_enet_init(struct net_devi + } + + memset(cbd_base, 0, PAGE_SIZE); +- spin_lock_init(&fep->hw_lock); + + fep->netdev = ndev; + +@@ -1882,6 +1902,7 @@ fec_probe(struct platform_device *pdev) + if (ret) + goto failed_register; + ++ INIT_DELAYED_WORK(&(fep->delay_work.delay_work), fec_enet_work); + return 0; + 
+ failed_register: +@@ -1918,6 +1939,7 @@ fec_drv_remove(struct platform_device *p + struct resource *r; + int i; + ++ cancel_delayed_work_sync(&(fep->delay_work.delay_work)); + unregister_netdev(ndev); + fec_enet_mii_remove(fep); + del_timer_sync(&fep->time_keep); +--- a/drivers/net/ethernet/freescale/fec.h ++++ b/drivers/net/ethernet/freescale/fec.h +@@ -191,6 +191,11 @@ struct bufdesc_ex { + #define BD_ENET_RX_INT 0x00800000 + #define BD_ENET_RX_PTP ((ushort)0x0400) + ++struct fec_enet_delayed_work { ++ struct delayed_work delay_work; ++ bool timeout; ++}; ++ + /* The FEC buffer descriptors track the ring buffers. The rx_bd_base and + * tx_bd_base always point to the base of the buffer descriptors. The + * cur_rx and cur_tx point to the currently available buffer. +@@ -224,9 +229,6 @@ struct fec_enet_private { + /* The ring entries to be free()ed */ + struct bufdesc *dirty_tx; + +- /* hold while accessing the HW like ringbuffer for tx/rx but not MAC */ +- spinlock_t hw_lock; +- + struct platform_device *pdev; + + int opened; +@@ -260,7 +262,7 @@ struct fec_enet_private { + int hwts_rx_en; + int hwts_tx_en; + struct timer_list time_keep; +- ++ struct fec_enet_delayed_work delay_work; + }; + + void fec_ptp_init(struct net_device *ndev, struct platform_device *pdev); diff --git a/queue-3.9/net-force-a-reload-of-first-item-in-hlist_nulls_for_each_entry_rcu.patch b/queue-3.9/net-force-a-reload-of-first-item-in-hlist_nulls_for_each_entry_rcu.patch new file mode 100644 index 00000000000..2e4e605d5dd --- /dev/null +++ b/queue-3.9/net-force-a-reload-of-first-item-in-hlist_nulls_for_each_entry_rcu.patch @@ -0,0 +1,53 @@ +From 82a3363bed90dce4ce17e982e7b2b5f06b018eaf Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Wed, 29 May 2013 09:06:27 +0000 +Subject: net: force a reload of first item in hlist_nulls_for_each_entry_rcu + +From: Eric Dumazet + +[ Upstream commit c87a124a5d5e8cf8e21c4363c3372bcaf53ea190 ] + +Roman Gushchin discovered that udp4_lib_lookup2() was not 
reloading +first item in the rcu protected list, in case the loop was restarted. + +This produced soft lockups as in https://lkml.org/lkml/2013/4/16/37 + +rcu_dereference(X)/ACCESS_ONCE(X) seem to not work as intended if X is +ptr->field : + +In some cases, gcc caches the value or ptr->field in a register. + +Use a barrier() to disallow such caching, as documented in +Documentation/atomic_ops.txt line 114 + +Thanks a lot to Roman for providing analysis and numerous patches. + +Diagnosed-by: Roman Gushchin +Signed-off-by: Eric Dumazet +Reported-by: Boris Zhmurov +Signed-off-by: Roman Gushchin +Acked-by: Paul E. McKenney +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/rculist_nulls.h | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/include/linux/rculist_nulls.h ++++ b/include/linux/rculist_nulls.h +@@ -105,9 +105,14 @@ static inline void hlist_nulls_add_head_ + * @head: the head for your list. + * @member: the name of the hlist_nulls_node within the struct. 
+ * ++ * The barrier() is needed to make sure compiler doesn't cache first element [1], ++ * as this loop can be restarted [2] ++ * [1] Documentation/atomic_ops.txt around line 114 ++ * [2] Documentation/RCU/rculist_nulls.txt around line 146 + */ + #define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ +- for (pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ ++ for (({barrier();}), \ ++ pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ + (!is_a_nulls(pos)) && \ + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ + pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos))) diff --git a/queue-3.9/net-phy-fix-a-bug-when-verify-the-eee-support.patch b/queue-3.9/net-phy-fix-a-bug-when-verify-the-eee-support.patch new file mode 100644 index 00000000000..12a3f9ee8d2 --- /dev/null +++ b/queue-3.9/net-phy-fix-a-bug-when-verify-the-eee-support.patch @@ -0,0 +1,31 @@ +From 6d77367fe6b5e63439b52c7fe67befa4618d1f68 Mon Sep 17 00:00:00 2001 +From: Giuseppe CAVALLARO +Date: Sun, 26 May 2013 21:31:28 +0000 +Subject: net: phy: fix a bug when verify the EEE support + +From: Giuseppe CAVALLARO + +[ Upstream commit 9a9c56cb34e65000d1f0a4b7553399bfcf7c5a52 ] + +The phy_init_eee has to exit with an error when the +local device and its link partner both do not support EEE. +So this patch fixes a problem when verify this. + +Signed-off-by: Giuseppe Cavallaro +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/phy.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/phy/phy.c ++++ b/drivers/net/phy/phy.c +@@ -1092,7 +1092,7 @@ int phy_init_eee(struct phy_device *phyd + adv = mmd_eee_adv_to_ethtool_adv_t(eee_adv); + lp = mmd_eee_adv_to_ethtool_adv_t(eee_lp); + idx = phy_find_setting(phydev->speed, phydev->duplex); +- if ((lp & adv & settings[idx].setting)) ++ if (!(lp & adv & settings[idx].setting)) + goto eee_exit; + + if (clk_stop_enable) { diff --git a/queue-3.9/net-sctp-fix-null-pointer-dereference-in-socket-destruction.patch b/queue-3.9/net-sctp-fix-null-pointer-dereference-in-socket-destruction.patch new file mode 100644 index 00000000000..ab0eff4da4b --- /dev/null +++ b/queue-3.9/net-sctp-fix-null-pointer-dereference-in-socket-destruction.patch @@ -0,0 +1,95 @@ +From a8876bf4fec55f6ee91277c5f18fcccd276c18ed Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Thu, 6 Jun 2013 15:53:47 +0200 +Subject: net: sctp: fix NULL pointer dereference in socket destruction + +From: Daniel Borkmann + +[ Upstream commit 1abd165ed757db1afdefaac0a4bc8a70f97d258c ] + +While stress testing sctp sockets, I hit the following panic: + +BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 +IP: [] sctp_endpoint_free+0xe/0x40 [sctp] +PGD 7cead067 PUD 7ce76067 PMD 0 +Oops: 0000 [#1] SMP +Modules linked in: sctp(F) libcrc32c(F) [...] +CPU: 7 PID: 2950 Comm: acc Tainted: GF 3.10.0-rc2+ #1 +Hardware name: Dell Inc. 
PowerEdge T410/0H19HD, BIOS 1.6.3 02/01/2011 +task: ffff88007ce0e0c0 ti: ffff88007b568000 task.ti: ffff88007b568000 +RIP: 0010:[] [] sctp_endpoint_free+0xe/0x40 [sctp] +RSP: 0018:ffff88007b569e08 EFLAGS: 00010292 +RAX: 0000000000000000 RBX: ffff88007db78a00 RCX: dead000000200200 +RDX: ffffffffa049fdb0 RSI: ffff8800379baf38 RDI: 0000000000000000 +RBP: ffff88007b569e18 R08: ffff88007c230da0 R09: 0000000000000001 +R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 +R13: ffff880077990d00 R14: 0000000000000084 R15: ffff88007db78a00 +FS: 00007fc18ab61700(0000) GS:ffff88007fc60000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b +CR2: 0000000000000020 CR3: 000000007cf9d000 CR4: 00000000000007e0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 +Stack: + ffff88007b569e38 ffff88007db78a00 ffff88007b569e38 ffffffffa049fded + ffffffff81abf0c0 ffff88007db78a00 ffff88007b569e58 ffffffff8145b60e + 0000000000000000 0000000000000000 ffff88007b569eb8 ffffffff814df36e +Call Trace: + [] sctp_destroy_sock+0x3d/0x80 [sctp] + [] sk_common_release+0x1e/0xf0 + [] inet_create+0x2ae/0x350 + [] __sock_create+0x11f/0x240 + [] sock_create+0x30/0x40 + [] SyS_socket+0x4c/0xc0 + [] ? do_page_fault+0xe/0x10 + [] ? page_fault+0x22/0x30 + [] system_call_fastpath+0x16/0x1b +Code: 0c c9 c3 66 2e 0f 1f 84 00 00 00 00 00 e8 fb fe ff ff c9 c3 66 0f + 1f 84 00 00 00 00 00 55 48 89 e5 53 48 83 ec 08 66 66 66 66 90 <48> + 8b 47 20 48 89 fb c6 47 1c 01 c6 40 12 07 e8 9e 68 01 00 48 +RIP [] sctp_endpoint_free+0xe/0x40 [sctp] + RSP +CR2: 0000000000000020 +---[ end trace e0d71ec1108c1dd9 ]--- + +I did not hit this with the lksctp-tools functional tests, but with a +small, multi-threaded test program, that heavily allocates, binds, +listens and waits in accept on sctp sockets, and then randomly kills +some of them (no need for an actual client in this case to hit this). 
+Then, again, allocating, binding, etc, and then killing child processes. + +This panic then only occurs when ``echo 1 > /proc/sys/net/sctp/auth_enable'' +is set. The cause for that is actually very simple: in sctp_endpoint_init() +we enter the path of sctp_auth_init_hmacs(). There, we try to allocate +our crypto transforms through crypto_alloc_hash(). In our scenario, +it then can happen that crypto_alloc_hash() fails with -EINTR from +crypto_larval_wait(), thus we bail out and release the socket via +sk_common_release(), sctp_destroy_sock() and hit the NULL pointer +dereference as soon as we try to access members in the endpoint during +sctp_endpoint_free(), since endpoint at that time is still NULL. Now, +if we have that case, we do not need to do any cleanup work and just +leave the destruction handler. + +Signed-off-by: Daniel Borkmann +Acked-by: Neil Horman +Acked-by: Vlad Yasevich +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -4002,6 +4002,12 @@ SCTP_STATIC void sctp_destroy_sock(struc + + /* Release our hold on the endpoint. */ + sp = sctp_sk(sk); ++ /* This could happen during socket init, thus we bail out ++ * early, since the rest of the below is not setup either. 
++ */ ++ if (sp->ep == NULL) ++ return; ++ + if (sp->do_auto_asconf) { + sp->do_auto_asconf = 0; + list_del(&sp->auto_asconf_list); diff --git a/queue-3.9/net-sh_eth-fix-incorrect-rx-length-error-if-r8a7740.patch b/queue-3.9/net-sh_eth-fix-incorrect-rx-length-error-if-r8a7740.patch new file mode 100644 index 00000000000..0b52d35f70b --- /dev/null +++ b/queue-3.9/net-sh_eth-fix-incorrect-rx-length-error-if-r8a7740.patch @@ -0,0 +1,50 @@ +From 6f4f1077b6801410a3af4d3523a82823bdfdfc18 Mon Sep 17 00:00:00 2001 +From: Yoshihiro Shimoda +Date: Thu, 13 Jun 2013 10:15:45 +0900 +Subject: net: sh_eth: fix incorrect RX length error if R8A7740 + +From: Yoshihiro Shimoda + +[ Upstream commit dd019897358b815f7828dab90b51d51df4d3658d ] + +This patch fixes an issue that the driver increments the "RX length error" +on every buffer in sh_eth_rx() if the R8A7740. +This patch also adds a description about the Receive Frame Status bits. + +Signed-off-by: Yoshihiro Shimoda +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/renesas/sh_eth.c | 15 +++++++++++---- + 1 file changed, 11 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/renesas/sh_eth.c ++++ b/drivers/net/ethernet/renesas/sh_eth.c +@@ -1100,16 +1100,23 @@ static int sh_eth_rx(struct net_device * + desc_status = edmac_to_cpu(mdp, rxdesc->status); + pkt_len = rxdesc->frame_length; + +-#if defined(CONFIG_ARCH_R8A7740) +- desc_status >>= 16; +-#endif +- + if (--boguscnt < 0) + break; + + if (!(desc_status & RDFEND)) + ndev->stats.rx_length_errors++; + ++#if defined(CONFIG_ARCH_R8A7740) ++ /* ++ * In case of almost all GETHER/ETHERs, the Receive Frame State ++ * (RFS) bits in the Receive Descriptor 0 are from bit 9 to ++ * bit 0. However, in case of the R8A7740's GETHER, the RFS ++ * bits are from bit 25 to bit 16. So, the driver needs right ++ * shifting by 16. 
++ */ ++ desc_status >>= 16; ++#endif ++ + if (desc_status & (RD_RFS1 | RD_RFS2 | RD_RFS3 | RD_RFS4 | + RD_RFS5 | RD_RFS6 | RD_RFS10)) { + ndev->stats.rx_errors++; diff --git a/queue-3.9/net_sched-better-precise-estimation-on-packet-length-for-untrusted-packets.patch b/queue-3.9/net_sched-better-precise-estimation-on-packet-length-for-untrusted-packets.patch new file mode 100644 index 00000000000..fa6c2ec21cb --- /dev/null +++ b/queue-3.9/net_sched-better-precise-estimation-on-packet-length-for-untrusted-packets.patch @@ -0,0 +1,46 @@ +From a21d200256ebea2a614980c1875c68aae0093b17 Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Mon, 25 Mar 2013 20:19:59 +0000 +Subject: net_sched: better precise estimation on packet length for untrusted packets + +From: Jason Wang + +[ Upstream commit 15e5a030716468dce4032fa0f398d840fa2756f6 ] + +gso_segs were reset to zero when kernel receive packets from untrusted +source. But we use this zero value to estimate precise packet len which is +wrong. So this patch tries to estimate the correct gso_segs value before using +it in qdisc_pkt_len_init(). + +Signed-off-by: Jason Wang +Cc: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -2592,6 +2592,7 @@ static void qdisc_pkt_len_init(struct sk + */ + if (shinfo->gso_size) { + unsigned int hdr_len; ++ u16 gso_segs = shinfo->gso_segs; + + /* mac layer + network layer */ + hdr_len = skb_transport_header(skb) - skb_mac_header(skb); +@@ -2601,7 +2602,12 @@ static void qdisc_pkt_len_init(struct sk + hdr_len += tcp_hdrlen(skb); + else + hdr_len += sizeof(struct udphdr); +- qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len; ++ ++ if (shinfo->gso_type & SKB_GSO_DODGY) ++ gso_segs = DIV_ROUND_UP(skb->len - hdr_len, ++ shinfo->gso_size); ++ ++ qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len; + } + } + diff --git a/queue-3.9/net_sched-htb-do-not-mix-1ns-and-64ns-time-units.patch b/queue-3.9/net_sched-htb-do-not-mix-1ns-and-64ns-time-units.patch new file mode 100644 index 00000000000..1184cf808ca --- /dev/null +++ b/queue-3.9/net_sched-htb-do-not-mix-1ns-and-64ns-time-units.patch @@ -0,0 +1,131 @@ +From e58f91177b3afd840ed46f98d33b08aa499a920f Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Tue, 4 Jun 2013 07:11:48 +0000 +Subject: net_sched: htb: do not mix 1ns and 64ns time units + +From: Eric Dumazet + +[ Upstream commit 5343a7f8be11951cb3095b91e8e4eb506cfacc0f ] + +commit 56b765b79 ("htb: improved accuracy at high rates") added another +regression for low rates, because it mixes 1ns and 64ns time units. + +So the maximum delay (mbuffer) was not 60 second, but 937 ms. + +Lets convert all time fields to 1ns as 64bit arches are becoming the +norm. + +Reported-by: Jesper Dangaard Brouer +Signed-off-by: Eric Dumazet +Tested-by: Jesper Dangaard Brouer +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_htb.c | 34 +++++++++++++++++----------------- + 1 file changed, 17 insertions(+), 17 deletions(-) + +--- a/net/sched/sch_htb.c ++++ b/net/sched/sch_htb.c +@@ -109,7 +109,7 @@ struct htb_class { + } un; + struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ + struct rb_node pq_node; /* node for event queue */ +- psched_time_t pq_key; ++ s64 pq_key; + + int prio_activity; /* for which prios are we active */ + enum htb_cmode cmode; /* current mode of the class */ +@@ -121,10 +121,10 @@ struct htb_class { + /* token bucket parameters */ + struct psched_ratecfg rate; + struct psched_ratecfg ceil; +- s64 buffer, cbuffer; /* token bucket depth/rate */ +- psched_tdiff_t mbuffer; /* max wait time */ +- s64 tokens, ctokens; /* current number of tokens */ +- psched_time_t t_c; /* checkpoint time */ ++ s64 buffer, cbuffer; /* token bucket depth/rate */ ++ s64 mbuffer; /* max wait time */ ++ s64 tokens, ctokens; /* current number of tokens */ ++ s64 t_c; /* checkpoint time */ + }; + + struct htb_sched { +@@ -141,15 +141,15 @@ struct htb_sched { + struct rb_root wait_pq[TC_HTB_MAXDEPTH]; + + /* time of nearest event per level (row) */ +- psched_time_t near_ev_cache[TC_HTB_MAXDEPTH]; ++ s64 near_ev_cache[TC_HTB_MAXDEPTH]; + + int defcls; /* class where unclassified flows go to */ + + /* filters for qdisc itself */ + struct tcf_proto *filter_list; + +- int rate2quantum; /* quant = rate / rate2quantum */ +- psched_time_t now; /* cached dequeue time */ ++ int rate2quantum; /* quant = rate / rate2quantum */ ++ s64 now; /* cached dequeue time */ + struct qdisc_watchdog watchdog; + + /* non shaped skbs; let them go directly thru */ +@@ -664,8 +664,8 @@ static void htb_charge_class(struct htb_ + * next pending event (0 for no event in pq, q->now for too many events). + * Note: Applied are events whose have cl->pq_key <= q->now. 
+ */ +-static psched_time_t htb_do_events(struct htb_sched *q, int level, +- unsigned long start) ++static s64 htb_do_events(struct htb_sched *q, int level, ++ unsigned long start) + { + /* don't run for longer than 2 jiffies; 2 is used instead of + * 1 to simplify things when jiffy is going to be incremented +@@ -857,7 +857,7 @@ static struct sk_buff *htb_dequeue(struc + struct sk_buff *skb; + struct htb_sched *q = qdisc_priv(sch); + int level; +- psched_time_t next_event; ++ s64 next_event; + unsigned long start_at; + + /* try to dequeue direct packets as high prio (!) to minimize cpu work */ +@@ -880,7 +880,7 @@ ok: + for (level = 0; level < TC_HTB_MAXDEPTH; level++) { + /* common case optimization - skip event handler quickly */ + int m; +- psched_time_t event; ++ s64 event; + + if (q->now >= q->near_ev_cache[level]) { + event = htb_do_events(q, level, start_at); +@@ -1116,8 +1116,8 @@ htb_dump_class_stats(struct Qdisc *sch, + + if (!cl->level && cl->un.leaf.q) + cl->qstats.qlen = cl->un.leaf.q->q.qlen; +- cl->xstats.tokens = cl->tokens; +- cl->xstats.ctokens = cl->ctokens; ++ cl->xstats.tokens = PSCHED_NS2TICKS(cl->tokens); ++ cl->xstats.ctokens = PSCHED_NS2TICKS(cl->ctokens); + + if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || + gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 || +@@ -1199,7 +1199,7 @@ static void htb_parent_to_leaf(struct ht + parent->un.leaf.q = new_q ? 
new_q : &noop_qdisc; + parent->tokens = parent->buffer; + parent->ctokens = parent->cbuffer; +- parent->t_c = psched_get_time(); ++ parent->t_c = ktime_to_ns(ktime_get()); + parent->cmode = HTB_CAN_SEND; + } + +@@ -1416,8 +1416,8 @@ static int htb_change_class(struct Qdisc + /* set class to be in HTB_CAN_SEND state */ + cl->tokens = PSCHED_TICKS2NS(hopt->buffer); + cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer); +- cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC; /* 1min */ +- cl->t_c = psched_get_time(); ++ cl->mbuffer = 60ULL * NSEC_PER_SEC; /* 1min */ ++ cl->t_c = ktime_to_ns(ktime_get()); + cl->cmode = HTB_CAN_SEND; + + /* attach to the hash list and parent's family */ diff --git a/queue-3.9/net_sched-restore-overhead-xxx-handling.patch b/queue-3.9/net_sched-restore-overhead-xxx-handling.patch new file mode 100644 index 00000000000..3aba055c0d7 --- /dev/null +++ b/queue-3.9/net_sched-restore-overhead-xxx-handling.patch @@ -0,0 +1,174 @@ +From e0e2c8acec40979d4818bf69e6e8f7aaf40d1fcc Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Sun, 2 Jun 2013 13:55:05 +0000 +Subject: net_sched: restore "overhead xxx" handling + +From: Eric Dumazet + +[ Upstream commit 01cb71d2d47b78354358e4bb938bb06323e17498 ] + +commit 56b765b79 ("htb: improved accuracy at high rates") +broke the "overhead xxx" handling, as well as the "linklayer atm" +attribute. + +tc class add ... htb rate X ceil Y linklayer atm overhead 10 + +This patch restores the "overhead xxx" handling, for htb, tbf +and act_police + +The "linklayer atm" thing needs a separate fix. + +Reported-by: Jesper Dangaard Brouer +Signed-off-by: Eric Dumazet +Cc: Vimalkumar +Cc: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/sch_generic.h | 18 +++++++++++------- + net/sched/act_police.c | 8 ++++---- + net/sched/sch_generic.c | 8 +++++--- + net/sched/sch_htb.c | 8 ++++---- + net/sched/sch_tbf.c | 8 ++++---- + 5 files changed, 28 insertions(+), 22 deletions(-) + +--- a/include/net/sch_generic.h ++++ b/include/net/sch_generic.h +@@ -679,22 +679,26 @@ static inline struct sk_buff *skb_act_cl + #endif + + struct psched_ratecfg { +- u64 rate_bps; +- u32 mult; +- u32 shift; ++ u64 rate_bps; ++ u32 mult; ++ u16 overhead; ++ u8 shift; + }; + + static inline u64 psched_l2t_ns(const struct psched_ratecfg *r, + unsigned int len) + { +- return ((u64)len * r->mult) >> r->shift; ++ return ((u64)(len + r->overhead) * r->mult) >> r->shift; + } + +-extern void psched_ratecfg_precompute(struct psched_ratecfg *r, u32 rate); ++extern void psched_ratecfg_precompute(struct psched_ratecfg *r, const struct tc_ratespec *conf); + +-static inline u32 psched_ratecfg_getrate(const struct psched_ratecfg *r) ++static inline void psched_ratecfg_getrate(struct tc_ratespec *res, ++ const struct psched_ratecfg *r) + { +- return r->rate_bps >> 3; ++ memset(res, 0, sizeof(*res)); ++ res->rate = r->rate_bps >> 3; ++ res->overhead = r->overhead; + } + + #endif +--- a/net/sched/act_police.c ++++ b/net/sched/act_police.c +@@ -231,14 +231,14 @@ override: + } + if (R_tab) { + police->rate_present = true; +- psched_ratecfg_precompute(&police->rate, R_tab->rate.rate); ++ psched_ratecfg_precompute(&police->rate, &R_tab->rate); + qdisc_put_rtab(R_tab); + } else { + police->rate_present = false; + } + if (P_tab) { + police->peak_present = true; +- psched_ratecfg_precompute(&police->peak, P_tab->rate.rate); ++ psched_ratecfg_precompute(&police->peak, &P_tab->rate); + qdisc_put_rtab(P_tab); + } else { + police->peak_present = false; +@@ -376,9 +376,9 @@ tcf_act_police_dump(struct sk_buff *skb, + }; + + if (police->rate_present) +- opt.rate.rate = 
psched_ratecfg_getrate(&police->rate); ++ psched_ratecfg_getrate(&opt.rate, &police->rate); + if (police->peak_present) +- opt.peakrate.rate = psched_ratecfg_getrate(&police->peak); ++ psched_ratecfg_getrate(&opt.peakrate, &police->peak); + if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt)) + goto nla_put_failure; + if (police->tcfp_result && +--- a/net/sched/sch_generic.c ++++ b/net/sched/sch_generic.c +@@ -898,14 +898,16 @@ void dev_shutdown(struct net_device *dev + WARN_ON(timer_pending(&dev->watchdog_timer)); + } + +-void psched_ratecfg_precompute(struct psched_ratecfg *r, u32 rate) ++void psched_ratecfg_precompute(struct psched_ratecfg *r, ++ const struct tc_ratespec *conf) + { + u64 factor; + u64 mult; + int shift; + +- r->rate_bps = (u64)rate << 3; +- r->shift = 0; ++ memset(r, 0, sizeof(*r)); ++ r->overhead = conf->overhead; ++ r->rate_bps = (u64)conf->rate << 3; + r->mult = 1; + /* + * Calibrate mult, shift so that token counting is accurate +--- a/net/sched/sch_htb.c ++++ b/net/sched/sch_htb.c +@@ -1089,9 +1089,9 @@ static int htb_dump_class(struct Qdisc * + + memset(&opt, 0, sizeof(opt)); + +- opt.rate.rate = psched_ratecfg_getrate(&cl->rate); ++ psched_ratecfg_getrate(&opt.rate, &cl->rate); + opt.buffer = PSCHED_NS2TICKS(cl->buffer); +- opt.ceil.rate = psched_ratecfg_getrate(&cl->ceil); ++ psched_ratecfg_getrate(&opt.ceil, &cl->ceil); + opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer); + opt.quantum = cl->quantum; + opt.prio = cl->prio; +@@ -1458,8 +1458,8 @@ static int htb_change_class(struct Qdisc + cl->prio = TC_HTB_NUMPRIO - 1; + } + +- psched_ratecfg_precompute(&cl->rate, hopt->rate.rate); +- psched_ratecfg_precompute(&cl->ceil, hopt->ceil.rate); ++ psched_ratecfg_precompute(&cl->rate, &hopt->rate); ++ psched_ratecfg_precompute(&cl->ceil, &hopt->ceil); + + cl->buffer = PSCHED_TICKS2NS(hopt->buffer); + cl->cbuffer = PSCHED_TICKS2NS(hopt->buffer); +--- a/net/sched/sch_tbf.c ++++ b/net/sched/sch_tbf.c +@@ -298,9 +298,9 @@ static int tbf_change(struct 
Qdisc *sch, + q->tokens = q->buffer; + q->ptokens = q->mtu; + +- psched_ratecfg_precompute(&q->rate, rtab->rate.rate); ++ psched_ratecfg_precompute(&q->rate, &rtab->rate); + if (ptab) { +- psched_ratecfg_precompute(&q->peak, ptab->rate.rate); ++ psched_ratecfg_precompute(&q->peak, &ptab->rate); + q->peak_present = true; + } else { + q->peak_present = false; +@@ -350,9 +350,9 @@ static int tbf_dump(struct Qdisc *sch, s + goto nla_put_failure; + + opt.limit = q->limit; +- opt.rate.rate = psched_ratecfg_getrate(&q->rate); ++ psched_ratecfg_getrate(&opt.rate, &q->rate); + if (q->peak_present) +- opt.peakrate.rate = psched_ratecfg_getrate(&q->peak); ++ psched_ratecfg_getrate(&opt.peakrate, &q->peak); + else + memset(&opt.peakrate, 0, sizeof(opt.peakrate)); + opt.mtu = PSCHED_NS2TICKS(q->mtu); diff --git a/queue-3.9/netback-set-transport-header-before-passing-it-to-kernel.patch b/queue-3.9/netback-set-transport-header-before-passing-it-to-kernel.patch new file mode 100644 index 00000000000..7a7d8cbe972 --- /dev/null +++ b/queue-3.9/netback-set-transport-header-before-passing-it-to-kernel.patch @@ -0,0 +1,70 @@ +From 26bd621f813429446709d120e9dde7c09386f0e0 Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Mon, 25 Mar 2013 20:19:58 +0000 +Subject: netback: set transport header before passing it to kernel + +From: Jason Wang + +[ Upstream commit f9ca8f74399f9195fd8e01f67a8424a8d33efa55 ] + +Currently, for the packets receives from netback, before doing header check, +kernel just reset the transport header in netif_receive_skb() which pretends non +l4 header. This is suboptimal for precise packet length estimation (introduced +in 1def9238: net_sched: more precise pkt_len computation) which needs correct l4 +header for gso packets. + +The patch just reuse the header probed by netback for partial checksum packets +and tries to use skb_flow_dissect() for other cases, if both fail, just pretend +no l4 header. 
+ +Signed-off-by: Jason Wang +Cc: Eric Dumazet +Cc: Ian Campbell +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/xen-netback/netback.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +--- a/drivers/net/xen-netback/netback.c ++++ b/drivers/net/xen-netback/netback.c +@@ -39,6 +39,7 @@ + #include + + #include ++#include + + #include + #include +@@ -1352,6 +1353,7 @@ static int checksum_setup(struct xenvif + if (th >= skb_tail_pointer(skb)) + goto out; + ++ skb_set_transport_header(skb, 4 * iph->ihl); + skb->csum_start = th - skb->head; + switch (iph->protocol) { + case IPPROTO_TCP: +@@ -1665,6 +1667,7 @@ static void xen_netbk_tx_submit(struct x + + skb->dev = vif->dev; + skb->protocol = eth_type_trans(skb, skb->dev); ++ skb_reset_network_header(skb); + + if (checksum_setup(vif, skb)) { + netdev_dbg(vif->dev, +@@ -1673,6 +1676,15 @@ static void xen_netbk_tx_submit(struct x + continue; + } + ++ if (!skb_transport_header_was_set(skb)) { ++ struct flow_keys keys; ++ ++ if (skb_flow_dissect(skb, &keys)) ++ skb_set_transport_header(skb, keys.thoff); ++ else ++ skb_reset_transport_header(skb); ++ } ++ + vif->dev->stats.rx_bytes += skb->len; + vif->dev->stats.rx_packets++; + diff --git a/queue-3.9/netlabel-improve-domain-mapping-validation.patch b/queue-3.9/netlabel-improve-domain-mapping-validation.patch new file mode 100644 index 00000000000..a0ae8c7674d --- /dev/null +++ b/queue-3.9/netlabel-improve-domain-mapping-validation.patch @@ -0,0 +1,116 @@ +From 1e738cfa84db031ca55ea8c779110fb469d785be Mon Sep 17 00:00:00 2001 +From: Paul Moore +Date: Fri, 17 May 2013 09:08:50 +0000 +Subject: netlabel: improve domain mapping validation + +From: Paul Moore + +[ Upstream commit 6b21e1b77d1a3d58ebfd513264c885695e8a0ba5 ] + +The net/netlabel/netlabel_domainhash.c:netlbl_domhsh_add() function +does not properly validate new domain hash entries resulting in +potential problems when an administrator attempts to add an invalid +entry. 
One such problem, as reported by Vlad Halilov, is a kernel +BUG (found in netlabel_domainhash.c:netlbl_domhsh_audit_add()) when +adding an IPv6 outbound mapping with a CIPSO configuration. + +This patch corrects this problem by adding the necessary validation +code to netlbl_domhsh_add() via the newly created +netlbl_domhsh_validate() function. + +Ideally this patch should also be pushed to the currently active +-stable trees. + +Reported-by: Vlad Halilov +Signed-off-by: Paul Moore +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/netlabel/netlabel_domainhash.c | 69 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 69 insertions(+) + +--- a/net/netlabel/netlabel_domainhash.c ++++ b/net/netlabel/netlabel_domainhash.c +@@ -245,6 +245,71 @@ static void netlbl_domhsh_audit_add(stru + } + } + ++/** ++ * netlbl_domhsh_validate - Validate a new domain mapping entry ++ * @entry: the entry to validate ++ * ++ * This function validates the new domain mapping entry to ensure that it is ++ * a valid entry. Returns zero on success, negative values on failure. 
++ * ++ */ ++static int netlbl_domhsh_validate(const struct netlbl_dom_map *entry) ++{ ++ struct netlbl_af4list *iter4; ++ struct netlbl_domaddr4_map *map4; ++#if IS_ENABLED(CONFIG_IPV6) ++ struct netlbl_af6list *iter6; ++ struct netlbl_domaddr6_map *map6; ++#endif /* IPv6 */ ++ ++ if (entry == NULL) ++ return -EINVAL; ++ ++ switch (entry->type) { ++ case NETLBL_NLTYPE_UNLABELED: ++ if (entry->type_def.cipsov4 != NULL || ++ entry->type_def.addrsel != NULL) ++ return -EINVAL; ++ break; ++ case NETLBL_NLTYPE_CIPSOV4: ++ if (entry->type_def.cipsov4 == NULL) ++ return -EINVAL; ++ break; ++ case NETLBL_NLTYPE_ADDRSELECT: ++ netlbl_af4list_foreach(iter4, &entry->type_def.addrsel->list4) { ++ map4 = netlbl_domhsh_addr4_entry(iter4); ++ switch (map4->type) { ++ case NETLBL_NLTYPE_UNLABELED: ++ if (map4->type_def.cipsov4 != NULL) ++ return -EINVAL; ++ break; ++ case NETLBL_NLTYPE_CIPSOV4: ++ if (map4->type_def.cipsov4 == NULL) ++ return -EINVAL; ++ break; ++ default: ++ return -EINVAL; ++ } ++ } ++#if IS_ENABLED(CONFIG_IPV6) ++ netlbl_af6list_foreach(iter6, &entry->type_def.addrsel->list6) { ++ map6 = netlbl_domhsh_addr6_entry(iter6); ++ switch (map6->type) { ++ case NETLBL_NLTYPE_UNLABELED: ++ break; ++ default: ++ return -EINVAL; ++ } ++ } ++#endif /* IPv6 */ ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ + /* + * Domain Hash Table Functions + */ +@@ -311,6 +376,10 @@ int netlbl_domhsh_add(struct netlbl_dom_ + struct netlbl_af6list *tmp6; + #endif /* IPv6 */ + ++ ret_val = netlbl_domhsh_validate(entry); ++ if (ret_val != 0) ++ return ret_val; ++ + /* XXX - we can remove this RCU read lock as the spinlock protects the + * entire function, but before we do we need to fixup the + * netlbl_af[4,6]list RCU functions to do "the right thing" with diff --git a/queue-3.9/packet-packet_getname_spkt-make-sure-string-is-always-0-terminated.patch b/queue-3.9/packet-packet_getname_spkt-make-sure-string-is-always-0-terminated.patch new file mode 100644 index 
00000000000..d26ebb8604d --- /dev/null +++ b/queue-3.9/packet-packet_getname_spkt-make-sure-string-is-always-0-terminated.patch @@ -0,0 +1,41 @@ +From a2ba7bc2f5944a82f7745a0c8f5f6819d4763671 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Wed, 12 Jun 2013 16:02:27 +0200 +Subject: packet: packet_getname_spkt: make sure string is always 0-terminated + +From: Daniel Borkmann + +[ Upstream commit 2dc85bf323515e59e15dfa858d1472bb25cad0fe ] + +uaddr->sa_data is exactly of size 14, which is hard-coded here and +passed as a size argument to strncpy(). A device name can be of size +IFNAMSIZ (== 16), meaning we might leave the destination string +unterminated. Thus, use strlcpy() and also sizeof() while we're +at it. We need to memset the data area beforehand, since strlcpy +does not padd the remaining buffer with zeroes for user space, so +that we do not possibly leak anything. + +Signed-off-by: Daniel Borkmann +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -2769,12 +2769,11 @@ static int packet_getname_spkt(struct so + return -EOPNOTSUPP; + + uaddr->sa_family = AF_PACKET; ++ memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data)); + rcu_read_lock(); + dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex); + if (dev) +- strncpy(uaddr->sa_data, dev->name, 14); +- else +- memset(uaddr->sa_data, 0, 14); ++ strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); + rcu_read_unlock(); + *uaddr_len = sizeof(*uaddr); + diff --git a/queue-3.9/packet-set-transport-header-before-doing-xmit.patch b/queue-3.9/packet-set-transport-header-before-doing-xmit.patch new file mode 100644 index 00000000000..265e208cf0a --- /dev/null +++ b/queue-3.9/packet-set-transport-header-before-doing-xmit.patch @@ -0,0 +1,96 @@ +From ef3b314c6abfcfa731b42d5b4e0cff9fb57ae53f Mon Sep 17 00:00:00 2001 +From: Jason Wang 
+Date: Mon, 25 Mar 2013 20:19:57 +0000 +Subject: packet: set transport header before doing xmit + +From: Jason Wang + +[ Upstream commit c1aad275b0293d2b1905ec95a945422262470684 ] + +Set the transport header for 1) some drivers (e.g ixgbe needs l4 header to do +atr) 2) precise packet length estimation (introduced in 1def9238) needs l4 +header to compute header length. + +So this patch first tries to get l4 header for packet socket through +skb_flow_dissect(), and pretend no l4 header if skb_flow_dissect() fails. + +Signed-off-by: Jason Wang +Cc: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -88,6 +88,7 @@ + #include + #include + #include ++#include + + #ifdef CONFIG_INET + #include +@@ -1343,6 +1344,7 @@ static int packet_sendmsg_spkt(struct ki + __be16 proto = 0; + int err; + int extra_len = 0; ++ struct flow_keys keys; + + /* + * Get and verify the address. 
+@@ -1443,6 +1445,11 @@ retry: + if (unlikely(extra_len == 4)) + skb->no_fcs = 1; + ++ if (skb_flow_dissect(skb, &keys)) ++ skb_set_transport_header(skb, keys.thoff); ++ else ++ skb_reset_transport_header(skb); ++ + dev_queue_xmit(skb); + rcu_read_unlock(); + return len; +@@ -1849,6 +1856,7 @@ static int tpacket_fill_skb(struct packe + struct page *page; + void *data; + int err; ++ struct flow_keys keys; + + ph.raw = frame; + +@@ -1874,6 +1882,11 @@ static int tpacket_fill_skb(struct packe + skb_reserve(skb, hlen); + skb_reset_network_header(skb); + ++ if (skb_flow_dissect(skb, &keys)) ++ skb_set_transport_header(skb, keys.thoff); ++ else ++ skb_reset_transport_header(skb); ++ + if (po->tp_tx_has_off) { + int off_min, off_max, off; + off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); +@@ -2130,6 +2143,7 @@ static int packet_snd(struct socket *soc + unsigned short gso_type = 0; + int hlen, tlen; + int extra_len = 0; ++ struct flow_keys keys; + + /* + * Get and verify the address. +@@ -2282,6 +2296,13 @@ static int packet_snd(struct socket *soc + len += vnet_hdr_len; + } + ++ if (skb->ip_summed == CHECKSUM_PARTIAL) ++ skb_set_transport_header(skb, skb_checksum_start_offset(skb)); ++ else if (skb_flow_dissect(skb, &keys)) ++ skb_set_transport_header(skb, keys.thoff); ++ else ++ skb_set_transport_header(skb, reserve); ++ + if (unlikely(extra_len == 4)) + skb->no_fcs = 1; + diff --git a/queue-3.9/r8169-fix-offloaded-tx-checksum-for-small-packets.patch b/queue-3.9/r8169-fix-offloaded-tx-checksum-for-small-packets.patch new file mode 100644 index 00000000000..72028750969 --- /dev/null +++ b/queue-3.9/r8169-fix-offloaded-tx-checksum-for-small-packets.patch @@ -0,0 +1,108 @@ +From 0a1f685d4b458666b1f979cd46ec0a26da37b6fc Mon Sep 17 00:00:00 2001 +From: Francois Romieu +Date: Sat, 18 May 2013 01:24:46 +0000 +Subject: r8169: fix offloaded tx checksum for small packets. 
+ +From: Francois Romieu + +[ Upstream commit b423e9ae49d78ea3f53b131c8d5a6087aed16fd6 ] + +8168evl offloaded checksums are wrong since commit +e5195c1f31f399289347e043d6abf3ffa80f0005 ("r8169: fix 8168evl frame padding.") +pads small packets to 60 bytes (without ethernet checksum). Typical symptoms +appear as UDP checksums which are wrong by the count of added bytes. + +It isn't worth compensating. Let the driver checksum. + +Due to the skb length changes, TSO code is moved before the Tx descriptor gets +written. + +Signed-off-by: Francois Romieu +Tested-by: Holger Hoffstätte +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/realtek/r8169.c | 41 +++++++++++++++++++++++------------ + 1 file changed, 27 insertions(+), 14 deletions(-) + +--- a/drivers/net/ethernet/realtek/r8169.c ++++ b/drivers/net/ethernet/realtek/r8169.c +@@ -5747,7 +5747,20 @@ err_out: + return -EIO; + } + +-static inline void rtl8169_tso_csum(struct rtl8169_private *tp, ++static bool rtl_skb_pad(struct sk_buff *skb) ++{ ++ if (skb_padto(skb, ETH_ZLEN)) ++ return false; ++ skb_put(skb, ETH_ZLEN - skb->len); ++ return true; ++} ++ ++static bool rtl_test_hw_pad_bug(struct rtl8169_private *tp, struct sk_buff *skb) ++{ ++ return skb->len < ETH_ZLEN && tp->mac_version == RTL_GIGA_MAC_VER_34; ++} ++ ++static inline bool rtl8169_tso_csum(struct rtl8169_private *tp, + struct sk_buff *skb, u32 *opts) + { + const struct rtl_tx_desc_info *info = tx_desc_info + tp->txd_version; +@@ -5760,13 +5773,20 @@ static inline void rtl8169_tso_csum(stru + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { + const struct iphdr *ip = ip_hdr(skb); + ++ if (unlikely(rtl_test_hw_pad_bug(tp, skb))) ++ return skb_checksum_help(skb) == 0 && rtl_skb_pad(skb); ++ + if (ip->protocol == IPPROTO_TCP) + opts[offset] |= info->checksum.tcp; + else if (ip->protocol == IPPROTO_UDP) + opts[offset] |= info->checksum.udp; + else + WARN_ON_ONCE(1); ++ } else { ++ if (unlikely(rtl_test_hw_pad_bug(tp, 
skb))) ++ return rtl_skb_pad(skb); + } ++ return true; + } + + static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, +@@ -5787,17 +5807,15 @@ static netdev_tx_t rtl8169_start_xmit(st + goto err_stop_0; + } + +- /* 8168evl does not automatically pad to minimum length. */ +- if (unlikely(tp->mac_version == RTL_GIGA_MAC_VER_34 && +- skb->len < ETH_ZLEN)) { +- if (skb_padto(skb, ETH_ZLEN)) +- goto err_update_stats; +- skb_put(skb, ETH_ZLEN - skb->len); +- } +- + if (unlikely(le32_to_cpu(txd->opts1) & DescOwn)) + goto err_stop_0; + ++ opts[1] = cpu_to_le32(rtl8169_tx_vlan_tag(skb)); ++ opts[0] = DescOwn; ++ ++ if (!rtl8169_tso_csum(tp, skb, opts)) ++ goto err_update_stats; ++ + len = skb_headlen(skb); + mapping = dma_map_single(d, skb->data, len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(d, mapping))) { +@@ -5809,11 +5827,6 @@ static netdev_tx_t rtl8169_start_xmit(st + tp->tx_skb[entry].len = len; + txd->addr = cpu_to_le64(mapping); + +- opts[1] = cpu_to_le32(rtl8169_tx_vlan_tag(skb)); +- opts[0] = DescOwn; +- +- rtl8169_tso_csum(tp, skb, opts); +- + frags = rtl8169_xmit_frags(tp, skb, opts); + if (frags < 0) + goto err_dma_1; diff --git a/queue-3.9/sctp-fully-initialize-sctp_outq-in-sctp_outq_init.patch b/queue-3.9/sctp-fully-initialize-sctp_outq-in-sctp_outq_init.patch new file mode 100644 index 00000000000..0cfa91fe1fd --- /dev/null +++ b/queue-3.9/sctp-fully-initialize-sctp_outq-in-sctp_outq_init.patch @@ -0,0 +1,58 @@ +From 244e71bb675cf9c545de7a1bfd9cfe9b1ff43750 Mon Sep 17 00:00:00 2001 +From: Neil Horman +Date: Wed, 12 Jun 2013 14:26:44 -0400 +Subject: sctp: fully initialize sctp_outq in sctp_outq_init + +From: Neil Horman + +[ Upstream commit c5c7774d7eb4397891edca9ebdf750ba90977a69 ] + +In commit 2f94aabd9f6c925d77aecb3ff020f1cc12ed8f86 +(refactor sctp_outq_teardown to insure proper re-initalization) +we modified sctp_outq_teardown to use sctp_outq_init to fully re-initalize the +outq structure. 
Steve West recently asked me why I removed the q->error = 0 +initialization from sctp_outq_teardown. I did so because I was operating under +the impression that sctp_outq_init would properly initialize that value for us, +but it doesn't. sctp_outq_init operates under the assumption that the outq +struct is all 0's (as it is when called from sctp_association_init), but using +it in __sctp_outq_teardown violates that assumption. We should do a memset in +sctp_outq_init to ensure that the entire structure is in a known state there +instead. + +Signed-off-by: Neil Horman +Reported-by: "West, Steve (NSN - US/Fort Worth)" +CC: Vlad Yasevich +CC: netdev@vger.kernel.org +CC: davem@davemloft.net +Acked-by: Vlad Yasevich +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/outqueue.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +--- a/net/sctp/outqueue.c ++++ b/net/sctp/outqueue.c +@@ -206,6 +206,8 @@ static inline int sctp_cacc_skip(struct + */ + void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) + { ++ memset(q, 0, sizeof(struct sctp_outq)); ++ + q->asoc = asoc; + INIT_LIST_HEAD(&q->out_chunk_list); + INIT_LIST_HEAD(&q->control_chunk_list); +@@ -213,13 +215,7 @@ void sctp_outq_init(struct sctp_associat + INIT_LIST_HEAD(&q->sacked); + INIT_LIST_HEAD(&q->abandoned); + +- q->fast_rtx = 0; +- q->outstanding_bytes = 0; + q->empty = 1; +- q->cork = 0; +- +- q->malloced = 0; +- q->out_qlen = 0; + } + + /* Free the outqueue structure and any related pending chunks. 
diff --git a/queue-3.9/series b/queue-3.9/series index a92e54311b3..27810245a41 100644 --- a/queue-3.9/series +++ b/queue-3.9/series @@ -40,3 +40,42 @@ range-do-not-add-new-blank-slot-with-add_range_with_merge.patch x86-mtrr-fix-original-mtrr-range-get-for-mtrr_cleanup.patch x86-fix-build-error-and-kconfig-for-ia32_emulation-and-binfmt.patch x86-fix-section-mismatch-on-load_ucode_ap.patch +net-fec-fix-kernel-oops-when-plug-unplug-cable-many-times.patch +tcp-fix-tcp_md5_hash_skb_data.patch +net-802-mrp-fix-lockdep-splat.patch +gianfar-add-missing-iounmap-on-error-in-gianfar_ptp_probe.patch +vxlan-update-vxlan-fdb-used-field-after-each-usage.patch +ipv6-fix-possible-crashes-in-ip6_cork_release.patch +netlabel-improve-domain-mapping-validation.patch +r8169-fix-offloaded-tx-checksum-for-small-packets.patch +8139cp-reset-bql-when-ring-tx-ring-cleared.patch +tcp-bug-fix-in-proportional-rate-reduction.patch +xfrm-properly-handle-invalid-states-as-an-error.patch +tcp-xps-fix-reordering-issues.patch +ip_tunnel-fix-kernel-panic-with-icmp_dest_unreach.patch +net-phy-fix-a-bug-when-verify-the-eee-support.patch +ipv4-fix-redirect-handling-for-tcp-packets.patch +net-block-msg_cmsg_compat-in-send-m-msg-and-recv-m-msg.patch +net-core-sock.c-add-missing-vsock-string-in-af_family_-_key_strings.patch +tuntap-forbid-changing-mq-flag-for-persistent-device.patch +udp6-fix-udp-fragmentation-for-tunnel-traffic.patch +net-force-a-reload-of-first-item-in-hlist_nulls_for_each_entry_rcu.patch +net_sched-restore-overhead-xxx-handling.patch +ipv6-assign-rt6_info-to-inet6_ifaddr-in-init_loopback.patch +net_sched-htb-do-not-mix-1ns-and-64ns-time-units.patch +vhost_net-clear-msg.control-for-non-zerocopy-case-during-tx.patch +net-sctp-fix-null-pointer-dereference-in-socket-destruction.patch +tuntap-set-sock_zerocopy-flag-during-open.patch +team-check-return-value-of-team_get_port_by_index_rcu-for-null.patch +team-move-add-to-port-list-before-port-enablement.patch 
+packet-packet_getname_spkt-make-sure-string-is-always-0-terminated.patch +l2tp-fix-ppp-header-erasure-and-memory-leak.patch +l2tp-fix-sendmsg-return-value.patch +sctp-fully-initialize-sctp_outq-in-sctp_outq_init.patch +net-sh_eth-fix-incorrect-rx-length-error-if-r8a7740.patch +tuntap-correct-the-return-value-in-tun_set_iff.patch +macvtap-set-transport-header-before-passing-skb-to-lower-device.patch +tuntap-set-transport-header-before-passing-it-to-kernel.patch +packet-set-transport-header-before-doing-xmit.patch +netback-set-transport-header-before-passing-it-to-kernel.patch +net_sched-better-precise-estimation-on-packet-length-for-untrusted-packets.patch diff --git a/queue-3.9/tcp-bug-fix-in-proportional-rate-reduction.patch b/queue-3.9/tcp-bug-fix-in-proportional-rate-reduction.patch new file mode 100644 index 00000000000..46db5aadc69 --- /dev/null +++ b/queue-3.9/tcp-bug-fix-in-proportional-rate-reduction.patch @@ -0,0 +1,127 @@ +From 39500711fa9848f9d51013c7bd517b5adfd6ccba Mon Sep 17 00:00:00 2001 +From: Nandita Dukkipati +Date: Tue, 21 May 2013 15:12:07 +0000 +Subject: tcp: bug fix in proportional rate reduction. + +From: Nandita Dukkipati + +[ Upstream commit 35f079ebbc860dcd1cca70890c9c8d59c1145525 ] + +This patch is a fix for a bug triggering newly_acked_sacked < 0 +in tcp_ack(.). + +The bug is triggered by sacked_out decreasing relative to prior_sacked, +but packets_out remaining the same as pior_packets. This is because the +snapshot of prior_packets is taken after tcp_sacktag_write_queue() while +prior_sacked is captured before tcp_sacktag_write_queue(). The problem +is: tcp_sacktag_write_queue (tcp_match_skb_to_sack() -> tcp_fragment) +adjusts the pcount for packets_out and sacked_out (MSS change or other +reason). As a result, this delta in pcount is reflected in +(prior_sacked - sacked_out) but not in (prior_packets - packets_out). 
+ +This patch does the following: +1) initializes prior_packets at the start of tcp_ack() so as to +capture the delta in packets_out created by tcp_fragment. +2) introduces a new "previous_packets_out" variable that snapshots +packets_out right before tcp_clean_rtx_queue, so pkts_acked can be +correctly computed as before. +3) Computes pkts_acked using previous_packets_out, and computes +newly_acked_sacked using prior_packets. + +Signed-off-by: Nandita Dukkipati +Acked-by: Yuchung Cheng +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 23 +++++++++++++---------- + 1 file changed, 13 insertions(+), 10 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -2885,8 +2885,8 @@ static void tcp_enter_recovery(struct so + * tcp_xmit_retransmit_queue(). + */ + static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, +- int prior_sacked, bool is_dupack, +- int flag) ++ int prior_sacked, int prior_packets, ++ bool is_dupack, int flag) + { + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); +@@ -2952,7 +2952,8 @@ static void tcp_fastretrans_alert(struct + tcp_add_reno_sack(sk); + } else + do_lost = tcp_try_undo_partial(sk, pkts_acked); +- newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; ++ newly_acked_sacked = prior_packets - tp->packets_out + ++ tp->sacked_out - prior_sacked; + break; + case TCP_CA_Loss: + if (flag & FLAG_DATA_ACKED) +@@ -2974,7 +2975,8 @@ static void tcp_fastretrans_alert(struct + if (is_dupack) + tcp_add_reno_sack(sk); + } +- newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; ++ newly_acked_sacked = prior_packets - tp->packets_out + ++ tp->sacked_out - prior_sacked; + + if (icsk->icsk_ca_state <= TCP_CA_Disorder) + tcp_try_undo_dsack(sk); +@@ -3597,9 +3599,10 @@ static int tcp_ack(struct sock *sk, cons + bool is_dupack = false; + u32 prior_in_flight; + u32 prior_fackets; +- int prior_packets; ++ int prior_packets = 
tp->packets_out; + int prior_sacked = tp->sacked_out; + int pkts_acked = 0; ++ int previous_packets_out = 0; + bool frto_cwnd = false; + + /* If the ack is older than previous acks +@@ -3670,14 +3673,14 @@ static int tcp_ack(struct sock *sk, cons + sk->sk_err_soft = 0; + icsk->icsk_probes_out = 0; + tp->rcv_tstamp = tcp_time_stamp; +- prior_packets = tp->packets_out; + if (!prior_packets) + goto no_queue; + + /* See if we can take anything off of the retransmit queue. */ ++ previous_packets_out = tp->packets_out; + flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); + +- pkts_acked = prior_packets - tp->packets_out; ++ pkts_acked = previous_packets_out - tp->packets_out; + + if (tp->frto_counter) + frto_cwnd = tcp_process_frto(sk, flag); +@@ -3692,7 +3695,7 @@ static int tcp_ack(struct sock *sk, cons + tcp_cong_avoid(sk, ack, prior_in_flight); + is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); + tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, +- is_dupack, flag); ++ prior_packets, is_dupack, flag); + } else { + if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) + tcp_cong_avoid(sk, ack, prior_in_flight); +@@ -3709,7 +3712,7 @@ no_queue: + /* If data was DSACKed, see if we can undo a cwnd reduction. */ + if (flag & FLAG_DSACKING_ACK) + tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, +- is_dupack, flag); ++ prior_packets, is_dupack, flag); + /* If this ack opens up a zero window, clear backoff. It was + * being used to time the probes, and is probably far higher than + * it needs to be for normal retransmission. 
+@@ -3729,7 +3732,7 @@ old_ack: + if (TCP_SKB_CB(skb)->sacked) { + flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); + tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, +- is_dupack, flag); ++ prior_packets, is_dupack, flag); + } + + SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); diff --git a/queue-3.9/tcp-fix-tcp_md5_hash_skb_data.patch b/queue-3.9/tcp-fix-tcp_md5_hash_skb_data.patch new file mode 100644 index 00000000000..1ad9e0be1b3 --- /dev/null +++ b/queue-3.9/tcp-fix-tcp_md5_hash_skb_data.patch @@ -0,0 +1,47 @@ +From 0fdefe46a0af027d26bf6773e380afc772b61ffb Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Mon, 13 May 2013 21:25:52 +0000 +Subject: tcp: fix tcp_md5_hash_skb_data() + +From: Eric Dumazet + +[ Upstream commit 54d27fcb338bd9c42d1dfc5a39e18f6f9d373c2e ] + +TCP md5 communications fail [1] for some devices, because sg/crypto code +assume page offsets are below PAGE_SIZE. + +This was discovered using mlx4 driver [2], but I suspect loopback +might trigger the same bug now we use order-3 pages in tcp_sendmsg() + +[1] Failure is giving following messages. + +huh, entered softirq 3 NET_RX ffffffff806ad230 preempt_count 00000100, +exited with 00000101? + +[2] mlx4 driver uses order-2 pages to allocate RX frags + +Reported-by: Matt Schnall +Signed-off-by: Eric Dumazet +Cc: Bernhard Beck +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -3383,8 +3383,11 @@ int tcp_md5_hash_skb_data(struct tcp_md5 + + for (i = 0; i < shi->nr_frags; ++i) { + const struct skb_frag_struct *f = &shi->frags[i]; +- struct page *page = skb_frag_page(f); +- sg_set_page(&sg, page, skb_frag_size(f), f->page_offset); ++ unsigned int offset = f->page_offset; ++ struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT); ++ ++ sg_set_page(&sg, page, skb_frag_size(f), ++ offset_in_page(offset)); + if (crypto_hash_update(desc, &sg, skb_frag_size(f))) + return 1; + } diff --git a/queue-3.9/tcp-xps-fix-reordering-issues.patch b/queue-3.9/tcp-xps-fix-reordering-issues.patch new file mode 100644 index 00000000000..31edff569a1 --- /dev/null +++ b/queue-3.9/tcp-xps-fix-reordering-issues.patch @@ -0,0 +1,47 @@ +From 05b8b0474327041cd47b34607e4bcf0a9aaa4573 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Thu, 23 May 2013 07:44:20 +0000 +Subject: tcp: xps: fix reordering issues + +From: Eric Dumazet + +[ Upstream commit 547669d483e5783d722772af1483fa474da7caf9 ] + +commit 3853b5841c01a ("xps: Improvements in TX queue selection") +introduced ooo_okay flag, but the condition to set it is slightly wrong. + +In our traces, we have seen ACK packets being received out of order, +and RST packets sent in response. + +We should test if we have any packets still in host queue. + +Signed-off-by: Eric Dumazet +Cc: Tom Herbert +Cc: Yuchung Cheng +Cc: Neal Cardwell +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -1032,11 +1032,13 @@ static int tcp_transmit_skb(struct sock + &md5); + tcp_header_size = tcp_options_size + sizeof(struct tcphdr); + +- if (tcp_packets_in_flight(tp) == 0) { ++ if (tcp_packets_in_flight(tp) == 0) + tcp_ca_event(sk, CA_EVENT_TX_START); +- skb->ooo_okay = 1; +- } else +- skb->ooo_okay = 0; ++ ++ /* if no packet is in qdisc/device queue, then allow XPS to select ++ * another queue. ++ */ ++ skb->ooo_okay = sk_wmem_alloc_get(sk) == 0; + + skb_push(skb, tcp_header_size); + skb_reset_transport_header(skb); diff --git a/queue-3.9/team-check-return-value-of-team_get_port_by_index_rcu-for-null.patch b/queue-3.9/team-check-return-value-of-team_get_port_by_index_rcu-for-null.patch new file mode 100644 index 00000000000..e3ab0917354 --- /dev/null +++ b/queue-3.9/team-check-return-value-of-team_get_port_by_index_rcu-for-null.patch @@ -0,0 +1,34 @@ +From aa683112fc03a7f8e299a8584ddcdbad35928b80 Mon Sep 17 00:00:00 2001 +From: Jiri Pirko +Date: Sat, 8 Jun 2013 15:00:53 +0200 +Subject: team: check return value of team_get_port_by_index_rcu() for NULL + +From: Jiri Pirko + +[ Upstream commit 76c455decbbad31de21c727edb184a963f42b40b ] + +team_get_port_by_index_rcu() might return NULL due to race between port +removal and skb tx path. Panic is easily triggerable when txing packets +and adding/removing port in a loop. + +introduced by commit 3d249d4ca "net: introduce ethernet teaming device" +and commit 753f993911b "team: introduce random mode" (for random mode) + +Signed-off-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/team/team_mode_roundrobin.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/team/team_mode_roundrobin.c ++++ b/drivers/net/team/team_mode_roundrobin.c +@@ -52,6 +52,8 @@ static bool rr_transmit(struct team *tea + + port_index = rr_priv(team)->sent_packets++ % team->en_port_count; + port = team_get_port_by_index_rcu(team, port_index); ++ if (unlikely(!port)) ++ goto drop; + port = __get_first_port_up(team, port); + if (unlikely(!port)) + goto drop; diff --git a/queue-3.9/team-move-add-to-port-list-before-port-enablement.patch b/queue-3.9/team-move-add-to-port-list-before-port-enablement.patch new file mode 100644 index 00000000000..cbfc0945955 --- /dev/null +++ b/queue-3.9/team-move-add-to-port-list-before-port-enablement.patch @@ -0,0 +1,38 @@ +From 4b8ccf0265a4d5a534142c817b93ad67a460acb4 Mon Sep 17 00:00:00 2001 +From: Jiri Pirko +Date: Sat, 8 Jun 2013 15:00:54 +0200 +Subject: team: move add to port list before port enablement + +From: Jiri Pirko + +[ Upstream commit 72df935d985c1575ed44ad2c8c653b28147993fa ] + +team_port_enable() adds port to port_hashlist. Reader sees port +in team_get_port_by_index_rcu() and returns it, but +team_get_first_port_txable_rcu() tries to go through port_list, where the +port is not inserted yet -> NULL pointer dereference. +Fix this by reordering port_list and port_hashlist insertion. +Panic is easily triggerable when txing packets and adding/removing port +in a loop. + +Introduced by commit 3d249d4c "net: introduce ethernet teaming device" + +Signed-off-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/team/team.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/team/team.c ++++ b/drivers/net/team/team.c +@@ -1079,8 +1079,8 @@ static int team_port_add(struct team *te + } + + port->index = -1; +- team_port_enable(team, port); + list_add_tail_rcu(&port->list, &team->port_list); ++ team_port_enable(team, port); + __team_compute_features(team); + __team_port_change_port_added(port, !!netif_carrier_ok(port_dev)); + __team_options_change_check(team); diff --git a/queue-3.9/tuntap-correct-the-return-value-in-tun_set_iff.patch b/queue-3.9/tuntap-correct-the-return-value-in-tun_set_iff.patch new file mode 100644 index 00000000000..86c065410ad --- /dev/null +++ b/queue-3.9/tuntap-correct-the-return-value-in-tun_set_iff.patch @@ -0,0 +1,45 @@ +From 4ab65f85b4ffc8c2a0206da23ba341ad9e86b95d Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Mon, 22 Apr 2013 20:40:39 +0000 +Subject: tuntap: correct the return value in tun_set_iff() + +From: Jason Wang + +[ Upstream commit e8dbad66ef56074eadb41ed5998acd2320447018 ] + +commit (3be8fbab tuntap: fix error return code in tun_set_iff()) breaks the +creation of multiqueue tuntap since it forbids to create more than one queues +for a multiqueue tuntap device. We need return 0 instead -EBUSY here since we +don't want to re-initialize the device when one or more queues has been already +attached. Add a comment and correct the return value to zero. + +Reported-by: Jerry Chu +Cc: Jerry Chu +Cc: Wei Yongjun +Cc: Eric Dumazet +Signed-off-by: Jason Wang +Acked-by: Jerry Chu +Acked-by: Michael S. Tsirkin +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -1600,8 +1600,12 @@ static int tun_set_iff(struct net *net, + return err; + + if (tun->flags & TUN_TAP_MQ && +- (tun->numqueues + tun->numdisabled > 1)) +- return -EBUSY; ++ (tun->numqueues + tun->numdisabled > 1)) { ++ /* One or more queue has already been attached, no need ++ * to initialize the device again. ++ */ ++ return 0; ++ } + } + else { + char *name; diff --git a/queue-3.9/tuntap-forbid-changing-mq-flag-for-persistent-device.patch b/queue-3.9/tuntap-forbid-changing-mq-flag-for-persistent-device.patch new file mode 100644 index 00000000000..439d74a001f --- /dev/null +++ b/queue-3.9/tuntap-forbid-changing-mq-flag-for-persistent-device.patch @@ -0,0 +1,43 @@ +From 2c842b06d95ab8388d58d21b8d6ef60545415cd5 Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Tue, 28 May 2013 18:32:11 +0000 +Subject: tuntap: forbid changing mq flag for persistent device + +From: Jason Wang + +[ Upstream commit 8e6d91ae0917bf934ed86411148f79d904728d51 ] + +We currently allow changing the mq flag (IFF_MULTI_QUEUE) for a persistent +device. This will result a mismatch between the number the queues in netdev and +tuntap. This is because we only allocate a 1q netdevice when IFF_MULTI_QUEUE was +not specified, so when we set the IFF_MULTI_QUEUE and try to attach more queues +later, netif_set_real_num_tx_queues() may fail which result a single queue +netdevice with multiple sockets attached. + +Solve this by disallowing changing the mq flag for persistent device. + +Bug was introduced by commit edfb6a148ce62e5e19354a1dcd9a34e00815c2a1 +(tuntap: reduce memory using of queues). + +Reported-by: Sriram Narasimhan +Cc: Michael S. Tsirkin +Signed-off-by: Jason Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -1585,6 +1585,10 @@ static int tun_set_iff(struct net *net, + else + return -EINVAL; + ++ if (!!(ifr->ifr_flags & IFF_MULTI_QUEUE) != ++ !!(tun->flags & TUN_TAP_MQ)) ++ return -EINVAL; ++ + if (tun_not_capable(tun)) + return -EPERM; + err = security_tun_dev_open(tun->security); diff --git a/queue-3.9/tuntap-set-sock_zerocopy-flag-during-open.patch b/queue-3.9/tuntap-set-sock_zerocopy-flag-during-open.patch new file mode 100644 index 00000000000..a26b9eaa548 --- /dev/null +++ b/queue-3.9/tuntap-set-sock_zerocopy-flag-during-open.patch @@ -0,0 +1,34 @@ +From 50531bbbdbb57b1481b12de9a27254113fb10820 Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Sat, 8 Jun 2013 14:17:41 +0800 +Subject: tuntap: set SOCK_ZEROCOPY flag during open + +From: Jason Wang + +[ Upstream commit 19a6afb23e5d323e1245baa4e62755492b2f1200 ] + +Commit 54f968d6efdbf7dec36faa44fc11f01b0e4d1990 +(tuntap: move socket to tun_file) forgets to set SOCK_ZEROCOPY flag, which will +prevent vhost_net from doing zerocopy w/ tap. This patch fixes this by setting +it during file open. + +Signed-off-by: Jason Wang +Cc: Michael S. Tsirkin +Acked-by: Michael S. Tsirkin +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -2154,6 +2154,8 @@ static int tun_chr_open(struct inode *in + set_bit(SOCK_EXTERNALLY_ALLOCATED, &tfile->socket.flags); + INIT_LIST_HEAD(&tfile->next); + ++ sock_set_flag(&tfile->sk, SOCK_ZEROCOPY); ++ + return 0; + } + diff --git a/queue-3.9/tuntap-set-transport-header-before-passing-it-to-kernel.patch b/queue-3.9/tuntap-set-transport-header-before-passing-it-to-kernel.patch new file mode 100644 index 00000000000..00693a63444 --- /dev/null +++ b/queue-3.9/tuntap-set-transport-header-before-passing-it-to-kernel.patch @@ -0,0 +1,59 @@ +From b8d36d82fe14dc012f0ada08a1046c1053fa6504 Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Mon, 25 Mar 2013 20:19:56 +0000 +Subject: tuntap: set transport header before passing it to kernel + +From: Jason Wang + +[ Upstream commit 38502af77e07b5d6650b9ff99a0b482d86366592 ] + +Currently, for the packets receives from tuntap, before doing header check, +kernel just reset the transport header in netif_receive_skb() which pretends no +l4 header. This is suboptimal for precise packet length estimation (introduced +in 1def9238) which needs correct l4 header for gso packets. + +So this patch set the transport header to csum_start for partial checksum +packets, otherwise it first try skb_flow_dissect(), if it fails, just reset the +transport header. + +Signed-off-by: Jason Wang +Cc: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -70,6 +70,7 @@ + #include + + #include ++#include + + /* Uncomment to enable debugging */ + /* #define TUN_DEBUG 1 */ +@@ -1051,6 +1052,7 @@ static ssize_t tun_get_user(struct tun_s + bool zerocopy = false; + int err; + u32 rxhash; ++ struct flow_keys keys; + + if (!(tun->flags & TUN_NO_PI)) { + if ((len -= sizeof(pi)) > total_len) +@@ -1205,6 +1207,14 @@ static ssize_t tun_get_user(struct tun_s + } + + skb_reset_network_header(skb); ++ ++ if (skb->ip_summed == CHECKSUM_PARTIAL) ++ skb_set_transport_header(skb, skb_checksum_start_offset(skb)); ++ else if (skb_flow_dissect(skb, &keys)) ++ skb_set_transport_header(skb, keys.thoff); ++ else ++ skb_reset_transport_header(skb); ++ + rxhash = skb_get_rxhash(skb); + netif_rx_ni(skb); + diff --git a/queue-3.9/udp6-fix-udp-fragmentation-for-tunnel-traffic.patch b/queue-3.9/udp6-fix-udp-fragmentation-for-tunnel-traffic.patch new file mode 100644 index 00000000000..0a2e6d88ed2 --- /dev/null +++ b/queue-3.9/udp6-fix-udp-fragmentation-for-tunnel-traffic.patch @@ -0,0 +1,99 @@ +From 23535349e02c17e65685e1992ca33c677a101e9e Mon Sep 17 00:00:00 2001 +From: Pravin B Shelar +Date: Thu, 30 May 2013 06:45:27 +0000 +Subject: udp6: Fix udp fragmentation for tunnel traffic. + +From: Pravin B Shelar + +[ Upstream commit 1e2bd517c108816220f262d7954b697af03b5f9c ] + +udp6 over GRE tunnel does not work after to GRE tso changes. GRE +tso handler passes inner packet but keeps track of outer header +start in SKB_GSO_CB(skb)->mac_offset. udp6 fragment need to +take care of outer header, which start at the mac_offset, while +adding fragment header. +This bug is introduced by commit 68c3316311 (GRE: Add TCP +segmentation offload for GRE). + +Reported-by: Dmitry Kravkov +Signed-off-by: Pravin B Shelar +Tested-by: Dmitry Kravkov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/skbuff.h | 15 +++++++++++++++ + net/ipv6/udp_offload.c | 20 ++++++++++++-------- + 2 files changed, 27 insertions(+), 8 deletions(-) + +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -2761,6 +2761,21 @@ static inline int skb_tnl_header_len(con + SKB_GSO_CB(inner_skb)->mac_offset; + } + ++static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra) ++{ ++ int new_headroom, headroom; ++ int ret; ++ ++ headroom = skb_headroom(skb); ++ ret = pskb_expand_head(skb, extra, 0, GFP_ATOMIC); ++ if (ret) ++ return ret; ++ ++ new_headroom = skb_headroom(skb); ++ SKB_GSO_CB(skb)->mac_offset += (new_headroom - headroom); ++ return 0; ++} ++ + static inline bool skb_is_gso(const struct sk_buff *skb) + { + return skb_shinfo(skb)->gso_size; +--- a/net/ipv6/udp_offload.c ++++ b/net/ipv6/udp_offload.c +@@ -42,11 +42,12 @@ static struct sk_buff *udp6_ufo_fragment + unsigned int mss; + unsigned int unfrag_ip6hlen, unfrag_len; + struct frag_hdr *fptr; +- u8 *mac_start, *prevhdr; ++ u8 *packet_start, *prevhdr; + u8 nexthdr; + u8 frag_hdr_sz = sizeof(struct frag_hdr); + int offset; + __wsum csum; ++ int tnl_hlen; + + mss = skb_shinfo(skb)->gso_size; + if (unlikely(skb->len <= mss)) +@@ -77,9 +78,11 @@ static struct sk_buff *udp6_ufo_fragment + skb->ip_summed = CHECKSUM_NONE; + + /* Check if there is enough headroom to insert fragment header. */ +- if ((skb_mac_header(skb) < skb->head + frag_hdr_sz) && +- pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC)) +- goto out; ++ tnl_hlen = skb_tnl_header_len(skb); ++ if (skb_headroom(skb) < (tnl_hlen + frag_hdr_sz)) { ++ if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz)) ++ goto out; ++ } + + /* Find the unfragmentable header and shift it left by frag_hdr_sz + * bytes to insert fragment header. 
+@@ -87,11 +90,12 @@ static struct sk_buff *udp6_ufo_fragment + unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); + nexthdr = *prevhdr; + *prevhdr = NEXTHDR_FRAGMENT; +- unfrag_len = skb_network_header(skb) - skb_mac_header(skb) + +- unfrag_ip6hlen; +- mac_start = skb_mac_header(skb); +- memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len); ++ unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) + ++ unfrag_ip6hlen + tnl_hlen; ++ packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset; ++ memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len); + ++ SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz; + skb->mac_header -= frag_hdr_sz; + skb->network_header -= frag_hdr_sz; + diff --git a/queue-3.9/vhost_net-clear-msg.control-for-non-zerocopy-case-during-tx.patch b/queue-3.9/vhost_net-clear-msg.control-for-non-zerocopy-case-during-tx.patch new file mode 100644 index 00000000000..264e0bd812f --- /dev/null +++ b/queue-3.9/vhost_net-clear-msg.control-for-non-zerocopy-case-during-tx.patch @@ -0,0 +1,59 @@ +From abaf419d84a3495c8779e2e0ecba2c3bba8063aa Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Wed, 5 Jun 2013 15:40:46 +0800 +Subject: vhost_net: clear msg.control for non-zerocopy case during tx + +From: Jason Wang + +[ Upstream commit 4364d5f96eed7994a2c625bd9216656e55fba0cb ] + +When we decide not use zero-copy, msg.control should be set to NULL otherwise +macvtap/tap may set zerocopy callbacks which may decrease the kref of ubufs +wrongly. + +Bug were introduced by commit cedb9bdce099206290a2bdd02ce47a7b253b6a84 +(vhost-net: skip head management if no outstanding). + +This solves the following warnings: + +WARNING: at include/linux/kref.h:47 handle_tx+0x477/0x4b0 [vhost_net]() +Modules linked in: vhost_net macvtap macvlan tun nfsd exportfs bridge stp llc openvswitch kvm_amd kvm bnx2 megaraid_sas [last unloaded: tun] +CPU: 5 PID: 8670 Comm: vhost-8668 Not tainted 3.10.0-rc2+ #1566 +Hardware name: Dell Inc. 
PowerEdge R715/00XHKG, BIOS 1.5.2 04/19/2011 +ffffffffa0198323 ffff88007c9ebd08 ffffffff81796b73 ffff88007c9ebd48 +ffffffff8103d66b 000000007b773e20 ffff8800779f0000 ffff8800779f43f0 +ffff8800779f8418 000000000000015c 0000000000000062 ffff88007c9ebd58 +Call Trace: +[] dump_stack+0x19/0x1e +[] warn_slowpath_common+0x6b/0xa0 +[] warn_slowpath_null+0x15/0x20 +[] handle_tx+0x477/0x4b0 [vhost_net] +[] handle_tx_kick+0x10/0x20 [vhost_net] +[] vhost_worker+0xfe/0x1a0 [vhost_net] +[] ? vhost_attach_cgroups_work+0x30/0x30 [vhost_net] +[] ? vhost_attach_cgroups_work+0x30/0x30 [vhost_net] +[] kthread+0xc6/0xd0 +[] ? kthread_freezable_should_stop+0x70/0x70 +[] ret_from_fork+0x7c/0xb0 +[] ? kthread_freezable_should_stop+0x70/0x70 + +Signed-off-by: Jason Wang +Acked-by: Michael S. Tsirkin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vhost/net.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/vhost/net.c ++++ b/drivers/vhost/net.c +@@ -353,7 +353,8 @@ static void handle_tx(struct vhost_net * + kref_get(&ubufs->kref); + } + vq->upend_idx = (vq->upend_idx + 1) % UIO_MAXIOV; +- } ++ } else ++ msg.msg_control = NULL; + /* TODO: Check specific error and bomb out unless ENOBUFS? 
*/ + err = sock->ops->sendmsg(NULL, sock, &msg, len); + if (unlikely(err < 0)) { diff --git a/queue-3.9/vxlan-update-vxlan-fdb-used-field-after-each-usage.patch b/queue-3.9/vxlan-update-vxlan-fdb-used-field-after-each-usage.patch new file mode 100644 index 00000000000..ab08633eb35 --- /dev/null +++ b/queue-3.9/vxlan-update-vxlan-fdb-used-field-after-each-usage.patch @@ -0,0 +1,69 @@ +From 70a3447bb5efc663377fab1ab48516696ebbd714 Mon Sep 17 00:00:00 2001 +From: Sridhar Samudrala +Date: Fri, 17 May 2013 06:39:07 +0000 +Subject: vxlan: Update vxlan fdb 'used' field after each usage + +From: Sridhar Samudrala + +[ Upstream commit 014be2c8eac3381e202f684c1f35ae184a8b152b ] + +Fix some instances where vxlan fdb 'used' field is not updated after the entry +is used. + +v2: rename vxlan_find_mac() as __vxlan_find_mac() and create a new vxlan_find_mac() +that also updates ->used field. + +Signed-off-by: Sridhar Samudrala +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vxlan.c | 17 ++++++++++++++--- + 1 file changed, 14 insertions(+), 3 deletions(-) + +--- a/drivers/net/vxlan.c ++++ b/drivers/net/vxlan.c +@@ -285,7 +285,7 @@ static inline struct hlist_head *vxlan_f + } + + /* Look up Ethernet address in forwarding table */ +-static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan, ++static struct vxlan_fdb *__vxlan_find_mac(struct vxlan_dev *vxlan, + const u8 *mac) + + { +@@ -300,6 +300,18 @@ static struct vxlan_fdb *vxlan_find_mac( + return NULL; + } + ++static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan, ++ const u8 *mac) ++{ ++ struct vxlan_fdb *f; ++ ++ f = __vxlan_find_mac(vxlan, mac); ++ if (f) ++ f->used = jiffies; ++ ++ return f; ++} ++ + /* Add new entry to forwarding table -- assumes lock held */ + static int vxlan_fdb_create(struct vxlan_dev *vxlan, + const u8 *mac, __be32 ip, +@@ -308,7 +320,7 @@ static int vxlan_fdb_create(struct vxlan + struct vxlan_fdb *f; + int notify = 0; + +- f = 
vxlan_find_mac(vxlan, mac); ++ f = __vxlan_find_mac(vxlan, mac); + if (f) { + if (flags & NLM_F_EXCL) { + netdev_dbg(vxlan->dev, +@@ -453,7 +465,6 @@ static void vxlan_snoop(struct net_devic + + f = vxlan_find_mac(vxlan, src_mac); + if (likely(f)) { +- f->used = jiffies; + if (likely(f->remote_ip == src_ip)) + return; + diff --git a/queue-3.9/xfrm-properly-handle-invalid-states-as-an-error.patch b/queue-3.9/xfrm-properly-handle-invalid-states-as-an-error.patch new file mode 100644 index 00000000000..7b387b0fbe7 --- /dev/null +++ b/queue-3.9/xfrm-properly-handle-invalid-states-as-an-error.patch @@ -0,0 +1,36 @@ +From dabfc479b73e468e06a8aa74432b0ac34b3ca7e1 Mon Sep 17 00:00:00 2001 +From: Timo Teräs +Date: Wed, 22 May 2013 01:40:47 +0000 +Subject: xfrm: properly handle invalid states as an error + +From: Timo Teräs + +[ Upstream commit 497574c72c9922cf20c12aed15313c389f722fa0 ] + +The error exit path needs err explicitly set. Otherwise it +returns success and the only caller, xfrm_output_resume(), +would oops in skb_dst(skb)->ops derefence as skb_dst(skb) is +NULL. + +Bug introduced in commit bb65a9cb (xfrm: removes a superfluous +check and add a statistic). + +Signed-off-by: Timo Teräs +Cc: Li RongQing +Cc: Steffen Klassert +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/xfrm/xfrm_output.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/xfrm/xfrm_output.c ++++ b/net/xfrm/xfrm_output.c +@@ -64,6 +64,7 @@ static int xfrm_output_one(struct sk_buf + + if (unlikely(x->km.state != XFRM_STATE_VALID)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID); ++ err = -EINVAL; + goto error; + } + -- 2.47.3