From: Greg Kroah-Hartman Date: Mon, 13 Jan 2014 17:49:25 +0000 (-0800) Subject: 3.10-stable patches X-Git-Tag: v3.4.77~11 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=46f3093512d8c5e2c198b3aff417eaf83613269e;p=thirdparty%2Fkernel%2Fstable-queue.git 3.10-stable patches added patches: bridge-use-spin_lock_bh-in-br_multicast_set_hash_max.patch drivers-net-hamradio-integer-overflow-in-hdlcdrv_ioctl.patch hamradio-yam-fix-info-leak-in-ioctl.patch ip_gre-fix-msg_name-parsing-for-recvfrom-recvmsg.patch ipv6-always-set-the-new-created-dst-s-from-in-ip6_rt_copy.patch ipv6-don-t-count-addrconf-generated-routes-against-gc-limit.patch ipv6-fix-illegal-mac_header-comparison-on-32bit.patch ipv6-fixed-support-for-blackhole-and-prohibit-routes.patch macvtap-do-not-double-count-received-packets.patch macvtap-signal-truncated-packets.patch macvtap-update-file-current-position.patch net-do-not-pretend-fraglist-support.patch net-drop_monitor-fix-the-value-of-maxattr.patch net-fec-fix-potential-use-after-free.patch net-inet_diag-zero-out-uninitialized-idiag_-src-dst-fields.patch net-llc-fix-use-after-free-in-llc_ui_recvmsg.patch net-loosen-constraints-for-recalculating-checksum-in-skb_segment.patch net-rose-restore-old-recvmsg-behavior.patch net-unix-allow-bind-to-fail-on-mutex-lock.patch net-unix-allow-set_peek_off-to-fail.patch netpoll-fix-missing-txq-unlock-and-and-oops.patch netvsc-don-t-flush-peers-notifying-work-during-setting-mtu.patch packet-fix-send-path-when-running-with-proto-0.patch rds-prevent-bug_on-triggered-on-congestion-update-to-loopback.patch rds-prevent-dereference-of-a-null-device.patch tg3-initialize-reg_base_addr-at-pci-config-offset-120-to-0.patch tun-update-file-current-position.patch virtio-delete-napi-structures-from-netdev-before-releasing-memory.patch virtio-net-fix-refill-races-during-restore.patch virtio-net-make-all-rx-paths-handle-errors-consistently.patch virtio_net-don-t-leak-memory-or-block-when-too-many-frags.patch 
virtio_net-fix-error-handling-for-mergeable-buffers.patch vlan-fix-header-ops-passthru-when-doing-tx-vlan-offload.patch --- diff --git a/queue-3.10/bridge-use-spin_lock_bh-in-br_multicast_set_hash_max.patch b/queue-3.10/bridge-use-spin_lock_bh-in-br_multicast_set_hash_max.patch new file mode 100644 index 00000000000..d87120de101 --- /dev/null +++ b/queue-3.10/bridge-use-spin_lock_bh-in-br_multicast_set_hash_max.patch @@ -0,0 +1,63 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Curt Brune +Date: Mon, 6 Jan 2014 11:00:32 -0800 +Subject: bridge: use spin_lock_bh() in br_multicast_set_hash_max + +From: Curt Brune + +[ Upstream commit fe0d692bbc645786bce1a98439e548ae619269f5 ] + +br_multicast_set_hash_max() is called from process context in +net/bridge/br_sysfs_br.c by the sysfs store_hash_max() function. + +br_multicast_set_hash_max() calls spin_lock(&br->multicast_lock), +which can deadlock the CPU if a softirq that also tries to take the +same lock interrupts br_multicast_set_hash_max() while the lock is +held . This can happen quite easily when any of the bridge multicast +timers expire, which try to take the same lock. + +The fix here is to use spin_lock_bh(), preventing other softirqs from +executing on this CPU. + +Steps to reproduce: + +1. Create a bridge with several interfaces (I used 4). +2. Set the "multicast query interval" to a low number, like 2. +3. Enable the bridge as a multicast querier. +4. Repeatedly set the bridge hash_max parameter via sysfs. + + # brctl addbr br0 + # brctl addif br0 eth1 eth2 eth3 eth4 + # brctl setmcqi br0 2 + # brctl setmcquerier br0 1 + + # while true ; do echo 4096 > /sys/class/net/br0/bridge/hash_max; done + +Signed-off-by: Curt Brune +Signed-off-by: Scott Feldman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_multicast.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/bridge/br_multicast.c ++++ b/net/bridge/br_multicast.c +@@ -1839,7 +1839,7 @@ int br_multicast_set_hash_max(struct net + u32 old; + struct net_bridge_mdb_htable *mdb; + +- spin_lock(&br->multicast_lock); ++ spin_lock_bh(&br->multicast_lock); + if (!netif_running(br->dev)) + goto unlock; + +@@ -1871,7 +1871,7 @@ rollback: + } + + unlock: +- spin_unlock(&br->multicast_lock); ++ spin_unlock_bh(&br->multicast_lock); + + return err; + } diff --git a/queue-3.10/drivers-net-hamradio-integer-overflow-in-hdlcdrv_ioctl.patch b/queue-3.10/drivers-net-hamradio-integer-overflow-in-hdlcdrv_ioctl.patch new file mode 100644 index 00000000000..89cd4d345f1 --- /dev/null +++ b/queue-3.10/drivers-net-hamradio-integer-overflow-in-hdlcdrv_ioctl.patch @@ -0,0 +1,32 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Wenliang Fan +Date: Tue, 17 Dec 2013 11:25:28 +0800 +Subject: drivers/net/hamradio: Integer overflow in hdlcdrv_ioctl() + +From: Wenliang Fan + +[ Upstream commit e9db5c21d3646a6454fcd04938dd215ac3ab620a ] + +The local variable 'bi' comes from userspace. If userspace passed a +large number to 'bi.data.calibrate', there would be an integer overflow +in the following line: + s->hdlctx.calibrate = bi.data.calibrate * s->par.bitrate / 16; + +Signed-off-by: Wenliang Fan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hamradio/hdlcdrv.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/hamradio/hdlcdrv.c ++++ b/drivers/net/hamradio/hdlcdrv.c +@@ -571,6 +571,8 @@ static int hdlcdrv_ioctl(struct net_devi + case HDLCDRVCTL_CALIBRATE: + if(!capable(CAP_SYS_RAWIO)) + return -EPERM; ++ if (bi.data.calibrate > INT_MAX / s->par.bitrate) ++ return -EINVAL; + s->hdlctx.calibrate = bi.data.calibrate * s->par.bitrate / 16; + return 0; + diff --git a/queue-3.10/hamradio-yam-fix-info-leak-in-ioctl.patch b/queue-3.10/hamradio-yam-fix-info-leak-in-ioctl.patch new file mode 100644 index 00000000000..06773fddf6e --- /dev/null +++ b/queue-3.10/hamradio-yam-fix-info-leak-in-ioctl.patch @@ -0,0 +1,33 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: =?UTF-8?q?Salva=20Peir=C3=B3?= +Date: Tue, 17 Dec 2013 10:06:30 +0100 +Subject: hamradio/yam: fix info leak in ioctl +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Salva Peiró + +[ Upstream commit 8e3fbf870481eb53b2d3a322d1fc395ad8b367ed ] + +The yam_ioctl() code fails to initialise the cmd field +of the struct yamdrv_ioctl_cfg. Add an explicit memset(0) +before filling the structure to avoid the 4-byte info leak. + +Signed-off-by: Salva Peiró +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hamradio/yam.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/hamradio/yam.c ++++ b/drivers/net/hamradio/yam.c +@@ -1058,6 +1058,7 @@ static int yam_ioctl(struct net_device * + break; + + case SIOCYAMGCFG: ++ memset(&yi, 0, sizeof(yi)); + yi.cfg.mask = 0xffffffff; + yi.cfg.iobase = yp->iobase; + yi.cfg.irq = yp->irq; diff --git a/queue-3.10/ip_gre-fix-msg_name-parsing-for-recvfrom-recvmsg.patch b/queue-3.10/ip_gre-fix-msg_name-parsing-for-recvfrom-recvmsg.patch new file mode 100644 index 00000000000..3dd636632d9 --- /dev/null +++ b/queue-3.10/ip_gre-fix-msg_name-parsing-for-recvfrom-recvmsg.patch @@ -0,0 +1,38 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: =?UTF-8?q?Timo=20Ter=C3=A4s?= +Date: Mon, 16 Dec 2013 11:02:09 +0200 +Subject: ip_gre: fix msg_name parsing for recvfrom/recvmsg +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Timo Teräs + +[ Upstream commit 0e3da5bb8da45890b1dc413404e0f978ab71173e ] + +ipgre_header_parse() needs to parse the tunnel's ip header and it +uses mac_header to locate the iphdr. This got broken when gre tunneling +was refactored as mac_header is no longer updated to point to iphdr. +Introduce skb_pop_mac_header() helper to do the mac_header assignment +and use it in ipgre_rcv() to fix msg_name parsing. + +Bug introduced in commit c54419321455 (GRE: Refactor GRE tunneling code.) + +Cc: Pravin B Shelar +Signed-off-by: Timo Teräs +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_gre.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv4/ip_gre.c ++++ b/net/ipv4/ip_gre.c +@@ -335,6 +335,7 @@ static int ipgre_rcv(struct sk_buff *skb + iph->saddr, iph->daddr, tpi.key); + + if (tunnel) { ++ skb_pop_mac_header(skb); + ip_tunnel_rcv(tunnel, skb, &tpi, hdr_len, log_ecn_error); + return 0; + } diff --git a/queue-3.10/ipv6-always-set-the-new-created-dst-s-from-in-ip6_rt_copy.patch b/queue-3.10/ipv6-always-set-the-new-created-dst-s-from-in-ip6_rt_copy.patch new file mode 100644 index 00000000000..1fea43ff10c --- /dev/null +++ b/queue-3.10/ipv6-always-set-the-new-created-dst-s-from-in-ip6_rt_copy.patch @@ -0,0 +1,40 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Li RongQing +Date: Thu, 19 Dec 2013 12:40:26 +0800 +Subject: ipv6: always set the new created dst's from in ip6_rt_copy + +From: Li RongQing + +[ Upstream commit 24f5b855e17df7e355eacd6c4a12cc4d6a6c9ff0 ] + +ip6_rt_copy only sets dst.from if ort has flag RTF_ADDRCONF and RTF_DEFAULT. +but the prefix routes which did get installed by hand locally can have an +expiration, and no any flag combination which can ensure a potential from +does never expire, so we should always set the new created dst's from. + +This also fixes the new created dst is always expired since the ort, which +is created by RA, maybe has RTF_EXPIRES and RTF_ADDRCONF, but no RTF_DEFAULT. + +Suggested-by: Hannes Frederic Sowa +CC: Gao feng +Signed-off-by: Li RongQing +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/route.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -1838,9 +1838,7 @@ static struct rt6_info *ip6_rt_copy(stru + else + rt->rt6i_gateway = *dest; + rt->rt6i_flags = ort->rt6i_flags; +- if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == +- (RTF_DEFAULT | RTF_ADDRCONF)) +- rt6_set_from(rt, ort); ++ rt6_set_from(rt, ort); + rt->rt6i_metric = 0; + + #ifdef CONFIG_IPV6_SUBTREES diff --git a/queue-3.10/ipv6-don-t-count-addrconf-generated-routes-against-gc-limit.patch b/queue-3.10/ipv6-don-t-count-addrconf-generated-routes-against-gc-limit.patch new file mode 100644 index 00000000000..50aab48978f --- /dev/null +++ b/queue-3.10/ipv6-don-t-count-addrconf-generated-routes-against-gc-limit.patch @@ -0,0 +1,44 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Hannes Frederic Sowa +Date: Sat, 7 Dec 2013 03:33:45 +0100 +Subject: ipv6: don't count addrconf generated routes against gc limit + +From: Hannes Frederic Sowa + +[ Upstream commit a3300ef4bbb1f1e33ff0400e1e6cf7733d988f4f ] + +Brett Ciphery reported that new ipv6 addresses failed to get installed +because the addrconf generated dsts where counted against the dst gc +limit. We don't need to count those routes like we currently don't count +administratively added routes. + +Because the max_addresses check enforces a limit on unbounded address +generation first in case someone plays with router advertisments, we +are still safe here. + +Reported-by: Brett Ciphery +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/route.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -2099,12 +2099,10 @@ struct rt6_info *addrconf_dst_alloc(stru + bool anycast) + { + struct net *net = dev_net(idev->dev); +- struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL); +- +- if (!rt) { +- net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n"); ++ struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, ++ DST_NOCOUNT, NULL); ++ if (!rt) + return ERR_PTR(-ENOMEM); +- } + + in6_dev_hold(idev); + diff --git a/queue-3.10/ipv6-fix-illegal-mac_header-comparison-on-32bit.patch b/queue-3.10/ipv6-fix-illegal-mac_header-comparison-on-32bit.patch new file mode 100644 index 00000000000..2097cdce2f5 --- /dev/null +++ b/queue-3.10/ipv6-fix-illegal-mac_header-comparison-on-32bit.patch @@ -0,0 +1,39 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Hannes Frederic Sowa +Date: Fri, 13 Dec 2013 15:12:27 +0100 +Subject: ipv6: fix illegal mac_header comparison on 32bit + +From: Hannes Frederic Sowa + +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/skbuff.h | 5 +++++ + net/ipv6/udp_offload.c | 2 +- + 2 files changed, 6 insertions(+), 1 deletion(-) + +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -1741,6 +1741,11 @@ static inline void skb_set_mac_header(st + } + #endif /* NET_SKBUFF_DATA_USES_OFFSET */ + ++static inline void skb_pop_mac_header(struct sk_buff *skb) ++{ ++ skb->mac_header = skb->network_header; ++} ++ + static inline void skb_probe_transport_header(struct sk_buff *skb, + const int offset_hint) + { +--- a/net/ipv6/udp_offload.c ++++ b/net/ipv6/udp_offload.c +@@ -85,7 +85,7 @@ static struct sk_buff *udp6_ufo_fragment + + /* Check if there is enough headroom to insert fragment header. 
*/ + tnl_hlen = skb_tnl_header_len(skb); +- if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) { ++ if (skb_mac_header(skb) < skb->head + tnl_hlen + frag_hdr_sz) { + if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz)) + goto out; + } diff --git a/queue-3.10/ipv6-fixed-support-for-blackhole-and-prohibit-routes.patch b/queue-3.10/ipv6-fixed-support-for-blackhole-and-prohibit-routes.patch new file mode 100644 index 00000000000..e792dfb22d2 --- /dev/null +++ b/queue-3.10/ipv6-fixed-support-for-blackhole-and-prohibit-routes.patch @@ -0,0 +1,97 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Kamala R +Date: Mon, 2 Dec 2013 19:55:21 +0530 +Subject: IPv6: Fixed support for blackhole and prohibit routes + +From: Kamala R + +[ Upstream commit 7150aede5dd241539686e17d9592f5ebd28a2cda ] + +The behaviour of blackhole and prohibit routes has been corrected by setting +the input and output pointers of the dst variable appropriately. For +blackhole routes, they are set to dst_discard and to ip6_pkt_discard and +ip6_pkt_discard_out respectively for prohibit routes. + +ipv6: ip6_pkt_prohibit(_out) should not depend on +CONFIG_IPV6_MULTIPLE_TABLES + +We need ip6_pkt_prohibit(_out) available without +CONFIG_IPV6_MULTIPLE_TABLES + +Signed-off-by: Kamala R +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/route.c | 22 ++++++++++------------ + 1 file changed, 10 insertions(+), 12 deletions(-) + +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -84,6 +84,8 @@ static int ip6_dst_gc(struct dst_ops * + + static int ip6_pkt_discard(struct sk_buff *skb); + static int ip6_pkt_discard_out(struct sk_buff *skb); ++static int ip6_pkt_prohibit(struct sk_buff *skb); ++static int ip6_pkt_prohibit_out(struct sk_buff *skb); + static void ip6_link_failure(struct sk_buff *skb); + static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu); +@@ -233,9 +235,6 @@ static const struct rt6_info ip6_null_en + + #ifdef CONFIG_IPV6_MULTIPLE_TABLES + +-static int ip6_pkt_prohibit(struct sk_buff *skb); +-static int ip6_pkt_prohibit_out(struct sk_buff *skb); +- + static const struct rt6_info ip6_prohibit_entry_template = { + .dst = { + .__refcnt = ATOMIC_INIT(1), +@@ -1498,21 +1497,24 @@ int ip6_route_add(struct fib6_config *cf + goto out; + } + } +- rt->dst.output = ip6_pkt_discard_out; +- rt->dst.input = ip6_pkt_discard; + rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; + switch (cfg->fc_type) { + case RTN_BLACKHOLE: + rt->dst.error = -EINVAL; ++ rt->dst.output = dst_discard; ++ rt->dst.input = dst_discard; + break; + case RTN_PROHIBIT: + rt->dst.error = -EACCES; ++ rt->dst.output = ip6_pkt_prohibit_out; ++ rt->dst.input = ip6_pkt_prohibit; + break; + case RTN_THROW: +- rt->dst.error = -EAGAIN; +- break; + default: +- rt->dst.error = -ENETUNREACH; ++ rt->dst.error = (cfg->fc_type == RTN_THROW) ? 
-EAGAIN ++ : -ENETUNREACH; ++ rt->dst.output = ip6_pkt_discard_out; ++ rt->dst.input = ip6_pkt_discard; + break; + } + goto install_route; +@@ -2077,8 +2079,6 @@ static int ip6_pkt_discard_out(struct sk + return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); + } + +-#ifdef CONFIG_IPV6_MULTIPLE_TABLES +- + static int ip6_pkt_prohibit(struct sk_buff *skb) + { + return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); +@@ -2090,8 +2090,6 @@ static int ip6_pkt_prohibit_out(struct s + return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); + } + +-#endif +- + /* + * Allocate a dst for local (unicast / anycast) address. + */ diff --git a/queue-3.10/macvtap-do-not-double-count-received-packets.patch b/queue-3.10/macvtap-do-not-double-count-received-packets.patch new file mode 100644 index 00000000000..e105a205b41 --- /dev/null +++ b/queue-3.10/macvtap-do-not-double-count-received-packets.patch @@ -0,0 +1,48 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Vlad Yasevich +Date: Tue, 26 Nov 2013 12:37:12 -0500 +Subject: macvtap: Do not double-count received packets + +From: Vlad Yasevich + +[ Upstream commit 006da7b07bc4d3a7ffabad17cf639eec6849c9dc ] + +Currently macvlan will count received packets after calling each +vlans receive handler. Macvtap attempts to count the packet +yet again when the user reads the packet from the tap socket. +This code doesn't do this consistently either. Remove the +counting from macvtap and let only macvlan count received +packets. + +Signed-off-by: Vlad Yasevich +Acked-by: Michael S. Tsirkin +Acked-by: Jason Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/macvtap.c | 7 ------- + 1 file changed, 7 deletions(-) + +--- a/drivers/net/macvtap.c ++++ b/drivers/net/macvtap.c +@@ -797,7 +797,6 @@ static ssize_t macvtap_put_user(struct m + const struct sk_buff *skb, + const struct iovec *iv, int len) + { +- struct macvlan_dev *vlan; + int ret; + int vnet_hdr_len = 0; + int vlan_offset = 0; +@@ -851,12 +850,6 @@ static ssize_t macvtap_put_user(struct m + copied += len; + + done: +- rcu_read_lock_bh(); +- vlan = rcu_dereference_bh(q->vlan); +- if (vlan) +- macvlan_count_rx(vlan, copied - vnet_hdr_len, ret == 0, 0); +- rcu_read_unlock_bh(); +- + return ret ? ret : copied; + } + diff --git a/queue-3.10/macvtap-signal-truncated-packets.patch b/queue-3.10/macvtap-signal-truncated-packets.patch new file mode 100644 index 00000000000..aa63542c766 --- /dev/null +++ b/queue-3.10/macvtap-signal-truncated-packets.patch @@ -0,0 +1,76 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Jason Wang +Date: Wed, 11 Dec 2013 13:08:34 +0800 +Subject: macvtap: signal truncated packets + +From: Jason Wang + +[ Upstream commit ce232ce01d61b184202bb185103d119820e1260c ] + +macvtap_put_user() never returns a value greater than iov length, this in fact +bypasses the truncated checking in macvtap_recvmsg(). Fix this by always +returning the size of packet plus the possible vlan header to let the truncation +checking work. + +Cc: Vlad Yasevich +Cc: Zhi Yong Wu +Cc: Michael S. Tsirkin +Signed-off-by: Jason Wang +Acked-by: Vlad Yasevich +Acked-by: Michael S. Tsirkin +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/macvtap.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/drivers/net/macvtap.c ++++ b/drivers/net/macvtap.c +@@ -800,7 +800,7 @@ static ssize_t macvtap_put_user(struct m + int ret; + int vnet_hdr_len = 0; + int vlan_offset = 0; +- int copied; ++ int copied, total; + + if (q->flags & IFF_VNET_HDR) { + struct virtio_net_hdr vnet_hdr; +@@ -815,7 +815,8 @@ static ssize_t macvtap_put_user(struct m + if (memcpy_toiovecend(iv, (void *)&vnet_hdr, 0, sizeof(vnet_hdr))) + return -EFAULT; + } +- copied = vnet_hdr_len; ++ total = copied = vnet_hdr_len; ++ total += skb->len; + + if (!vlan_tx_tag_present(skb)) + len = min_t(int, skb->len, len); +@@ -830,6 +831,7 @@ static ssize_t macvtap_put_user(struct m + + vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto); + len = min_t(int, skb->len + VLAN_HLEN, len); ++ total += VLAN_HLEN; + + copy = min_t(int, vlan_offset, len); + ret = skb_copy_datagram_const_iovec(skb, 0, iv, copied, copy); +@@ -847,10 +849,9 @@ static ssize_t macvtap_put_user(struct m + } + + ret = skb_copy_datagram_const_iovec(skb, vlan_offset, iv, copied, len); +- copied += len; + + done: +- return ret ? ret : copied; ++ return ret ? ret : total; + } + + static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb, +@@ -902,7 +903,7 @@ static ssize_t macvtap_aio_read(struct k + } + + ret = macvtap_do_read(q, iocb, iv, len, file->f_flags & O_NONBLOCK); +- ret = min_t(ssize_t, ret, len); /* XXX copied from tun.c. Why? 
*/ ++ ret = min_t(ssize_t, ret, len); + if (ret > 0) + iocb->ki_pos = ret; + out: diff --git a/queue-3.10/macvtap-update-file-current-position.patch b/queue-3.10/macvtap-update-file-current-position.patch new file mode 100644 index 00000000000..fae617765e5 --- /dev/null +++ b/queue-3.10/macvtap-update-file-current-position.patch @@ -0,0 +1,27 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Zhi Yong Wu +Date: Fri, 6 Dec 2013 14:16:50 +0800 +Subject: macvtap: update file current position + +From: Zhi Yong Wu + +[ Upstream commit e6ebc7f16ca1434a334647aa56399c546be4e64b ] + +Signed-off-by: Zhi Yong Wu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/macvtap.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/macvtap.c ++++ b/drivers/net/macvtap.c +@@ -903,6 +903,8 @@ static ssize_t macvtap_aio_read(struct k + + ret = macvtap_do_read(q, iocb, iv, len, file->f_flags & O_NONBLOCK); + ret = min_t(ssize_t, ret, len); /* XXX copied from tun.c. Why? */ ++ if (ret > 0) ++ iocb->ki_pos = ret; + out: + return ret; + } diff --git a/queue-3.10/net-do-not-pretend-fraglist-support.patch b/queue-3.10/net-do-not-pretend-fraglist-support.patch new file mode 100644 index 00000000000..57e18e6463d --- /dev/null +++ b/queue-3.10/net-do-not-pretend-fraglist-support.patch @@ -0,0 +1,74 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Eric Dumazet +Date: Mon, 2 Dec 2013 08:51:13 -0800 +Subject: net: do not pretend FRAGLIST support + +From: Eric Dumazet + +[ Upstream commit 28e24c62ab3062e965ef1b3bcc244d50aee7fa85 ] + +Few network drivers really supports frag_list : virtual drivers. + +Some drivers wrongly advertise NETIF_F_FRAGLIST feature. + +If skb with a frag_list is given to them, packet on the wire will be +corrupt. + +Remove this flag, as core networking stack will make sure to +provide packets that can be sent without corruption. 
+ +Signed-off-by: Eric Dumazet +Cc: Thadeu Lima de Souza Cascardo +Cc: Anirudha Sarangi +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/ibm/ehea/ehea_main.c | 2 +- + drivers/net/ethernet/tehuti/tehuti.c | 1 - + drivers/net/ethernet/xilinx/ll_temac_main.c | 2 +- + drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 2 +- + 4 files changed, 3 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c ++++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c +@@ -3023,7 +3023,7 @@ static struct ehea_port *ehea_setup_sing + + dev->hw_features = NETIF_F_SG | NETIF_F_TSO | + NETIF_F_IP_CSUM | NETIF_F_HW_VLAN_CTAG_TX; +- dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_TSO | ++ dev->features = NETIF_F_SG | NETIF_F_TSO | + NETIF_F_HIGHDMA | NETIF_F_IP_CSUM | + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_RXCSUM; +--- a/drivers/net/ethernet/tehuti/tehuti.c ++++ b/drivers/net/ethernet/tehuti/tehuti.c +@@ -2019,7 +2019,6 @@ bdx_probe(struct pci_dev *pdev, const st + ndev->features = NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO + | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_RXCSUM +- /*| NETIF_F_FRAGLIST */ + ; + ndev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | + NETIF_F_TSO | NETIF_F_HW_VLAN_CTAG_TX; +--- a/drivers/net/ethernet/xilinx/ll_temac_main.c ++++ b/drivers/net/ethernet/xilinx/ll_temac_main.c +@@ -1016,7 +1016,7 @@ static int temac_of_probe(struct platfor + dev_set_drvdata(&op->dev, ndev); + SET_NETDEV_DEV(ndev, &op->dev); + ndev->flags &= ~IFF_MULTICAST; /* clear multicast */ +- ndev->features = NETIF_F_SG | NETIF_F_FRAGLIST; ++ ndev->features = NETIF_F_SG; + ndev->netdev_ops = &temac_netdev_ops; + ndev->ethtool_ops = &temac_ethtool_ops; + #if 0 +--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c ++++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +@@ -1488,7 +1488,7 @@ static int 
axienet_of_probe(struct platf + + SET_NETDEV_DEV(ndev, &op->dev); + ndev->flags &= ~IFF_MULTICAST; /* clear multicast */ +- ndev->features = NETIF_F_SG | NETIF_F_FRAGLIST; ++ ndev->features = NETIF_F_SG; + ndev->netdev_ops = &axienet_netdev_ops; + ndev->ethtool_ops = &axienet_ethtool_ops; + diff --git a/queue-3.10/net-drop_monitor-fix-the-value-of-maxattr.patch b/queue-3.10/net-drop_monitor-fix-the-value-of-maxattr.patch new file mode 100644 index 00000000000..794be69ce8e --- /dev/null +++ b/queue-3.10/net-drop_monitor-fix-the-value-of-maxattr.patch @@ -0,0 +1,29 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Changli Gao +Date: Sun, 8 Dec 2013 09:36:56 -0500 +Subject: net: drop_monitor: fix the value of maxattr + +From: Changli Gao + +[ Upstream commit d323e92cc3f4edd943610557c9ea1bb4bb5056e8 ] + +maxattr in genl_family should be used to save the max attribute +type, but not the max command type. Drop monitor doesn't support +any attributes, so we should leave it as zero. + +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/drop_monitor.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/net/core/drop_monitor.c ++++ b/net/core/drop_monitor.c +@@ -64,7 +64,6 @@ static struct genl_family net_drop_monit + .hdrsize = 0, + .name = "NET_DM", + .version = 2, +- .maxattr = NET_DM_CMD_MAX, + }; + + static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); diff --git a/queue-3.10/net-fec-fix-potential-use-after-free.patch b/queue-3.10/net-fec-fix-potential-use-after-free.patch new file mode 100644 index 00000000000..f1f02ef5383 --- /dev/null +++ b/queue-3.10/net-fec-fix-potential-use-after-free.patch @@ -0,0 +1,45 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Eric Dumazet +Date: Thu, 19 Dec 2013 10:53:02 -0800 +Subject: net: fec: fix potential use after free + +From: Eric Dumazet + +[ Upstream commit 7a2a84518cfb263d2c4171b3d63671f88316adb2 ] + +skb_tx_timestamp(skb) should be called _before_ TX completion +has a chance to trigger, otherwise it is too late and we access +freed memory. + +Signed-off-by: Eric Dumazet +Fixes: de5fb0a05348 ("net: fec: put tx to napi poll function to fix dead lock") +Cc: Frank Li +Cc: Richard Cochran +Acked-by: Richard Cochran +Acked-by: Frank Li +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/freescale/fec_main.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/freescale/fec_main.c ++++ b/drivers/net/ethernet/freescale/fec_main.c +@@ -371,6 +371,8 @@ fec_enet_start_xmit(struct sk_buff *skb, + else + bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); + ++ skb_tx_timestamp(skb); ++ + fep->cur_tx = bdp; + + if (fep->cur_tx == fep->dirty_tx) +@@ -379,8 +381,6 @@ fec_enet_start_xmit(struct sk_buff *skb, + /* Trigger transmission start */ + writel(0, fep->hwp + FEC_X_DES_ACTIVE); + +- skb_tx_timestamp(skb); +- + return NETDEV_TX_OK; + } + diff --git a/queue-3.10/net-inet_diag-zero-out-uninitialized-idiag_-src-dst-fields.patch b/queue-3.10/net-inet_diag-zero-out-uninitialized-idiag_-src-dst-fields.patch new file mode 100644 index 00000000000..dd0e12a9234 --- /dev/null +++ b/queue-3.10/net-inet_diag-zero-out-uninitialized-idiag_-src-dst-fields.patch @@ -0,0 +1,88 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Daniel Borkmann +Date: Tue, 17 Dec 2013 00:38:39 +0100 +Subject: net: inet_diag: zero out uninitialized idiag_{src,dst} fields + +From: Daniel Borkmann + +[ Upstream commit b1aac815c0891fe4a55a6b0b715910142227700f ] + +Jakub reported while working with nlmon netlink sniffer that parts of +the inet_diag_sockid are not initialized when r->idiag_family != AF_INET6. +That is, fields of r->id.idiag_src[1 ... 3], r->id.idiag_dst[1 ... 3]. + +In fact, it seems that we can leak 6 * sizeof(u32) byte of kernel [slab] +memory through this. At least, in udp_dump_one(), we allocate a skb in ... + + rep = nlmsg_new(sizeof(struct inet_diag_msg) + ..., GFP_KERNEL); + +... 
and then pass that to inet_sk_diag_fill() that puts the whole struct +inet_diag_msg into the skb, where we only fill out r->id.idiag_src[0], +r->id.idiag_dst[0] and leave the rest untouched: + + r->id.idiag_src[0] = inet->inet_rcv_saddr; + r->id.idiag_dst[0] = inet->inet_daddr; + +struct inet_diag_msg embeds struct inet_diag_sockid that is correctly / +fully filled out in IPv6 case, but for IPv4 not. + +So just zero them out by using plain memset (for this little amount of +bytes it's probably not worth the extra check for idiag_family == AF_INET). + +Similarly, fix also other places where we fill that out. + +Reported-by: Jakub Zawadzki +Signed-off-by: Daniel Borkmann +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/inet_diag.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +--- a/net/ipv4/inet_diag.c ++++ b/net/ipv4/inet_diag.c +@@ -106,6 +106,10 @@ int inet_sk_diag_fill(struct sock *sk, s + + r->id.idiag_sport = inet->inet_sport; + r->id.idiag_dport = inet->inet_dport; ++ ++ memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); ++ memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); ++ + r->id.idiag_src[0] = inet->inet_rcv_saddr; + r->id.idiag_dst[0] = inet->inet_daddr; + +@@ -240,12 +244,19 @@ static int inet_twsk_diag_fill(struct in + + r->idiag_family = tw->tw_family; + r->idiag_retrans = 0; ++ + r->id.idiag_if = tw->tw_bound_dev_if; + sock_diag_save_cookie(tw, r->id.idiag_cookie); ++ + r->id.idiag_sport = tw->tw_sport; + r->id.idiag_dport = tw->tw_dport; ++ ++ memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); ++ memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); ++ + r->id.idiag_src[0] = tw->tw_rcv_saddr; + r->id.idiag_dst[0] = tw->tw_daddr; ++ + r->idiag_state = tw->tw_substate; + r->idiag_timer = 3; + r->idiag_expires = DIV_ROUND_UP(tmo * 1000, HZ); +@@ -732,8 +743,13 @@ static int inet_diag_fill_req(struct sk_ + + r->id.idiag_sport = inet->inet_sport; + r->id.idiag_dport = ireq->rmt_port; ++ ++ 
memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); ++ memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); ++ + r->id.idiag_src[0] = ireq->loc_addr; + r->id.idiag_dst[0] = ireq->rmt_addr; ++ + r->idiag_expires = jiffies_to_msecs(tmo); + r->idiag_rqueue = 0; + r->idiag_wqueue = 0; diff --git a/queue-3.10/net-llc-fix-use-after-free-in-llc_ui_recvmsg.patch b/queue-3.10/net-llc-fix-use-after-free-in-llc_ui_recvmsg.patch new file mode 100644 index 00000000000..75032c315cc --- /dev/null +++ b/queue-3.10/net-llc-fix-use-after-free-in-llc_ui_recvmsg.patch @@ -0,0 +1,65 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Daniel Borkmann +Date: Mon, 30 Dec 2013 23:40:50 +0100 +Subject: net: llc: fix use after free in llc_ui_recvmsg + +From: Daniel Borkmann + +[ Upstream commit 4d231b76eef6c4a6bd9c96769e191517765942cb ] + +While commit 30a584d944fb fixes datagram interface in LLC, a use +after free bug has been introduced for SOCK_STREAM sockets that do +not make use of MSG_PEEK. + +The flow is as follow ... + + if (!(flags & MSG_PEEK)) { + ... + sk_eat_skb(sk, skb, false); + ... + } + ... + if (used + offset < skb->len) + continue; + +... where sk_eat_skb() calls __kfree_skb(). Therefore, cache +original length and work on skb_len to check partial reads. + +Fixes: 30a584d944fb ("[LLX]: SOCK_DGRAM interface fixes") +Signed-off-by: Daniel Borkmann +Cc: Stephen Hemminger +Cc: Arnaldo Carvalho de Melo +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/llc/af_llc.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/net/llc/af_llc.c ++++ b/net/llc/af_llc.c +@@ -715,7 +715,7 @@ static int llc_ui_recvmsg(struct kiocb * + unsigned long cpu_flags; + size_t copied = 0; + u32 peek_seq = 0; +- u32 *seq; ++ u32 *seq, skb_len; + unsigned long used; + int target; /* Read at least this many bytes */ + long timeo; +@@ -812,6 +812,7 @@ static int llc_ui_recvmsg(struct kiocb * + } + continue; + found_ok_skb: ++ skb_len = skb->len; + /* Ok so how much can we use? */ + used = skb->len - offset; + if (len < used) +@@ -844,7 +845,7 @@ static int llc_ui_recvmsg(struct kiocb * + } + + /* Partial read */ +- if (used + offset < skb->len) ++ if (used + offset < skb_len) + continue; + } while (len > 0); + diff --git a/queue-3.10/net-loosen-constraints-for-recalculating-checksum-in-skb_segment.patch b/queue-3.10/net-loosen-constraints-for-recalculating-checksum-in-skb_segment.patch new file mode 100644 index 00000000000..30379bc39eb --- /dev/null +++ b/queue-3.10/net-loosen-constraints-for-recalculating-checksum-in-skb_segment.patch @@ -0,0 +1,71 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Simon Horman +Date: Sun, 19 May 2013 15:46:49 +0000 +Subject: net: Loosen constraints for recalculating checksum in skb_segment() + +From: Simon Horman + +[ Upstream commit 1cdbcb7957cf9e5f841dbcde9b38fd18a804208b ] + +This is a generic solution to resolve a specific problem that I have observed. + +If the encapsulation of an skb changes then the ability to offload checksums +may also change. In particular it may be necessary to perform checksumming +in software. + +An example of such a case is where a non-GRE packet is received but +is to be encapsulated and transmitted as GRE. + +Another example relates to my proposed support for packets +that are non-MPLS when received but MPLS when transmitted. 
+ +The cost of this change is that the value of the csum variable may be +checked when it previously was not. In the case where the csum variable is +true this is pure overhead. In the case where the csum variable is false it +leads to software checksumming, which I believe also leads to correct +checksums in transmitted packets for the cases described above. + +Further analysis: + +This patch relies on the return value of can_checksum_protocol() +being correct and in turn the return value of skb_network_protocol(), +used to provide the protocol parameter of can_checksum_protocol(), +being correct. It also relies on the features passed to skb_segment() +and in turn to can_checksum_protocol() being correct. + +I believe that this problem has not been observed for VLANs because it +appears that almost all drivers, the exception being xgbe, set +vlan_features such that the checksum offload support for VLAN packets +is greater than or equal to that of non-VLAN packets. + +I wonder if the code in xgbe may be an oversight and the hardware does +support checksumming of VLAN packets. If so it may be worth updating the +vlan_features of the driver as this patch will force such checksums to be +performed in software rather than hardware. + +Signed-off-by: Simon Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/skbuff.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -2854,7 +2854,7 @@ struct sk_buff *skb_segment(struct sk_bu + doffset + tnl_hlen); + + if (fskb != skb_shinfo(skb)->frag_list) +- continue; ++ goto perform_csum_check; + + if (!sg) { + nskb->ip_summed = CHECKSUM_NONE; +@@ -2918,6 +2918,7 @@ skip_fraglist: + nskb->len += nskb->data_len; + nskb->truesize += nskb->data_len; + ++perform_csum_check: + if (!csum) { + nskb->csum = skb_checksum(nskb, doffset, + nskb->len - doffset, 0); diff --git a/queue-3.10/net-rose-restore-old-recvmsg-behavior.patch b/queue-3.10/net-rose-restore-old-recvmsg-behavior.patch new file mode 100644 index 00000000000..e15ebb3599d --- /dev/null +++ b/queue-3.10/net-rose-restore-old-recvmsg-behavior.patch @@ -0,0 +1,61 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Florian Westphal +Date: Mon, 23 Dec 2013 00:32:31 +0100 +Subject: net: rose: restore old recvmsg behavior + +From: Florian Westphal + +[ Upstream commit f81152e35001e91997ec74a7b4e040e6ab0acccf ] + +recvmsg handler in net/rose/af_rose.c performs size-check ->msg_namelen. + +After commit f3d3342602f8bcbf37d7c46641cb9bca7618eb1c +(net: rework recvmsg handler msg_name and msg_namelen logic), we now +always take the else branch due to namelen being initialized to 0. + +Digging in netdev-vger-cvs git repo shows that msg_namelen was +initialized with a fixed-size since at least 1995, so the else branch +was never taken. + +Compile tested only. + +Signed-off-by: Florian Westphal +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/rose/af_rose.c | 16 ++++------------ + 1 file changed, 4 insertions(+), 12 deletions(-) + +--- a/net/rose/af_rose.c ++++ b/net/rose/af_rose.c +@@ -1253,6 +1253,7 @@ static int rose_recvmsg(struct kiocb *io + + if (msg->msg_name) { + struct sockaddr_rose *srose; ++ struct full_sockaddr_rose *full_srose = msg->msg_name; + + memset(msg->msg_name, 0, sizeof(struct full_sockaddr_rose)); + srose = msg->msg_name; +@@ -1260,18 +1261,9 @@ static int rose_recvmsg(struct kiocb *io + srose->srose_addr = rose->dest_addr; + srose->srose_call = rose->dest_call; + srose->srose_ndigis = rose->dest_ndigis; +- if (msg->msg_namelen >= sizeof(struct full_sockaddr_rose)) { +- struct full_sockaddr_rose *full_srose = (struct full_sockaddr_rose *)msg->msg_name; +- for (n = 0 ; n < rose->dest_ndigis ; n++) +- full_srose->srose_digis[n] = rose->dest_digis[n]; +- msg->msg_namelen = sizeof(struct full_sockaddr_rose); +- } else { +- if (rose->dest_ndigis >= 1) { +- srose->srose_ndigis = 1; +- srose->srose_digi = rose->dest_digis[0]; +- } +- msg->msg_namelen = sizeof(struct sockaddr_rose); +- } ++ for (n = 0 ; n < rose->dest_ndigis ; n++) ++ full_srose->srose_digis[n] = rose->dest_digis[n]; ++ msg->msg_namelen = sizeof(struct full_sockaddr_rose); + } + + skb_free_datagram(sk, skb); diff --git a/queue-3.10/net-unix-allow-bind-to-fail-on-mutex-lock.patch b/queue-3.10/net-unix-allow-bind-to-fail-on-mutex-lock.patch new file mode 100644 index 00000000000..9d847c85025 --- /dev/null +++ b/queue-3.10/net-unix-allow-bind-to-fail-on-mutex-lock.patch @@ -0,0 +1,47 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Sasha Levin +Date: Fri, 13 Dec 2013 10:54:22 -0500 +Subject: net: unix: allow bind to fail on mutex lock + +From: Sasha Levin + +[ Upstream commit 37ab4fa7844a044dc21fde45e2a0fc2f3c3b6490 ] + +This is similar to the set_peek_off patch where calling bind while the +socket is stuck in unix_dgram_recvmsg() will block and cause a hung task +spew 
after a while. + +This is also the last place that did a straightforward mutex_lock(), so +there shouldn't be any more of these patches. + +Signed-off-by: Sasha Levin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/unix/af_unix.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -717,7 +717,9 @@ static int unix_autobind(struct socket * + int err; + unsigned int retries = 0; + +- mutex_lock(&u->readlock); ++ err = mutex_lock_interruptible(&u->readlock); ++ if (err) ++ return err; + + err = 0; + if (u->addr) +@@ -876,7 +878,9 @@ static int unix_bind(struct socket *sock + goto out; + addr_len = err; + +- mutex_lock(&u->readlock); ++ err = mutex_lock_interruptible(&u->readlock); ++ if (err) ++ goto out; + + err = -EINVAL; + if (u->addr) diff --git a/queue-3.10/net-unix-allow-set_peek_off-to-fail.patch b/queue-3.10/net-unix-allow-set_peek_off-to-fail.patch new file mode 100644 index 00000000000..37db76fb6fe --- /dev/null +++ b/queue-3.10/net-unix-allow-set_peek_off-to-fail.patch @@ -0,0 +1,72 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Sasha Levin +Date: Sat, 7 Dec 2013 17:26:27 -0500 +Subject: net: unix: allow set_peek_off to fail + +From: Sasha Levin + +[ Upstream commit 12663bfc97c8b3fdb292428105dd92d563164050 ] + +unix_dgram_recvmsg() will hold the readlock of the socket until recv +is complete. + +In the same time, we may try to setsockopt(SO_PEEK_OFF) which will hang until +unix_dgram_recvmsg() will complete (which can take a while) without allowing +us to break out of it, triggering a hung task spew. + +Instead, allow set_peek_off to fail, this way userspace will not hang. + +Signed-off-by: Sasha Levin +Acked-by: Pavel Emelyanov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/net.h | 2 +- + net/core/sock.c | 2 +- + net/unix/af_unix.c | 8 ++++++-- + 3 files changed, 8 insertions(+), 4 deletions(-) + +--- a/include/linux/net.h ++++ b/include/linux/net.h +@@ -180,7 +180,7 @@ struct proto_ops { + int offset, size_t size, int flags); + ssize_t (*splice_read)(struct socket *sock, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, unsigned int flags); +- void (*set_peek_off)(struct sock *sk, int val); ++ int (*set_peek_off)(struct sock *sk, int val); + }; + + #define DECLARE_SOCKADDR(type, dst, src) \ +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -885,7 +885,7 @@ set_rcvbuf: + + case SO_PEEK_OFF: + if (sock->ops->set_peek_off) +- sock->ops->set_peek_off(sk, val); ++ ret = sock->ops->set_peek_off(sk, val); + else + ret = -EOPNOTSUPP; + break; +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -529,13 +529,17 @@ static int unix_seqpacket_sendmsg(struct + static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *, + struct msghdr *, size_t, int); + +-static void unix_set_peek_off(struct sock *sk, int val) ++static int unix_set_peek_off(struct sock *sk, int val) + { + struct unix_sock *u = unix_sk(sk); + +- mutex_lock(&u->readlock); ++ if (mutex_lock_interruptible(&u->readlock)) ++ return -EINTR; ++ + sk->sk_peek_off = val; + mutex_unlock(&u->readlock); ++ ++ return 0; + } + + diff --git a/queue-3.10/netpoll-fix-missing-txq-unlock-and-and-oops.patch b/queue-3.10/netpoll-fix-missing-txq-unlock-and-and-oops.patch new file mode 100644 index 00000000000..feabd84d301 --- /dev/null +++ b/queue-3.10/netpoll-fix-missing-txq-unlock-and-and-oops.patch @@ -0,0 +1,49 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: "David S. Miller" +Date: Thu, 2 Jan 2014 19:50:52 -0500 +Subject: netpoll: Fix missing TXQ unlock and and OOPS. + +From: "David S. 
Miller" + +[ Upstream commit aca5f58f9ba803ec8c2e6bcf890db17589e8dfcc ] + +The VLAN tag handling code in netpoll_send_skb_on_dev() has two problems. + +1) It exits without unlocking the TXQ. + +2) It then tries to queue a NULL skb to npinfo->txq. + +Reported-by: Ahmed Tamrawi +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/netpoll.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +--- a/net/core/netpoll.c ++++ b/net/core/netpoll.c +@@ -386,8 +386,14 @@ void netpoll_send_skb_on_dev(struct netp + !vlan_hw_offload_capable(netif_skb_features(skb), + skb->vlan_proto)) { + skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb)); +- if (unlikely(!skb)) +- break; ++ if (unlikely(!skb)) { ++ /* This is actually a packet drop, but we ++ * don't want the code at the end of this ++ * function to try and re-queue a NULL skb. ++ */ ++ status = NETDEV_TX_OK; ++ goto unlock_txq; ++ } + skb->vlan_tci = 0; + } + +@@ -395,6 +401,7 @@ void netpoll_send_skb_on_dev(struct netp + if (status == NETDEV_TX_OK) + txq_trans_update(txq); + } ++ unlock_txq: + __netif_tx_unlock(txq); + + if (status == NETDEV_TX_OK) diff --git a/queue-3.10/netvsc-don-t-flush-peers-notifying-work-during-setting-mtu.patch b/queue-3.10/netvsc-don-t-flush-peers-notifying-work-during-setting-mtu.patch new file mode 100644 index 00000000000..76baed676b2 --- /dev/null +++ b/queue-3.10/netvsc-don-t-flush-peers-notifying-work-during-setting-mtu.patch @@ -0,0 +1,90 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Jason Wang +Date: Fri, 13 Dec 2013 17:21:27 +0800 +Subject: netvsc: don't flush peers notifying work during setting mtu + +From: Jason Wang + +[ Upstream commit 50dc875f2e6e2e04aed3b3033eb0ac99192d6d02 ] + +There's a possible deadlock if we flush the peers notifying work during setting +mtu: + +[ 22.991149] ====================================================== +[ 22.991173] [ INFO: possible circular locking dependency detected ] +[ 22.991198] 
3.10.0-54.0.1.el7.x86_64.debug #1 Not tainted +[ 22.991219] ------------------------------------------------------- +[ 22.991243] ip/974 is trying to acquire lock: +[ 22.991261] ((&(&net_device_ctx->dwork)->work)){+.+.+.}, at: [] flush_work+0x5/0x2e0 +[ 22.991307] +but task is already holding lock: +[ 22.991330] (rtnl_mutex){+.+.+.}, at: [] rtnetlink_rcv+0x1b/0x40 +[ 22.991367] +which lock already depends on the new lock. + +[ 22.991398] +the existing dependency chain (in reverse order) is: +[ 22.991426] +-> #1 (rtnl_mutex){+.+.+.}: +[ 22.991449] [] __lock_acquire+0xb19/0x1260 +[ 22.991477] [] lock_acquire+0xa2/0x1f0 +[ 22.991501] [] mutex_lock_nested+0x89/0x4f0 +[ 22.991529] [] rtnl_lock+0x17/0x20 +[ 22.991552] [] netdev_notify_peers+0x12/0x30 +[ 22.991579] [] netvsc_send_garp+0x22/0x30 [hv_netvsc] +[ 22.991610] [] process_one_work+0x211/0x6e0 +[ 22.991637] [] worker_thread+0x11b/0x3a0 +[ 22.991663] [] kthread+0xed/0x100 +[ 22.991686] [] ret_from_fork+0x7c/0xb0 +[ 22.991715] +-> #0 ((&(&net_device_ctx->dwork)->work)){+.+.+.}: +[ 22.991715] [] check_prevs_add+0x967/0x970 +[ 22.991715] [] __lock_acquire+0xb19/0x1260 +[ 22.991715] [] lock_acquire+0xa2/0x1f0 +[ 22.991715] [] flush_work+0x4e/0x2e0 +[ 22.991715] [] __cancel_work_timer+0x95/0x130 +[ 22.991715] [] cancel_delayed_work_sync+0x13/0x20 +[ 22.991715] [] netvsc_change_mtu+0x84/0x200 [hv_netvsc] +[ 22.991715] [] dev_set_mtu+0x34/0x80 +[ 22.991715] [] do_setlink+0x23a/0xa00 +[ 22.991715] [] rtnl_newlink+0x394/0x5e0 +[ 22.991715] [] rtnetlink_rcv_msg+0x9c/0x260 +[ 22.991715] [] netlink_rcv_skb+0xa9/0xc0 +[ 22.991715] [] rtnetlink_rcv+0x2a/0x40 +[ 22.991715] [] netlink_unicast+0xdd/0x190 +[ 22.991715] [] netlink_sendmsg+0x337/0x750 +[ 22.991715] [] sock_sendmsg+0x99/0xd0 +[ 22.991715] [] ___sys_sendmsg+0x39e/0x3b0 +[ 22.991715] [] __sys_sendmsg+0x42/0x80 +[ 22.991715] [] SyS_sendmsg+0x12/0x20 +[ 22.991715] [] system_call_fastpath+0x16/0x1b + +This is because we hold the rtnl_lock() before ndo_change_mtu() and try 
to flush +the work in netvsc_change_mtu(); in the meantime, netdev_notify_peers() may be +called from the worker and also try to take the rtnl_lock. As a result, the +flush will never succeed. Solve this by not canceling and flushing the work; +this is safe because the transmission done by NETDEV_NOTIFY_PEERS was +synchronized with the netif_tx_disable() called by netvsc_change_mtu(). + +Reported-by: Yaju Cao +Tested-by: Yaju Cao +Cc: K. Y. Srinivasan +Cc: Haiyang Zhang +Signed-off-by: Jason Wang +Acked-by: Haiyang Zhang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc_drv.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -328,7 +328,6 @@ static int netvsc_change_mtu(struct net_ + return -EINVAL; + + nvdev->start_remove = true; +- cancel_delayed_work_sync(&ndevctx->dwork); + cancel_work_sync(&ndevctx->work); + netif_tx_disable(ndev); + rndis_filter_device_remove(hdev); diff --git a/queue-3.10/packet-fix-send-path-when-running-with-proto-0.patch b/queue-3.10/packet-fix-send-path-when-running-with-proto-0.patch new file mode 100644 index 00000000000..53947a685dc --- /dev/null +++ b/queue-3.10/packet-fix-send-path-when-running-with-proto-0.patch @@ -0,0 +1,206 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Daniel Borkmann +Date: Fri, 6 Dec 2013 11:36:15 +0100 +Subject: packet: fix send path when running with proto == 0 + +From: Daniel Borkmann + +[ Upstream commit 66e56cd46b93ef407c60adcac62cf33b06119d50 ] + +Commit e40526cb20b5 introduced a cached dev pointer, that gets +hooked into register_prot_hook(), __unregister_prot_hook() to +update the device used for the send path. + +We need to fix this up, as otherwise this will not work with +sockets created with protocol = 0, plus with sll_protocol = 0 +passed via sockaddr_ll when doing the bind. + +So instead, assign the pointer directly. 
The compiler can inline +these helper functions automagically. + +While at it, also assume the cached dev fast-path as likely(), +and document this variant of socket creation as it seems it is +not widely used (seems not even the author of TX_RING was aware +of that in his reference example [1]). Tested with reproducer +from e40526cb20b5. + + [1] http://wiki.ipxwarzone.com/index.php5?title=Linux_packet_mmap#Example + +Fixes: e40526cb20b5 ("packet: fix use after free race in send path when dev is released") +Signed-off-by: Daniel Borkmann +Tested-by: Salam Noureddine +Tested-by: Jesper Dangaard Brouer +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/networking/packet_mmap.txt | 10 ++++ + net/packet/af_packet.c | 65 +++++++++++++++++++------------ + 2 files changed, 50 insertions(+), 25 deletions(-) + +--- a/Documentation/networking/packet_mmap.txt ++++ b/Documentation/networking/packet_mmap.txt +@@ -123,6 +123,16 @@ Transmission process is similar to captu + [shutdown] close() --------> destruction of the transmission socket and + deallocation of all associated resources. + ++Socket creation and destruction is also straight forward, and is done ++the same way as in capturing described in the previous paragraph: ++ ++ int fd = socket(PF_PACKET, mode, 0); ++ ++The protocol can optionally be 0 in case we only want to transmit ++via this socket, which avoids an expensive call to packet_rcv(). ++In this case, you also need to bind(2) the TX_RING with sll_protocol = 0 ++set. Otherwise, htons(ETH_P_ALL) or any other protocol, for example. ++ + Binding the socket to your network interface is mandatory (with zero copy) to + know the header size of frames used in the circular buffer. 
+ +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -237,6 +237,30 @@ struct packet_skb_cb { + static void __fanout_unlink(struct sock *sk, struct packet_sock *po); + static void __fanout_link(struct sock *sk, struct packet_sock *po); + ++static struct net_device *packet_cached_dev_get(struct packet_sock *po) ++{ ++ struct net_device *dev; ++ ++ rcu_read_lock(); ++ dev = rcu_dereference(po->cached_dev); ++ if (likely(dev)) ++ dev_hold(dev); ++ rcu_read_unlock(); ++ ++ return dev; ++} ++ ++static void packet_cached_dev_assign(struct packet_sock *po, ++ struct net_device *dev) ++{ ++ rcu_assign_pointer(po->cached_dev, dev); ++} ++ ++static void packet_cached_dev_reset(struct packet_sock *po) ++{ ++ RCU_INIT_POINTER(po->cached_dev, NULL); ++} ++ + /* register_prot_hook must be invoked with the po->bind_lock held, + * or from a context in which asynchronous accesses to the packet + * socket is not possible (packet_create()). +@@ -246,12 +270,10 @@ static void register_prot_hook(struct so + struct packet_sock *po = pkt_sk(sk); + + if (!po->running) { +- if (po->fanout) { ++ if (po->fanout) + __fanout_link(sk, po); +- } else { ++ else + dev_add_pack(&po->prot_hook); +- rcu_assign_pointer(po->cached_dev, po->prot_hook.dev); +- } + + sock_hold(sk); + po->running = 1; +@@ -270,12 +292,11 @@ static void __unregister_prot_hook(struc + struct packet_sock *po = pkt_sk(sk); + + po->running = 0; +- if (po->fanout) { ++ ++ if (po->fanout) + __fanout_unlink(sk, po); +- } else { ++ else + __dev_remove_pack(&po->prot_hook); +- RCU_INIT_POINTER(po->cached_dev, NULL); +- } + + __sock_put(sk); + +@@ -2048,19 +2069,6 @@ static int tpacket_fill_skb(struct packe + return tp_len; + } + +-static struct net_device *packet_cached_dev_get(struct packet_sock *po) +-{ +- struct net_device *dev; +- +- rcu_read_lock(); +- dev = rcu_dereference(po->cached_dev); +- if (dev) +- dev_hold(dev); +- rcu_read_unlock(); +- +- return dev; +-} +- + static int tpacket_snd(struct packet_sock *po, 
struct msghdr *msg) + { + struct sk_buff *skb; +@@ -2077,7 +2085,7 @@ static int tpacket_snd(struct packet_soc + + mutex_lock(&po->pg_vec_lock); + +- if (saddr == NULL) { ++ if (likely(saddr == NULL)) { + dev = packet_cached_dev_get(po); + proto = po->num; + addr = NULL; +@@ -2231,7 +2239,7 @@ static int packet_snd(struct socket *soc + * Get and verify the address. + */ + +- if (saddr == NULL) { ++ if (likely(saddr == NULL)) { + dev = packet_cached_dev_get(po); + proto = po->num; + addr = NULL; +@@ -2440,6 +2448,8 @@ static int packet_release(struct socket + + spin_lock(&po->bind_lock); + unregister_prot_hook(sk, false); ++ packet_cached_dev_reset(po); ++ + if (po->prot_hook.dev) { + dev_put(po->prot_hook.dev); + po->prot_hook.dev = NULL; +@@ -2495,14 +2505,17 @@ static int packet_do_bind(struct sock *s + + spin_lock(&po->bind_lock); + unregister_prot_hook(sk, true); ++ + po->num = protocol; + po->prot_hook.type = protocol; + if (po->prot_hook.dev) + dev_put(po->prot_hook.dev); +- po->prot_hook.dev = dev; + ++ po->prot_hook.dev = dev; + po->ifindex = dev ? 
dev->ifindex : 0; + ++ packet_cached_dev_assign(po, dev); ++ + if (protocol == 0) + goto out_unlock; + +@@ -2615,7 +2628,8 @@ static int packet_create(struct net *net + po = pkt_sk(sk); + sk->sk_family = PF_PACKET; + po->num = proto; +- RCU_INIT_POINTER(po->cached_dev, NULL); ++ ++ packet_cached_dev_reset(po); + + sk->sk_destruct = packet_sock_destruct; + sk_refcnt_debug_inc(sk); +@@ -3369,6 +3383,7 @@ static int packet_notifier(struct notifi + sk->sk_error_report(sk); + } + if (msg == NETDEV_UNREGISTER) { ++ packet_cached_dev_reset(po); + po->ifindex = -1; + if (po->prot_hook.dev) + dev_put(po->prot_hook.dev); diff --git a/queue-3.10/rds-prevent-bug_on-triggered-on-congestion-update-to-loopback.patch b/queue-3.10/rds-prevent-bug_on-triggered-on-congestion-update-to-loopback.patch new file mode 100644 index 00000000000..ebd954e7151 --- /dev/null +++ b/queue-3.10/rds-prevent-bug_on-triggered-on-congestion-update-to-loopback.patch @@ -0,0 +1,90 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Venkat Venkatsubra +Date: Mon, 2 Dec 2013 15:41:39 -0800 +Subject: rds: prevent BUG_ON triggered on congestion update to loopback + +From: Venkat Venkatsubra + +[ Upstream commit 18fc25c94eadc52a42c025125af24657a93638c0 ] + +After congestion update on a local connection, when rds_ib_xmit returns +less bytes than that are there in the message, rds_send_xmit calls +back rds_ib_xmit with an offset that causes BUG_ON(off & RDS_FRAG_SIZE) +to trigger. + +For a 4Kb PAGE_SIZE rds_ib_xmit returns min(8240,4096)=4096 when actually +the message contains 8240 bytes. rds_send_xmit thinks there is more to send +and calls rds_ib_xmit again with a data offset "off" of 4096-48(rds header) +=4048 bytes thus hitting the BUG_ON(off & RDS_FRAG_SIZE) [RDS_FRAG_SIZE=4k]. + +The commit 6094628bfd94323fc1cea05ec2c6affd98c18f7f +"rds: prevent BUG_ON triggering on congestion map updates" introduced +this regression. 
That change was addressing the triggering of a different +BUG_ON in rds_send_xmit() on PowerPC architecture with 64Kbytes PAGE_SIZE: + BUG_ON(ret != 0 && + conn->c_xmit_sg == rm->data.op_nents); +This was the sequence it was going through: +(rds_ib_xmit) +/* Do not send cong updates to IB loopback */ +if (conn->c_loopback + && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { + rds_cong_map_updated(conn->c_fcong, ~(u64) 0); + return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; +} +rds_ib_xmit returns 8240 +rds_send_xmit: + c_xmit_data_off = 0 + 8240 - 48 (rds header accounted only the first time) + = 8192 + c_xmit_data_off < 65536 (sg->length), so calls rds_ib_xmit again +rds_ib_xmit returns 8240 +rds_send_xmit: + c_xmit_data_off = 8192 + 8240 = 16432, calls rds_ib_xmit again + and so on (c_xmit_data_off 24672,32912,41152,49392,57632) +rds_ib_xmit returns 8240 +On this iteration this sequence causes the BUG_ON in rds_send_xmit: + while (ret) { + tmp = min_t(int, ret, sg->length - conn->c_xmit_data_off); + [tmp = 65536 - 57632 = 7904] + conn->c_xmit_data_off += tmp; + [c_xmit_data_off = 57632 + 7904 = 65536] + ret -= tmp; + [ret = 8240 - 7904 = 336] + if (conn->c_xmit_data_off == sg->length) { + conn->c_xmit_data_off = 0; + sg++; + conn->c_xmit_sg++; + BUG_ON(ret != 0 && + conn->c_xmit_sg == rm->data.op_nents); + [c_xmit_sg = 1, rm->data.op_nents = 1] + +What the current fix does: +Since the congestion update over loopback is not actually transmitted +as a message, all that rds_ib_xmit needs to do is let the caller think +the full message has been transmitted and not return partial bytes. +It will return 8240 (RDS_CONG_MAP_BYTES+48) when PAGE_SIZE is 4Kb. +And 64Kb+48 when page size is 64Kb. + +Reported-by: Josh Hunt +Tested-by: Honggang Li +Acked-by: Bang Nguyen +Signed-off-by: Venkat Venkatsubra +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/rds/ib_send.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/net/rds/ib_send.c ++++ b/net/rds/ib_send.c +@@ -552,9 +552,8 @@ int rds_ib_xmit(struct rds_connection *c + && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { + rds_cong_map_updated(conn->c_fcong, ~(u64) 0); + scat = &rm->data.op_sg[sg]; +- ret = sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; +- ret = min_t(int, ret, scat->length - conn->c_xmit_data_off); +- return ret; ++ ret = max_t(int, RDS_CONG_MAP_BYTES, scat->length); ++ return sizeof(struct rds_header) + ret; + } + + /* FIXME we may overallocate here */ diff --git a/queue-3.10/rds-prevent-dereference-of-a-null-device.patch b/queue-3.10/rds-prevent-dereference-of-a-null-device.patch new file mode 100644 index 00000000000..692f7a02a0b --- /dev/null +++ b/queue-3.10/rds-prevent-dereference-of-a-null-device.patch @@ -0,0 +1,77 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Sasha Levin +Date: Wed, 18 Dec 2013 23:49:42 -0500 +Subject: rds: prevent dereference of a NULL device + +From: Sasha Levin + +[ Upstream commit c2349758acf1874e4c2b93fe41d072336f1a31d0 ] + +Binding might result in a NULL device, which is dereferenced +causing this BUG: + +[ 1317.260548] BUG: unable to handle kernel NULL pointer dereference at 000000000000097 +4 +[ 1317.261847] IP: [] rds_ib_laddr_check+0x82/0x110 +[ 1317.263315] PGD 418bcb067 PUD 3ceb21067 PMD 0 +[ 1317.263502] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC +[ 1317.264179] Dumping ftrace buffer: +[ 1317.264774] (ftrace buffer empty) +[ 1317.265220] Modules linked in: +[ 1317.265824] CPU: 4 PID: 836 Comm: trinity-child46 Tainted: G W 3.13.0-rc4- +next-20131218-sasha-00013-g2cebb9b-dirty #4159 +[ 1317.267415] task: ffff8803ddf33000 ti: ffff8803cd31a000 task.ti: ffff8803cd31a000 +[ 1317.268399] RIP: 0010:[] [] rds_ib_laddr_check+ +0x82/0x110 +[ 1317.269670] RSP: 0000:ffff8803cd31bdf8 EFLAGS: 00010246 +[ 1317.270230] RAX: 0000000000000000 
RBX: ffff88020b0dd388 RCX: 0000000000000000 +[ 1317.270230] RDX: ffffffff8439822e RSI: 00000000000c000a RDI: 0000000000000286 +[ 1317.270230] RBP: ffff8803cd31be38 R08: 0000000000000000 R09: 0000000000000000 +[ 1317.270230] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 +[ 1317.270230] R13: 0000000054086700 R14: 0000000000a25de0 R15: 0000000000000031 +[ 1317.270230] FS: 00007ff40251d700(0000) GS:ffff88022e200000(0000) knlGS:000000000000 +0000 +[ 1317.270230] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b +[ 1317.270230] CR2: 0000000000000974 CR3: 00000003cd478000 CR4: 00000000000006e0 +[ 1317.270230] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 1317.270230] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000090602 +[ 1317.270230] Stack: +[ 1317.270230] 0000000054086700 5408670000a25de0 5408670000000002 0000000000000000 +[ 1317.270230] ffffffff84223542 00000000ea54c767 0000000000000000 ffffffff86d26160 +[ 1317.270230] ffff8803cd31be68 ffffffff84223556 ffff8803cd31beb8 ffff8800c6765280 +[ 1317.270230] Call Trace: +[ 1317.270230] [] ? rds_trans_get_preferred+0x42/0xa0 +[ 1317.270230] [] rds_trans_get_preferred+0x56/0xa0 +[ 1317.270230] [] rds_bind+0x73/0xf0 +[ 1317.270230] [] SYSC_bind+0x92/0xf0 +[ 1317.270230] [] ? context_tracking_user_exit+0xb8/0x1d0 +[ 1317.270230] [] ? trace_hardirqs_on+0xd/0x10 +[ 1317.270230] [] ? syscall_trace_enter+0x32/0x290 +[ 1317.270230] [] SyS_bind+0xe/0x10 +[ 1317.270230] [] tracesys+0xdd/0xe2 +[ 1317.270230] Code: 00 8b 45 cc 48 8d 75 d0 48 c7 45 d8 00 00 00 00 66 c7 45 d0 02 00 +89 45 d4 48 89 df e8 78 49 76 ff 41 89 c4 85 c0 75 0c 48 8b 03 <80> b8 74 09 00 00 01 7 +4 06 41 bc 9d ff ff ff f6 05 2a b6 c2 02 +[ 1317.270230] RIP [] rds_ib_laddr_check+0x82/0x110 +[ 1317.270230] RSP +[ 1317.270230] CR2: 0000000000000974 + +Signed-off-by: Sasha Levin +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/rds/ib.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/rds/ib.c ++++ b/net/rds/ib.c +@@ -338,7 +338,8 @@ static int rds_ib_laddr_check(__be32 add + ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); + /* due to this, we will claim to support iWARP devices unless we + check node_type. */ +- if (ret || cm_id->device->node_type != RDMA_NODE_IB_CA) ++ if (ret || !cm_id->device || ++ cm_id->device->node_type != RDMA_NODE_IB_CA) + ret = -EADDRNOTAVAIL; + + rdsdebug("addr %pI4 ret %d node type %d\n", diff --git a/queue-3.10/series b/queue-3.10/series index 1fa5f8beab2..ca90f026d6e 100644 --- a/queue-3.10/series +++ b/queue-3.10/series @@ -2,3 +2,36 @@ irqchip-renesas-irqc-fix-irqc_probe-error-handling.patch clocksource-em_sti-set-cpu_possible_mask-to-fix-smp-broadcast.patch gpio-rcar-r-car-gpio-irq-share-interrupt.patch hid-revert-revert-hid-fix-logitech-dj-missing-unifying-device-issue.patch +ipv6-fixed-support-for-blackhole-and-prohibit-routes.patch +net-do-not-pretend-fraglist-support.patch +rds-prevent-bug_on-triggered-on-congestion-update-to-loopback.patch +macvtap-do-not-double-count-received-packets.patch +macvtap-update-file-current-position.patch +tun-update-file-current-position.patch +macvtap-signal-truncated-packets.patch +virtio-delete-napi-structures-from-netdev-before-releasing-memory.patch +packet-fix-send-path-when-running-with-proto-0.patch +ipv6-don-t-count-addrconf-generated-routes-against-gc-limit.patch +net-drop_monitor-fix-the-value-of-maxattr.patch +net-unix-allow-set_peek_off-to-fail.patch +tg3-initialize-reg_base_addr-at-pci-config-offset-120-to-0.patch +netvsc-don-t-flush-peers-notifying-work-during-setting-mtu.patch +ipv6-fix-illegal-mac_header-comparison-on-32bit.patch +net-unix-allow-bind-to-fail-on-mutex-lock.patch +ip_gre-fix-msg_name-parsing-for-recvfrom-recvmsg.patch +net-inet_diag-zero-out-uninitialized-idiag_-src-dst-fields.patch 
+drivers-net-hamradio-integer-overflow-in-hdlcdrv_ioctl.patch +hamradio-yam-fix-info-leak-in-ioctl.patch +net-fec-fix-potential-use-after-free.patch +ipv6-always-set-the-new-created-dst-s-from-in-ip6_rt_copy.patch +rds-prevent-dereference-of-a-null-device.patch +net-rose-restore-old-recvmsg-behavior.patch +vlan-fix-header-ops-passthru-when-doing-tx-vlan-offload.patch +virtio_net-fix-error-handling-for-mergeable-buffers.patch +virtio-net-make-all-rx-paths-handle-errors-consistently.patch +virtio_net-don-t-leak-memory-or-block-when-too-many-frags.patch +virtio-net-fix-refill-races-during-restore.patch +net-llc-fix-use-after-free-in-llc_ui_recvmsg.patch +netpoll-fix-missing-txq-unlock-and-and-oops.patch +bridge-use-spin_lock_bh-in-br_multicast_set_hash_max.patch +net-loosen-constraints-for-recalculating-checksum-in-skb_segment.patch diff --git a/queue-3.10/tg3-initialize-reg_base_addr-at-pci-config-offset-120-to-0.patch b/queue-3.10/tg3-initialize-reg_base_addr-at-pci-config-offset-120-to-0.patch new file mode 100644 index 00000000000..d26f6b9e594 --- /dev/null +++ b/queue-3.10/tg3-initialize-reg_base_addr-at-pci-config-offset-120-to-0.patch @@ -0,0 +1,37 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Nat Gurumoorthy +Date: Mon, 9 Dec 2013 10:43:21 -0800 +Subject: tg3: Initialize REG_BASE_ADDR at PCI config offset 120 to 0 + +From: Nat Gurumoorthy + +[ Upstream commit 388d3335575f4c056dcf7138a30f1454e2145cd8 ] + +The new tg3 driver leaves REG_BASE_ADDR (PCI config offset 120) +uninitialized. From power on reset this register may have garbage in it. The +Register Base Address register defines the device local address of a +register. The data pointed to by this location is read or written using +the Register Data register (PCI config offset 128). When REG_BASE_ADDR has +garbage any read or write of Register Data Register (PCI 128) will cause the +PCI bus to lock up. The TCO watchdog will fire and bring down the system. 
+ +Signed-off-by: Nat Gurumoorthy +Acked-by: Michael Chan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/tg3.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/net/ethernet/broadcom/tg3.c ++++ b/drivers/net/ethernet/broadcom/tg3.c +@@ -16297,6 +16297,9 @@ static int tg3_get_invariants(struct tg3 + /* Clear this out for sanity. */ + tw32(TG3PCI_MEM_WIN_BASE_ADDR, 0); + ++ /* Clear TG3PCI_REG_BASE_ADDR to prevent hangs. */ ++ tw32(TG3PCI_REG_BASE_ADDR, 0); ++ + pci_read_config_dword(tp->pdev, TG3PCI_PCISTATE, + &pci_state_reg); + if ((pci_state_reg & PCISTATE_CONV_PCI_MODE) == 0 && diff --git a/queue-3.10/tun-update-file-current-position.patch b/queue-3.10/tun-update-file-current-position.patch new file mode 100644 index 00000000000..056d7e2c109 --- /dev/null +++ b/queue-3.10/tun-update-file-current-position.patch @@ -0,0 +1,27 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Zhi Yong Wu +Date: Fri, 6 Dec 2013 14:16:51 +0800 +Subject: tun: update file current position + +From: Zhi Yong Wu + +[ Upstream commit d0b7da8afa079ffe018ab3e92879b7138977fc8f ] + +Signed-off-by: Zhi Yong Wu +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -1412,6 +1412,8 @@ static ssize_t tun_chr_aio_read(struct k + ret = tun_do_read(tun, tfile, iocb, iv, len, + file->f_flags & O_NONBLOCK); + ret = min_t(ssize_t, ret, len); ++ if (ret > 0) ++ iocb->ki_pos = ret; + out: + tun_put(tun); + return ret; diff --git a/queue-3.10/virtio-delete-napi-structures-from-netdev-before-releasing-memory.patch b/queue-3.10/virtio-delete-napi-structures-from-netdev-before-releasing-memory.patch new file mode 100644 index 00000000000..c4c890db4f4 --- /dev/null +++ b/queue-3.10/virtio-delete-napi-structures-from-netdev-before-releasing-memory.patch @@ -0,0 +1,80 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Andrey Vagin +Date: Thu, 5 Dec 2013 18:36:21 +0400 +Subject: virtio: delete napi structures from netdev before releasing memory + +From: Andrey Vagin + +[ Upstream commit d4fb84eefe5164f6a6ea51d0a9e26280c661a0dd ] + +free_netdev calls netif_napi_del too, but it's too late, because napi +structures are placed on vi->rq. netif_napi_add() is called from +virtnet_alloc_queues. 
+ +general protection fault: 0000 [#1] SMP +Dumping ftrace buffer: + (ftrace buffer empty) +Modules linked in: ip6table_filter ip6_tables iptable_filter ip_tables virtio_balloon pcspkr virtio_net(-) i2c_pii +CPU: 1 PID: 347 Comm: rmmod Not tainted 3.13.0-rc2+ #171 +Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 +task: ffff8800b779c420 ti: ffff8800379e0000 task.ti: ffff8800379e0000 +RIP: 0010:[] [] __list_del_entry+0x29/0xd0 +RSP: 0018:ffff8800379e1dd0 EFLAGS: 00010a83 +RAX: 6b6b6b6b6b6b6b6b RBX: ffff8800379c2fd0 RCX: dead000000200200 +RDX: 6b6b6b6b6b6b6b6b RSI: 0000000000000001 RDI: ffff8800379c2fd0 +RBP: ffff8800379e1dd0 R08: 0000000000000001 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000001 R12: ffff8800379c2f90 +R13: ffff880037839160 R14: 0000000000000000 R15: 00000000013352f0 +FS: 00007f1400e34740(0000) GS:ffff8800bfb00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b +CR2: 00007f464124c763 CR3: 00000000b68cf000 CR4: 00000000000006e0 +Stack: + ffff8800379e1df0 ffffffff8155beab 6b6b6b6b6b6b6b2b ffff8800378391c0 + ffff8800379e1e18 ffffffff8156499b ffff880037839be0 ffff880037839d20 + ffff88003779d3f0 ffff8800379e1e38 ffffffffa003477c ffff88003779d388 +Call Trace: + [] netif_napi_del+0x1b/0x80 + [] free_netdev+0x8b/0x110 + [] virtnet_remove+0x7c/0x90 [virtio_net] + [] virtio_dev_remove+0x23/0x80 + [] __device_release_driver+0x7f/0xf0 + [] driver_detach+0xc0/0xd0 + [] bus_remove_driver+0x58/0xd0 + [] driver_unregister+0x2c/0x50 + [] unregister_virtio_driver+0xe/0x10 + [] virtio_net_driver_exit+0x10/0x6ce [virtio_net] + [] SyS_delete_module+0x172/0x220 + [] ? trace_hardirqs_on+0xd/0x10 + [] ? 
__audit_syscall_entry+0x9c/0xf0 + [] system_call_fastpath+0x16/0x1b +Code: 00 00 55 48 8b 17 48 b9 00 01 10 00 00 00 ad de 48 8b 47 08 48 89 e5 48 39 ca 74 29 48 b9 00 02 20 00 00 00 +RIP [] __list_del_entry+0x29/0xd0 + RSP +---[ end trace d5931cd3f87c9763 ]--- + +Fixes: 986a4f4d452d (virtio_net: multiqueue support) +Cc: Rusty Russell +Cc: "Michael S. Tsirkin" +Signed-off-by: Andrey Vagin +Acked-by: Michael S. Tsirkin +Acked-by: Jason Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/virtio_net.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -1285,6 +1285,11 @@ static void virtnet_config_changed(struc + + static void virtnet_free_queues(struct virtnet_info *vi) + { ++ int i; ++ ++ for (i = 0; i < vi->max_queue_pairs; i++) ++ netif_napi_del(&vi->rq[i].napi); ++ + kfree(vi->rq); + kfree(vi->sq); + } diff --git a/queue-3.10/virtio-net-fix-refill-races-during-restore.patch b/queue-3.10/virtio-net-fix-refill-races-during-restore.patch new file mode 100644 index 00000000000..0e70dd36836 --- /dev/null +++ b/queue-3.10/virtio-net-fix-refill-races-during-restore.patch @@ -0,0 +1,52 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: Jason Wang +Date: Mon, 30 Dec 2013 11:34:40 +0800 +Subject: virtio-net: fix refill races during restore + +From: Jason Wang + +[ Upstream commit 6cd4ce0099da7702f885b6fa9ebb49e3831d90b4 ] + +During restoring, try_fill_recv() was called with neither napi lock nor napi +disabled. This can lead two try_fill_recv() was called in the same time. Fix +this by refilling before trying to enable napi. + +Fixes 0741bcb5584f9e2390ae6261573c4de8314999f2 +(virtio: net: Add freeze, restore handlers to support S4). + +Cc: Amit Shah +Cc: Rusty Russell +Cc: Michael S. Tsirkin +Cc: Eric Dumazet +Signed-off-by: Jason Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/virtio_net.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -1745,16 +1745,17 @@ static int virtnet_restore(struct virtio + if (err) + return err; + +- if (netif_running(vi->dev)) ++ if (netif_running(vi->dev)) { ++ for (i = 0; i < vi->curr_queue_pairs; i++) ++ if (!try_fill_recv(&vi->rq[i], GFP_KERNEL)) ++ schedule_delayed_work(&vi->refill, 0); ++ + for (i = 0; i < vi->max_queue_pairs; i++) + virtnet_napi_enable(&vi->rq[i]); ++ } + + netif_device_attach(vi->dev); + +- for (i = 0; i < vi->curr_queue_pairs; i++) +- if (!try_fill_recv(&vi->rq[i], GFP_KERNEL)) +- schedule_delayed_work(&vi->refill, 0); +- + mutex_lock(&vi->config_lock); + vi->config_enable = true; + mutex_unlock(&vi->config_lock); diff --git a/queue-3.10/virtio-net-make-all-rx-paths-handle-errors-consistently.patch b/queue-3.10/virtio-net-make-all-rx-paths-handle-errors-consistently.patch new file mode 100644 index 00000000000..ce04176f451 --- /dev/null +++ b/queue-3.10/virtio-net-make-all-rx-paths-handle-errors-consistently.patch @@ -0,0 +1,102 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: "Michael S. Tsirkin" +Date: Thu, 26 Dec 2013 15:32:51 +0200 +Subject: virtio-net: make all RX paths handle errors consistently + +From: "Michael S. Tsirkin" + +receive mergeable now handles errors internally. +Do same for big and small packet paths, otherwise +the logic is too hard to follow. + +Cc: Jason Wang +Cc: David S. Miller +Acked-by: Michael Dalton +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit f121159d72091f25afb22007c833e60a6845e912) +Acked-by: Jason Wang +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/virtio_net.c | 56 ++++++++++++++++++++++++++++++----------------- + 1 file changed, 36 insertions(+), 20 deletions(-) + +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -294,6 +294,34 @@ static struct sk_buff *page_to_skb(struc + return skb; + } + ++static struct sk_buff *receive_small(void *buf, unsigned int len) ++{ ++ struct sk_buff * skb = buf; ++ ++ len -= sizeof(struct virtio_net_hdr); ++ skb_trim(skb, len); ++ ++ return skb; ++} ++ ++static struct sk_buff *receive_big(struct net_device *dev, ++ struct receive_queue *rq, ++ void *buf) ++{ ++ struct page *page = buf; ++ struct sk_buff *skb = page_to_skb(rq, page, 0); ++ ++ if (unlikely(!skb)) ++ goto err; ++ ++ return skb; ++ ++err: ++ dev->stats.rx_dropped++; ++ give_pages(rq, page); ++ return NULL; ++} ++ + static struct sk_buff *receive_mergeable(struct net_device *dev, + struct receive_queue *rq, + void *buf, +@@ -357,7 +385,6 @@ static void receive_buf(struct receive_q + struct net_device *dev = vi->dev; + struct virtnet_stats *stats = this_cpu_ptr(vi->stats); + struct sk_buff *skb; +- struct page *page; + struct skb_vnet_hdr *hdr; + + if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) { +@@ -369,26 +396,15 @@ static void receive_buf(struct receive_q + dev_kfree_skb(buf); + return; + } ++ if (vi->mergeable_rx_bufs) ++ skb = receive_mergeable(dev, rq, buf, len); ++ else if (vi->big_packets) ++ skb = receive_big(dev, rq, buf); ++ else ++ skb = receive_small(buf, len); + +- if (!vi->mergeable_rx_bufs && !vi->big_packets) { +- skb = buf; +- len -= sizeof(struct virtio_net_hdr); +- skb_trim(skb, len); +- } else { +- page = buf; +- if (vi->mergeable_rx_bufs) { +- skb = receive_mergeable(dev, rq, page, len); +- if (unlikely(!skb)) +- return; +- } else { +- skb = page_to_skb(rq, page, len); +- if (unlikely(!skb)) { +- 
dev->stats.rx_dropped++; +- give_pages(rq, page); +- return; +- } +- } +- } ++ if (unlikely(!skb)) ++ return; + + hdr = skb_vnet_hdr(skb); + diff --git a/queue-3.10/virtio_net-don-t-leak-memory-or-block-when-too-many-frags.patch b/queue-3.10/virtio_net-don-t-leak-memory-or-block-when-too-many-frags.patch new file mode 100644 index 00000000000..68ffe70fc93 --- /dev/null +++ b/queue-3.10/virtio_net-don-t-leak-memory-or-block-when-too-many-frags.patch @@ -0,0 +1,38 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: "Michael S. Tsirkin" +Date: Thu, 26 Dec 2013 15:32:55 +0200 +Subject: virtio_net: don't leak memory or block when too many frags + +From: "Michael S. Tsirkin" + +We leak an skb when there are too many frags, +we also stop processing the packet in the middle, +the result is almost sure to be loss of networking. + +Reported-by: Michael Dalton +Acked-by: Michael Dalton +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/virtio_net.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -341,7 +341,7 @@ static struct sk_buff *receive_mergeable + if (i >= MAX_SKB_FRAGS) { + pr_debug("%s: packet too long\n", skb->dev->name); + skb->dev->stats.rx_length_errors++; +- return NULL; ++ goto err_frags; + } + page = virtqueue_get_buf(rq->vq, &len); + if (!page) { +@@ -362,6 +362,7 @@ static struct sk_buff *receive_mergeable + err_skb: + give_pages(rq, page); + while (--num_buf) { ++err_frags: + buf = virtqueue_get_buf(rq->vq, &len); + if (unlikely(!buf)) { + pr_debug("%s: rx error: %d buffers missing\n", diff --git a/queue-3.10/virtio_net-fix-error-handling-for-mergeable-buffers.patch b/queue-3.10/virtio_net-fix-error-handling-for-mergeable-buffers.patch new file mode 100644 index 00000000000..da0db3aac7b --- /dev/null +++ b/queue-3.10/virtio_net-fix-error-handling-for-mergeable-buffers.patch @@ -0,0 +1,129 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: 
"Michael S. Tsirkin" +Date: Thu, 26 Dec 2013 15:32:47 +0200 +Subject: virtio_net: fix error handling for mergeable buffers + +From: "Michael S. Tsirkin" + +Eric Dumazet noticed that if we encounter an error +when processing a mergeable buffer, we don't +dequeue all of the buffers from this packet, +the result is almost sure to be loss of networking. + +Fix this issue. + +Cc: Rusty Russell +Cc: Michael Dalton +Acked-by: Michael Dalton +Cc: Eric Dumazet +Cc: Jason Wang +Cc: David S. Miller +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 8fc3b9e9a229778e5af3aa453c44f1a3857ba769) +Acked-by: Jason Wang +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/virtio_net.c | 66 ++++++++++++++++++++++++++++++++--------------- + 1 file changed, 46 insertions(+), 20 deletions(-) + +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -294,26 +294,33 @@ static struct sk_buff *page_to_skb(struc + return skb; + } + +-static int receive_mergeable(struct receive_queue *rq, struct sk_buff *skb) ++static struct sk_buff *receive_mergeable(struct net_device *dev, ++ struct receive_queue *rq, ++ void *buf, ++ unsigned int len) + { +- struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb); +- struct page *page; +- int num_buf, i, len; ++ struct skb_vnet_hdr *hdr = page_address(buf); ++ int num_buf = hdr->mhdr.num_buffers; ++ struct page *page = buf; ++ struct sk_buff *skb = page_to_skb(rq, page, len); ++ int i; ++ ++ if (unlikely(!skb)) ++ goto err_skb; + +- num_buf = hdr->mhdr.num_buffers; + while (--num_buf) { + i = skb_shinfo(skb)->nr_frags; + if (i >= MAX_SKB_FRAGS) { + pr_debug("%s: packet too long\n", skb->dev->name); + skb->dev->stats.rx_length_errors++; +- return -EINVAL; ++ return NULL; + } + page = virtqueue_get_buf(rq->vq, &len); + if (!page) { +- pr_debug("%s: rx error: %d buffers missing\n", +- skb->dev->name, hdr->mhdr.num_buffers); +- skb->dev->stats.rx_length_errors++; +- return -EINVAL; ++ pr_debug("%s: rx error: %d buffers %d missing\n", ++ dev->name, 
hdr->mhdr.num_buffers, num_buf); ++ dev->stats.rx_length_errors++; ++ goto err_buf; + } + + if (len > PAGE_SIZE) +@@ -323,7 +330,25 @@ static int receive_mergeable(struct rece + + --rq->num; + } +- return 0; ++ return skb; ++err_skb: ++ give_pages(rq, page); ++ while (--num_buf) { ++ buf = virtqueue_get_buf(rq->vq, &len); ++ if (unlikely(!buf)) { ++ pr_debug("%s: rx error: %d buffers missing\n", ++ dev->name, num_buf); ++ dev->stats.rx_length_errors++; ++ break; ++ } ++ page = buf; ++ give_pages(rq, page); ++ --rq->num; ++ } ++err_buf: ++ dev->stats.rx_dropped++; ++ dev_kfree_skb(skb); ++ return NULL; + } + + static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) +@@ -351,17 +376,18 @@ static void receive_buf(struct receive_q + skb_trim(skb, len); + } else { + page = buf; +- skb = page_to_skb(rq, page, len); +- if (unlikely(!skb)) { +- dev->stats.rx_dropped++; +- give_pages(rq, page); +- return; +- } +- if (vi->mergeable_rx_bufs) +- if (receive_mergeable(rq, skb)) { +- dev_kfree_skb(skb); ++ if (vi->mergeable_rx_bufs) { ++ skb = receive_mergeable(dev, rq, page, len); ++ if (unlikely(!skb)) ++ return; ++ } else { ++ skb = page_to_skb(rq, page, len); ++ if (unlikely(!skb)) { ++ dev->stats.rx_dropped++; ++ give_pages(rq, page); + return; + } ++ } + } + + hdr = skb_vnet_hdr(skb); diff --git a/queue-3.10/vlan-fix-header-ops-passthru-when-doing-tx-vlan-offload.patch b/queue-3.10/vlan-fix-header-ops-passthru-when-doing-tx-vlan-offload.patch new file mode 100644 index 00000000000..6f220418e01 --- /dev/null +++ b/queue-3.10/vlan-fix-header-ops-passthru-when-doing-tx-vlan-offload.patch @@ -0,0 +1,105 @@ +From foo@baz Mon Jan 13 09:39:01 PST 2014 +From: "David S. Miller" +Date: Tue, 31 Dec 2013 16:23:35 -0500 +Subject: vlan: Fix header ops passthru when doing TX VLAN offload. + +From: "David S. 
Miller" + +[ Upstream commit 2205369a314e12fcec4781cc73ac9c08fc2b47de ] + +When the vlan code detects that the real device can do TX VLAN offloads +in hardware, it tries to arrange for the real device's header_ops to +be invoked directly. + +But it does so illegally, by simply hooking the real device's +header_ops up to the VLAN device. + +This doesn't work because we will end up invoking a set of header_ops +routines which expect a device type which matches the real device, but +will see a VLAN device instead. + +Fix this by providing a pass-thru set of header_ops which will arrange +to pass the proper real device instead. + +To facilitate this add a dev_rebuild_header(). There are +implementations which provide a ->cache and ->create but not a +->rebuild (f.e. PLIP). So we need a helper function just like +dev_hard_header() to avoid crashes. + +Use this helper in the one existing place where the +header_ops->rebuild was being invoked, the neighbour code. + +With lots of help from Florian Westphal. + +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/netdevice.h | 9 +++++++++ + net/8021q/vlan_dev.c | 19 ++++++++++++++++++- + net/core/neighbour.c | 2 +- + 3 files changed, 28 insertions(+), 2 deletions(-) + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -1772,6 +1772,15 @@ static inline int dev_parse_header(const + return dev->header_ops->parse(skb, haddr); + } + ++static inline int dev_rebuild_header(struct sk_buff *skb) ++{ ++ const struct net_device *dev = skb->dev; ++ ++ if (!dev->header_ops || !dev->header_ops->rebuild) ++ return 0; ++ return dev->header_ops->rebuild(skb); ++} ++ + typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len); + extern int register_gifconf(unsigned int family, gifconf_func_t * gifconf); + static inline int unregister_gifconf(unsigned int family) +--- a/net/8021q/vlan_dev.c ++++ b/net/8021q/vlan_dev.c +@@ -549,6 +549,23 @@ static const struct header_ops vlan_head + .parse = eth_header_parse, + }; + ++static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev, ++ unsigned short type, ++ const void *daddr, const void *saddr, ++ unsigned int len) ++{ ++ struct vlan_dev_priv *vlan = vlan_dev_priv(dev); ++ struct net_device *real_dev = vlan->real_dev; ++ ++ return dev_hard_header(skb, real_dev, type, daddr, saddr, len); ++} ++ ++static const struct header_ops vlan_passthru_header_ops = { ++ .create = vlan_passthru_hard_header, ++ .rebuild = dev_rebuild_header, ++ .parse = eth_header_parse, ++}; ++ + static struct device_type vlan_type = { + .name = "vlan", + }; +@@ -592,7 +609,7 @@ static int vlan_dev_init(struct net_devi + + dev->needed_headroom = real_dev->needed_headroom; + if (real_dev->features & NETIF_F_HW_VLAN_CTAG_TX) { +- dev->header_ops = real_dev->header_ops; ++ dev->header_ops = &vlan_passthru_header_ops; + dev->hard_header_len = real_dev->hard_header_len; + } else { + dev->header_ops = &vlan_header_ops; +--- a/net/core/neighbour.c ++++ 
b/net/core/neighbour.c +@@ -1274,7 +1274,7 @@ int neigh_compat_output(struct neighbour + + if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, + skb->len) < 0 && +- dev->header_ops->rebuild(skb)) ++ dev_rebuild_header(skb)) + return 0; + + return dev_queue_xmit(skb);