git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.10-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 13 Jan 2014 17:49:25 +0000 (09:49 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 13 Jan 2014 17:49:25 +0000 (09:49 -0800)
added patches:
bridge-use-spin_lock_bh-in-br_multicast_set_hash_max.patch
drivers-net-hamradio-integer-overflow-in-hdlcdrv_ioctl.patch
hamradio-yam-fix-info-leak-in-ioctl.patch
ip_gre-fix-msg_name-parsing-for-recvfrom-recvmsg.patch
ipv6-always-set-the-new-created-dst-s-from-in-ip6_rt_copy.patch
ipv6-don-t-count-addrconf-generated-routes-against-gc-limit.patch
ipv6-fix-illegal-mac_header-comparison-on-32bit.patch
ipv6-fixed-support-for-blackhole-and-prohibit-routes.patch
macvtap-do-not-double-count-received-packets.patch
macvtap-signal-truncated-packets.patch
macvtap-update-file-current-position.patch
net-do-not-pretend-fraglist-support.patch
net-drop_monitor-fix-the-value-of-maxattr.patch
net-fec-fix-potential-use-after-free.patch
net-inet_diag-zero-out-uninitialized-idiag_-src-dst-fields.patch
net-llc-fix-use-after-free-in-llc_ui_recvmsg.patch
net-loosen-constraints-for-recalculating-checksum-in-skb_segment.patch
net-rose-restore-old-recvmsg-behavior.patch
net-unix-allow-bind-to-fail-on-mutex-lock.patch
net-unix-allow-set_peek_off-to-fail.patch
netpoll-fix-missing-txq-unlock-and-and-oops.patch
netvsc-don-t-flush-peers-notifying-work-during-setting-mtu.patch
packet-fix-send-path-when-running-with-proto-0.patch
rds-prevent-bug_on-triggered-on-congestion-update-to-loopback.patch
rds-prevent-dereference-of-a-null-device.patch
tg3-initialize-reg_base_addr-at-pci-config-offset-120-to-0.patch
tun-update-file-current-position.patch
virtio-delete-napi-structures-from-netdev-before-releasing-memory.patch
virtio-net-fix-refill-races-during-restore.patch
virtio-net-make-all-rx-paths-handle-errors-consistently.patch
virtio_net-don-t-leak-memory-or-block-when-too-many-frags.patch
virtio_net-fix-error-handling-for-mergeable-buffers.patch
vlan-fix-header-ops-passthru-when-doing-tx-vlan-offload.patch

34 files changed:
queue-3.10/bridge-use-spin_lock_bh-in-br_multicast_set_hash_max.patch [new file with mode: 0644]
queue-3.10/drivers-net-hamradio-integer-overflow-in-hdlcdrv_ioctl.patch [new file with mode: 0644]
queue-3.10/hamradio-yam-fix-info-leak-in-ioctl.patch [new file with mode: 0644]
queue-3.10/ip_gre-fix-msg_name-parsing-for-recvfrom-recvmsg.patch [new file with mode: 0644]
queue-3.10/ipv6-always-set-the-new-created-dst-s-from-in-ip6_rt_copy.patch [new file with mode: 0644]
queue-3.10/ipv6-don-t-count-addrconf-generated-routes-against-gc-limit.patch [new file with mode: 0644]
queue-3.10/ipv6-fix-illegal-mac_header-comparison-on-32bit.patch [new file with mode: 0644]
queue-3.10/ipv6-fixed-support-for-blackhole-and-prohibit-routes.patch [new file with mode: 0644]
queue-3.10/macvtap-do-not-double-count-received-packets.patch [new file with mode: 0644]
queue-3.10/macvtap-signal-truncated-packets.patch [new file with mode: 0644]
queue-3.10/macvtap-update-file-current-position.patch [new file with mode: 0644]
queue-3.10/net-do-not-pretend-fraglist-support.patch [new file with mode: 0644]
queue-3.10/net-drop_monitor-fix-the-value-of-maxattr.patch [new file with mode: 0644]
queue-3.10/net-fec-fix-potential-use-after-free.patch [new file with mode: 0644]
queue-3.10/net-inet_diag-zero-out-uninitialized-idiag_-src-dst-fields.patch [new file with mode: 0644]
queue-3.10/net-llc-fix-use-after-free-in-llc_ui_recvmsg.patch [new file with mode: 0644]
queue-3.10/net-loosen-constraints-for-recalculating-checksum-in-skb_segment.patch [new file with mode: 0644]
queue-3.10/net-rose-restore-old-recvmsg-behavior.patch [new file with mode: 0644]
queue-3.10/net-unix-allow-bind-to-fail-on-mutex-lock.patch [new file with mode: 0644]
queue-3.10/net-unix-allow-set_peek_off-to-fail.patch [new file with mode: 0644]
queue-3.10/netpoll-fix-missing-txq-unlock-and-and-oops.patch [new file with mode: 0644]
queue-3.10/netvsc-don-t-flush-peers-notifying-work-during-setting-mtu.patch [new file with mode: 0644]
queue-3.10/packet-fix-send-path-when-running-with-proto-0.patch [new file with mode: 0644]
queue-3.10/rds-prevent-bug_on-triggered-on-congestion-update-to-loopback.patch [new file with mode: 0644]
queue-3.10/rds-prevent-dereference-of-a-null-device.patch [new file with mode: 0644]
queue-3.10/series
queue-3.10/tg3-initialize-reg_base_addr-at-pci-config-offset-120-to-0.patch [new file with mode: 0644]
queue-3.10/tun-update-file-current-position.patch [new file with mode: 0644]
queue-3.10/virtio-delete-napi-structures-from-netdev-before-releasing-memory.patch [new file with mode: 0644]
queue-3.10/virtio-net-fix-refill-races-during-restore.patch [new file with mode: 0644]
queue-3.10/virtio-net-make-all-rx-paths-handle-errors-consistently.patch [new file with mode: 0644]
queue-3.10/virtio_net-don-t-leak-memory-or-block-when-too-many-frags.patch [new file with mode: 0644]
queue-3.10/virtio_net-fix-error-handling-for-mergeable-buffers.patch [new file with mode: 0644]
queue-3.10/vlan-fix-header-ops-passthru-when-doing-tx-vlan-offload.patch [new file with mode: 0644]

diff --git a/queue-3.10/bridge-use-spin_lock_bh-in-br_multicast_set_hash_max.patch b/queue-3.10/bridge-use-spin_lock_bh-in-br_multicast_set_hash_max.patch
new file mode 100644 (file)
index 0000000..d87120d
--- /dev/null
@@ -0,0 +1,63 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Curt Brune <curt@cumulusnetworks.com>
+Date: Mon, 6 Jan 2014 11:00:32 -0800
+Subject: bridge: use spin_lock_bh() in br_multicast_set_hash_max
+
+From: Curt Brune <curt@cumulusnetworks.com>
+
+[ Upstream commit fe0d692bbc645786bce1a98439e548ae619269f5 ]
+
+br_multicast_set_hash_max() is called from process context in
+net/bridge/br_sysfs_br.c by the sysfs store_hash_max() function.
+
+br_multicast_set_hash_max() calls spin_lock(&br->multicast_lock),
+which can deadlock the CPU if a softirq that also tries to take the
+same lock interrupts br_multicast_set_hash_max() while the lock is
+held.  This can happen quite easily when any of the bridge multicast
+timers expire, which try to take the same lock.
+
+The fix here is to use spin_lock_bh(), preventing other softirqs from
+executing on this CPU.
+
+Steps to reproduce:
+
+1. Create a bridge with several interfaces (I used 4).
+2. Set the "multicast query interval" to a low number, like 2.
+3. Enable the bridge as a multicast querier.
+4. Repeatedly set the bridge hash_max parameter via sysfs.
+
+  # brctl addbr br0
+  # brctl addif br0 eth1 eth2 eth3 eth4
+  # brctl setmcqi br0 2
+  # brctl setmcquerier br0 1
+
+  # while true ; do echo 4096 > /sys/class/net/br0/bridge/hash_max; done
+
+Signed-off-by: Curt Brune <curt@cumulusnetworks.com>
+Signed-off-by: Scott Feldman <sfeldma@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_multicast.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/bridge/br_multicast.c
++++ b/net/bridge/br_multicast.c
+@@ -1839,7 +1839,7 @@ int br_multicast_set_hash_max(struct net
+       u32 old;
+       struct net_bridge_mdb_htable *mdb;
+-      spin_lock(&br->multicast_lock);
++      spin_lock_bh(&br->multicast_lock);
+       if (!netif_running(br->dev))
+               goto unlock;
+@@ -1871,7 +1871,7 @@ rollback:
+       }
+ unlock:
+-      spin_unlock(&br->multicast_lock);
++      spin_unlock_bh(&br->multicast_lock);
+       return err;
+ }
diff --git a/queue-3.10/drivers-net-hamradio-integer-overflow-in-hdlcdrv_ioctl.patch b/queue-3.10/drivers-net-hamradio-integer-overflow-in-hdlcdrv_ioctl.patch
new file mode 100644 (file)
index 0000000..89cd4d3
--- /dev/null
@@ -0,0 +1,32 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Wenliang Fan <fanwlexca@gmail.com>
+Date: Tue, 17 Dec 2013 11:25:28 +0800
+Subject: drivers/net/hamradio: Integer overflow in hdlcdrv_ioctl()
+
+From: Wenliang Fan <fanwlexca@gmail.com>
+
+[ Upstream commit e9db5c21d3646a6454fcd04938dd215ac3ab620a ]
+
+The local variable 'bi' comes from userspace. If userspace passed a
+large number to 'bi.data.calibrate', there would be an integer overflow
+in the following line:
+       s->hdlctx.calibrate = bi.data.calibrate * s->par.bitrate / 16;
+
+Signed-off-by: Wenliang Fan <fanwlexca@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hamradio/hdlcdrv.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/hamradio/hdlcdrv.c
++++ b/drivers/net/hamradio/hdlcdrv.c
+@@ -571,6 +571,8 @@ static int hdlcdrv_ioctl(struct net_devi
+       case HDLCDRVCTL_CALIBRATE:
+               if(!capable(CAP_SYS_RAWIO))
+                       return -EPERM;
++              if (bi.data.calibrate > INT_MAX / s->par.bitrate)
++                      return -EINVAL;
+               s->hdlctx.calibrate = bi.data.calibrate * s->par.bitrate / 16;
+               return 0;
diff --git a/queue-3.10/hamradio-yam-fix-info-leak-in-ioctl.patch b/queue-3.10/hamradio-yam-fix-info-leak-in-ioctl.patch
new file mode 100644 (file)
index 0000000..06773fd
--- /dev/null
@@ -0,0 +1,33 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: =?UTF-8?q?Salva=20Peir=C3=B3?= <speiro@ai2.upv.es>
+Date: Tue, 17 Dec 2013 10:06:30 +0100
+Subject: hamradio/yam: fix info leak in ioctl
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Salva Peiró <speiro@ai2.upv.es>
+
+[ Upstream commit 8e3fbf870481eb53b2d3a322d1fc395ad8b367ed ]
+
+The yam_ioctl() code fails to initialise the cmd field
+of the struct yamdrv_ioctl_cfg. Add an explicit memset(0)
+before filling the structure to avoid the 4-byte info leak.
+
+Signed-off-by: Salva Peiró <speiro@ai2.upv.es>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hamradio/yam.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/hamradio/yam.c
++++ b/drivers/net/hamradio/yam.c
+@@ -1058,6 +1058,7 @@ static int yam_ioctl(struct net_device *
+               break;
+       case SIOCYAMGCFG:
++              memset(&yi, 0, sizeof(yi));
+               yi.cfg.mask = 0xffffffff;
+               yi.cfg.iobase = yp->iobase;
+               yi.cfg.irq = yp->irq;
diff --git a/queue-3.10/ip_gre-fix-msg_name-parsing-for-recvfrom-recvmsg.patch b/queue-3.10/ip_gre-fix-msg_name-parsing-for-recvfrom-recvmsg.patch
new file mode 100644 (file)
index 0000000..3dd6366
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
+Date: Mon, 16 Dec 2013 11:02:09 +0200
+Subject: ip_gre: fix msg_name parsing for recvfrom/recvmsg
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Timo Teräs <timo.teras@iki.fi>
+
+[ Upstream commit 0e3da5bb8da45890b1dc413404e0f978ab71173e ]
+
+ipgre_header_parse() needs to parse the tunnel's ip header and it
+uses mac_header to locate the iphdr. This got broken when gre tunneling
+was refactored as mac_header is no longer updated to point to iphdr.
+Introduce skb_pop_mac_header() helper to do the mac_header assignment
+and use it in ipgre_rcv() to fix msg_name parsing.
+
+Bug introduced in commit c54419321455 (GRE: Refactor GRE tunneling code.)
+
+Cc: Pravin B Shelar <pshelar@nicira.com>
+Signed-off-by: Timo Teräs <timo.teras@iki.fi>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_gre.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv4/ip_gre.c
++++ b/net/ipv4/ip_gre.c
+@@ -335,6 +335,7 @@ static int ipgre_rcv(struct sk_buff *skb
+                                 iph->saddr, iph->daddr, tpi.key);
+       if (tunnel) {
++              skb_pop_mac_header(skb);
+               ip_tunnel_rcv(tunnel, skb, &tpi, hdr_len, log_ecn_error);
+               return 0;
+       }
diff --git a/queue-3.10/ipv6-always-set-the-new-created-dst-s-from-in-ip6_rt_copy.patch b/queue-3.10/ipv6-always-set-the-new-created-dst-s-from-in-ip6_rt_copy.patch
new file mode 100644 (file)
index 0000000..1fea43f
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Li RongQing <roy.qing.li@gmail.com>
+Date: Thu, 19 Dec 2013 12:40:26 +0800
+Subject: ipv6: always set the new created dst's from in ip6_rt_copy
+
+From: Li RongQing <roy.qing.li@gmail.com>
+
+[ Upstream commit 24f5b855e17df7e355eacd6c4a12cc4d6a6c9ff0 ]
+
+ip6_rt_copy only sets dst.from if ort has both RTF_ADDRCONF and RTF_DEFAULT
+set. But prefix routes that were installed by hand locally can also have an
+expiration, and no flag combination can guarantee that a potential from
+never expires, so we should always set the newly created dst's from.
+
+This also fixes the new created dst is always expired since the ort, which
+is created by RA, maybe has RTF_EXPIRES and RTF_ADDRCONF, but no RTF_DEFAULT.
+
+Suggested-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+CC: Gao feng <gaofeng@cn.fujitsu.com>
+Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/route.c |    4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -1838,9 +1838,7 @@ static struct rt6_info *ip6_rt_copy(stru
+               else
+                       rt->rt6i_gateway = *dest;
+               rt->rt6i_flags = ort->rt6i_flags;
+-              if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
+-                  (RTF_DEFAULT | RTF_ADDRCONF))
+-                      rt6_set_from(rt, ort);
++              rt6_set_from(rt, ort);
+               rt->rt6i_metric = 0;
+ #ifdef CONFIG_IPV6_SUBTREES
diff --git a/queue-3.10/ipv6-don-t-count-addrconf-generated-routes-against-gc-limit.patch b/queue-3.10/ipv6-don-t-count-addrconf-generated-routes-against-gc-limit.patch
new file mode 100644 (file)
index 0000000..50aab48
--- /dev/null
@@ -0,0 +1,44 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Date: Sat, 7 Dec 2013 03:33:45 +0100
+Subject: ipv6: don't count addrconf generated routes against gc limit
+
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+
+[ Upstream commit a3300ef4bbb1f1e33ff0400e1e6cf7733d988f4f ]
+
+Brett Ciphery reported that new ipv6 addresses failed to get installed
+because the addrconf generated dsts were counted against the dst gc
+limit. We don't need to count those routes like we currently don't count
+administratively added routes.
+
+Because the max_addresses check enforces a limit on unbounded address
+generation first in case someone plays with router advertisements, we
+are still safe here.
+
+Reported-by: Brett Ciphery <brett.ciphery@windriver.com>
+Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/route.c |    8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -2099,12 +2099,10 @@ struct rt6_info *addrconf_dst_alloc(stru
+                                   bool anycast)
+ {
+       struct net *net = dev_net(idev->dev);
+-      struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
+-
+-      if (!rt) {
+-              net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
++      struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
++                                          DST_NOCOUNT, NULL);
++      if (!rt)
+               return ERR_PTR(-ENOMEM);
+-      }
+       in6_dev_hold(idev);
diff --git a/queue-3.10/ipv6-fix-illegal-mac_header-comparison-on-32bit.patch b/queue-3.10/ipv6-fix-illegal-mac_header-comparison-on-32bit.patch
new file mode 100644 (file)
index 0000000..2097cdc
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Date: Fri, 13 Dec 2013 15:12:27 +0100
+Subject: ipv6: fix illegal mac_header comparison on 32bit
+
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+
+Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/skbuff.h |    5 +++++
+ net/ipv6/udp_offload.c |    2 +-
+ 2 files changed, 6 insertions(+), 1 deletion(-)
+
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -1741,6 +1741,11 @@ static inline void skb_set_mac_header(st
+ }
+ #endif /* NET_SKBUFF_DATA_USES_OFFSET */
++static inline void skb_pop_mac_header(struct sk_buff *skb)
++{
++      skb->mac_header = skb->network_header;
++}
++
+ static inline void skb_probe_transport_header(struct sk_buff *skb,
+                                             const int offset_hint)
+ {
+--- a/net/ipv6/udp_offload.c
++++ b/net/ipv6/udp_offload.c
+@@ -85,7 +85,7 @@ static struct sk_buff *udp6_ufo_fragment
+       /* Check if there is enough headroom to insert fragment header. */
+       tnl_hlen = skb_tnl_header_len(skb);
+-      if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) {
++      if (skb_mac_header(skb) < skb->head + tnl_hlen + frag_hdr_sz) {
+               if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
+                       goto out;
+       }
diff --git a/queue-3.10/ipv6-fixed-support-for-blackhole-and-prohibit-routes.patch b/queue-3.10/ipv6-fixed-support-for-blackhole-and-prohibit-routes.patch
new file mode 100644 (file)
index 0000000..e792dfb
--- /dev/null
@@ -0,0 +1,97 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Kamala R <kamala@aristanetworks.com>
+Date: Mon, 2 Dec 2013 19:55:21 +0530
+Subject: IPv6: Fixed support for blackhole and prohibit routes
+
+From: Kamala R <kamala@aristanetworks.com>
+
+[ Upstream commit 7150aede5dd241539686e17d9592f5ebd28a2cda ]
+
+The behaviour of blackhole and prohibit routes has been corrected by setting
+the input and output pointers of the dst variable appropriately. For
+blackhole routes, they are set to dst_discard and to ip6_pkt_discard and
+ip6_pkt_discard_out respectively for prohibit routes.
+
+ipv6: ip6_pkt_prohibit(_out) should not depend on
+CONFIG_IPV6_MULTIPLE_TABLES
+
+We need ip6_pkt_prohibit(_out) available without
+CONFIG_IPV6_MULTIPLE_TABLES
+
+Signed-off-by: Kamala R <kamala@aristanetworks.com>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/route.c |   22 ++++++++++------------
+ 1 file changed, 10 insertions(+), 12 deletions(-)
+
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -84,6 +84,8 @@ static int            ip6_dst_gc(struct dst_ops *
+ static int            ip6_pkt_discard(struct sk_buff *skb);
+ static int            ip6_pkt_discard_out(struct sk_buff *skb);
++static int            ip6_pkt_prohibit(struct sk_buff *skb);
++static int            ip6_pkt_prohibit_out(struct sk_buff *skb);
+ static void           ip6_link_failure(struct sk_buff *skb);
+ static void           ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+                                          struct sk_buff *skb, u32 mtu);
+@@ -233,9 +235,6 @@ static const struct rt6_info ip6_null_en
+ #ifdef CONFIG_IPV6_MULTIPLE_TABLES
+-static int ip6_pkt_prohibit(struct sk_buff *skb);
+-static int ip6_pkt_prohibit_out(struct sk_buff *skb);
+-
+ static const struct rt6_info ip6_prohibit_entry_template = {
+       .dst = {
+               .__refcnt       = ATOMIC_INIT(1),
+@@ -1498,21 +1497,24 @@ int ip6_route_add(struct fib6_config *cf
+                               goto out;
+                       }
+               }
+-              rt->dst.output = ip6_pkt_discard_out;
+-              rt->dst.input = ip6_pkt_discard;
+               rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
+               switch (cfg->fc_type) {
+               case RTN_BLACKHOLE:
+                       rt->dst.error = -EINVAL;
++                      rt->dst.output = dst_discard;
++                      rt->dst.input = dst_discard;
+                       break;
+               case RTN_PROHIBIT:
+                       rt->dst.error = -EACCES;
++                      rt->dst.output = ip6_pkt_prohibit_out;
++                      rt->dst.input = ip6_pkt_prohibit;
+                       break;
+               case RTN_THROW:
+-                      rt->dst.error = -EAGAIN;
+-                      break;
+               default:
+-                      rt->dst.error = -ENETUNREACH;
++                      rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
++                                      : -ENETUNREACH;
++                      rt->dst.output = ip6_pkt_discard_out;
++                      rt->dst.input = ip6_pkt_discard;
+                       break;
+               }
+               goto install_route;
+@@ -2077,8 +2079,6 @@ static int ip6_pkt_discard_out(struct sk
+       return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
+ }
+-#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+-
+ static int ip6_pkt_prohibit(struct sk_buff *skb)
+ {
+       return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
+@@ -2090,8 +2090,6 @@ static int ip6_pkt_prohibit_out(struct s
+       return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
+ }
+-#endif
+-
+ /*
+  *    Allocate a dst for local (unicast / anycast) address.
+  */
diff --git a/queue-3.10/macvtap-do-not-double-count-received-packets.patch b/queue-3.10/macvtap-do-not-double-count-received-packets.patch
new file mode 100644 (file)
index 0000000..e105a20
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Vlad Yasevich <vyasevic@redhat.com>
+Date: Tue, 26 Nov 2013 12:37:12 -0500
+Subject: macvtap: Do not double-count received packets
+
+From: Vlad Yasevich <vyasevic@redhat.com>
+
+[ Upstream commit 006da7b07bc4d3a7ffabad17cf639eec6849c9dc ]
+
+Currently macvlan will count received packets after calling each
+vlans receive handler.   Macvtap attempts to count the packet
+yet again when the user reads the packet from the tap socket.
+This code doesn't do this consistently either.  Remove the
+counting from macvtap and let only macvlan count received
+packets.
+
+Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/macvtap.c |    7 -------
+ 1 file changed, 7 deletions(-)
+
+--- a/drivers/net/macvtap.c
++++ b/drivers/net/macvtap.c
+@@ -797,7 +797,6 @@ static ssize_t macvtap_put_user(struct m
+                               const struct sk_buff *skb,
+                               const struct iovec *iv, int len)
+ {
+-      struct macvlan_dev *vlan;
+       int ret;
+       int vnet_hdr_len = 0;
+       int vlan_offset = 0;
+@@ -851,12 +850,6 @@ static ssize_t macvtap_put_user(struct m
+       copied += len;
+ done:
+-      rcu_read_lock_bh();
+-      vlan = rcu_dereference_bh(q->vlan);
+-      if (vlan)
+-              macvlan_count_rx(vlan, copied - vnet_hdr_len, ret == 0, 0);
+-      rcu_read_unlock_bh();
+-
+       return ret ? ret : copied;
+ }
diff --git a/queue-3.10/macvtap-signal-truncated-packets.patch b/queue-3.10/macvtap-signal-truncated-packets.patch
new file mode 100644 (file)
index 0000000..aa63542
--- /dev/null
@@ -0,0 +1,76 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Jason Wang <jasowang@redhat.com>
+Date: Wed, 11 Dec 2013 13:08:34 +0800
+Subject: macvtap: signal truncated packets
+
+From: Jason Wang <jasowang@redhat.com>
+
+[ Upstream commit ce232ce01d61b184202bb185103d119820e1260c ]
+
+macvtap_put_user() never returns a value greater than the iov length, which in
+fact bypasses the truncation check in macvtap_recvmsg(). Fix this by always
+returning the size of the packet plus the possible vlan header to let the
+truncation check work.
+
+Cc: Vlad Yasevich <vyasevich@gmail.com>
+Cc: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
+Cc: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Vlad Yasevich <vyasevich@gmail.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/macvtap.c |   11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/macvtap.c
++++ b/drivers/net/macvtap.c
+@@ -800,7 +800,7 @@ static ssize_t macvtap_put_user(struct m
+       int ret;
+       int vnet_hdr_len = 0;
+       int vlan_offset = 0;
+-      int copied;
++      int copied, total;
+       if (q->flags & IFF_VNET_HDR) {
+               struct virtio_net_hdr vnet_hdr;
+@@ -815,7 +815,8 @@ static ssize_t macvtap_put_user(struct m
+               if (memcpy_toiovecend(iv, (void *)&vnet_hdr, 0, sizeof(vnet_hdr)))
+                       return -EFAULT;
+       }
+-      copied = vnet_hdr_len;
++      total = copied = vnet_hdr_len;
++      total += skb->len;
+       if (!vlan_tx_tag_present(skb))
+               len = min_t(int, skb->len, len);
+@@ -830,6 +831,7 @@ static ssize_t macvtap_put_user(struct m
+               vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto);
+               len = min_t(int, skb->len + VLAN_HLEN, len);
++              total += VLAN_HLEN;
+               copy = min_t(int, vlan_offset, len);
+               ret = skb_copy_datagram_const_iovec(skb, 0, iv, copied, copy);
+@@ -847,10 +849,9 @@ static ssize_t macvtap_put_user(struct m
+       }
+       ret = skb_copy_datagram_const_iovec(skb, vlan_offset, iv, copied, len);
+-      copied += len;
+ done:
+-      return ret ? ret : copied;
++      return ret ? ret : total;
+ }
+ static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb,
+@@ -902,7 +903,7 @@ static ssize_t macvtap_aio_read(struct k
+       }
+       ret = macvtap_do_read(q, iocb, iv, len, file->f_flags & O_NONBLOCK);
+-      ret = min_t(ssize_t, ret, len); /* XXX copied from tun.c. Why? */
++      ret = min_t(ssize_t, ret, len);
+       if (ret > 0)
+               iocb->ki_pos = ret;
+ out:
diff --git a/queue-3.10/macvtap-update-file-current-position.patch b/queue-3.10/macvtap-update-file-current-position.patch
new file mode 100644 (file)
index 0000000..fae6177
--- /dev/null
@@ -0,0 +1,27 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
+Date: Fri, 6 Dec 2013 14:16:50 +0800
+Subject: macvtap: update file current position
+
+From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
+
+[ Upstream commit e6ebc7f16ca1434a334647aa56399c546be4e64b ]
+
+Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/macvtap.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/macvtap.c
++++ b/drivers/net/macvtap.c
+@@ -903,6 +903,8 @@ static ssize_t macvtap_aio_read(struct k
+       ret = macvtap_do_read(q, iocb, iv, len, file->f_flags & O_NONBLOCK);
+       ret = min_t(ssize_t, ret, len); /* XXX copied from tun.c. Why? */
++      if (ret > 0)
++              iocb->ki_pos = ret;
+ out:
+       return ret;
+ }
diff --git a/queue-3.10/net-do-not-pretend-fraglist-support.patch b/queue-3.10/net-do-not-pretend-fraglist-support.patch
new file mode 100644 (file)
index 0000000..57e18e6
--- /dev/null
@@ -0,0 +1,74 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 2 Dec 2013 08:51:13 -0800
+Subject: net: do not pretend FRAGLIST support
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 28e24c62ab3062e965ef1b3bcc244d50aee7fa85 ]
+
+Few network drivers really support frag_list: virtual drivers.
+
+Some drivers wrongly advertise NETIF_F_FRAGLIST feature.
+
+If skb with a frag_list is given to them, packet on the wire will be
+corrupt.
+
+Remove this flag, as core networking stack will make sure to
+provide packets that can be sent without corruption.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Thadeu Lima de Souza Cascardo <cascardo@linux.vnet.ibm.com>
+Cc: Anirudha Sarangi <anirudh@xilinx.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/ibm/ehea/ehea_main.c         |    2 +-
+ drivers/net/ethernet/tehuti/tehuti.c              |    1 -
+ drivers/net/ethernet/xilinx/ll_temac_main.c       |    2 +-
+ drivers/net/ethernet/xilinx/xilinx_axienet_main.c |    2 +-
+ 4 files changed, 3 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c
++++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c
+@@ -3023,7 +3023,7 @@ static struct ehea_port *ehea_setup_sing
+       dev->hw_features = NETIF_F_SG | NETIF_F_TSO |
+                     NETIF_F_IP_CSUM | NETIF_F_HW_VLAN_CTAG_TX;
+-      dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_TSO |
++      dev->features = NETIF_F_SG | NETIF_F_TSO |
+                     NETIF_F_HIGHDMA | NETIF_F_IP_CSUM |
+                     NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
+                     NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_RXCSUM;
+--- a/drivers/net/ethernet/tehuti/tehuti.c
++++ b/drivers/net/ethernet/tehuti/tehuti.c
+@@ -2019,7 +2019,6 @@ bdx_probe(struct pci_dev *pdev, const st
+               ndev->features = NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO
+                   | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
+                   NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_RXCSUM
+-                  /*| NETIF_F_FRAGLIST */
+                   ;
+               ndev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG |
+                       NETIF_F_TSO | NETIF_F_HW_VLAN_CTAG_TX;
+--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
++++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
+@@ -1016,7 +1016,7 @@ static int temac_of_probe(struct platfor
+       dev_set_drvdata(&op->dev, ndev);
+       SET_NETDEV_DEV(ndev, &op->dev);
+       ndev->flags &= ~IFF_MULTICAST;  /* clear multicast */
+-      ndev->features = NETIF_F_SG | NETIF_F_FRAGLIST;
++      ndev->features = NETIF_F_SG;
+       ndev->netdev_ops = &temac_netdev_ops;
+       ndev->ethtool_ops = &temac_ethtool_ops;
+ #if 0
+--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
++++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+@@ -1488,7 +1488,7 @@ static int axienet_of_probe(struct platf
+       SET_NETDEV_DEV(ndev, &op->dev);
+       ndev->flags &= ~IFF_MULTICAST;  /* clear multicast */
+-      ndev->features = NETIF_F_SG | NETIF_F_FRAGLIST;
++      ndev->features = NETIF_F_SG;
+       ndev->netdev_ops = &axienet_netdev_ops;
+       ndev->ethtool_ops = &axienet_ethtool_ops;
diff --git a/queue-3.10/net-drop_monitor-fix-the-value-of-maxattr.patch b/queue-3.10/net-drop_monitor-fix-the-value-of-maxattr.patch
new file mode 100644 (file)
index 0000000..794be69
--- /dev/null
@@ -0,0 +1,29 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Changli Gao <xiaosuo@gmail.com>
+Date: Sun, 8 Dec 2013 09:36:56 -0500
+Subject: net: drop_monitor: fix the value of maxattr
+
+From: Changli Gao <xiaosuo@gmail.com>
+
+[ Upstream commit d323e92cc3f4edd943610557c9ea1bb4bb5056e8 ]
+
+maxattr in genl_family should be used to save the max attribute
+type, but not the max command type. Drop monitor doesn't support
+any attributes, so we should leave it as zero.
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/drop_monitor.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/net/core/drop_monitor.c
++++ b/net/core/drop_monitor.c
+@@ -64,7 +64,6 @@ static struct genl_family net_drop_monit
+       .hdrsize        = 0,
+       .name           = "NET_DM",
+       .version        = 2,
+-      .maxattr        = NET_DM_CMD_MAX,
+ };
+ static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
diff --git a/queue-3.10/net-fec-fix-potential-use-after-free.patch b/queue-3.10/net-fec-fix-potential-use-after-free.patch
new file mode 100644 (file)
index 0000000..f1f02ef
--- /dev/null
@@ -0,0 +1,45 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 19 Dec 2013 10:53:02 -0800
+Subject: net: fec: fix potential use after free
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 7a2a84518cfb263d2c4171b3d63671f88316adb2 ]
+
+skb_tx_timestamp(skb) should be called _before_ TX completion
+has a chance to trigger, otherwise it is too late and we access
+freed memory.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Fixes: de5fb0a05348 ("net: fec: put tx to napi poll function to fix dead lock")
+Cc: Frank Li <Frank.Li@freescale.com>
+Cc: Richard Cochran <richardcochran@gmail.com>
+Acked-by: Richard Cochran <richardcochran@gmail.com>
+Acked-by: Frank Li <Frank.Li@freescale.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/freescale/fec_main.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/freescale/fec_main.c
++++ b/drivers/net/ethernet/freescale/fec_main.c
+@@ -371,6 +371,8 @@ fec_enet_start_xmit(struct sk_buff *skb,
+       else
+               bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex);
++      skb_tx_timestamp(skb);
++
+       fep->cur_tx = bdp;
+       if (fep->cur_tx == fep->dirty_tx)
+@@ -379,8 +381,6 @@ fec_enet_start_xmit(struct sk_buff *skb,
+       /* Trigger transmission start */
+       writel(0, fep->hwp + FEC_X_DES_ACTIVE);
+-      skb_tx_timestamp(skb);
+-
+       return NETDEV_TX_OK;
+ }
diff --git a/queue-3.10/net-inet_diag-zero-out-uninitialized-idiag_-src-dst-fields.patch b/queue-3.10/net-inet_diag-zero-out-uninitialized-idiag_-src-dst-fields.patch
new file mode 100644 (file)
index 0000000..dd0e12a
--- /dev/null
@@ -0,0 +1,88 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Daniel Borkmann <dborkman@redhat.com>
+Date: Tue, 17 Dec 2013 00:38:39 +0100
+Subject: net: inet_diag: zero out uninitialized idiag_{src,dst} fields
+
+From: Daniel Borkmann <dborkman@redhat.com>
+
+[ Upstream commit b1aac815c0891fe4a55a6b0b715910142227700f ]
+
+Jakub reported while working with nlmon netlink sniffer that parts of
+the inet_diag_sockid are not initialized when r->idiag_family != AF_INET6.
+That is, fields of r->id.idiag_src[1 ... 3], r->id.idiag_dst[1 ... 3].
+
+In fact, it seems that we can leak 6 * sizeof(u32) bytes of kernel [slab]
+memory through this. At least, in udp_dump_one(), we allocate a skb in ...
+
+  rep = nlmsg_new(sizeof(struct inet_diag_msg) + ..., GFP_KERNEL);
+
+... and then pass that to inet_sk_diag_fill() that puts the whole struct
+inet_diag_msg into the skb, where we only fill out r->id.idiag_src[0],
+r->id.idiag_dst[0] and leave the rest untouched:
+
+  r->id.idiag_src[0] = inet->inet_rcv_saddr;
+  r->id.idiag_dst[0] = inet->inet_daddr;
+
+struct inet_diag_msg embeds struct inet_diag_sockid that is correctly /
+fully filled out in IPv6 case, but for IPv4 not.
+
+So just zero them out by using plain memset (for this little amount of
+bytes it's probably not worth the extra check for idiag_family == AF_INET).
+
+Similarly, fix also other places where we fill that out.
+
+Reported-by: Jakub Zawadzki <darkjames-ws@darkjames.pl>
+Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/inet_diag.c |   16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+--- a/net/ipv4/inet_diag.c
++++ b/net/ipv4/inet_diag.c
+@@ -106,6 +106,10 @@ int inet_sk_diag_fill(struct sock *sk, s
+       r->id.idiag_sport = inet->inet_sport;
+       r->id.idiag_dport = inet->inet_dport;
++
++      memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
++      memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
++
+       r->id.idiag_src[0] = inet->inet_rcv_saddr;
+       r->id.idiag_dst[0] = inet->inet_daddr;
+@@ -240,12 +244,19 @@ static int inet_twsk_diag_fill(struct in
+       r->idiag_family       = tw->tw_family;
+       r->idiag_retrans      = 0;
++
+       r->id.idiag_if        = tw->tw_bound_dev_if;
+       sock_diag_save_cookie(tw, r->id.idiag_cookie);
++
+       r->id.idiag_sport     = tw->tw_sport;
+       r->id.idiag_dport     = tw->tw_dport;
++
++      memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
++      memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
++
+       r->id.idiag_src[0]    = tw->tw_rcv_saddr;
+       r->id.idiag_dst[0]    = tw->tw_daddr;
++
+       r->idiag_state        = tw->tw_substate;
+       r->idiag_timer        = 3;
+       r->idiag_expires      = DIV_ROUND_UP(tmo * 1000, HZ);
+@@ -732,8 +743,13 @@ static int inet_diag_fill_req(struct sk_
+       r->id.idiag_sport = inet->inet_sport;
+       r->id.idiag_dport = ireq->rmt_port;
++
++      memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
++      memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
++
+       r->id.idiag_src[0] = ireq->loc_addr;
+       r->id.idiag_dst[0] = ireq->rmt_addr;
++
+       r->idiag_expires = jiffies_to_msecs(tmo);
+       r->idiag_rqueue = 0;
+       r->idiag_wqueue = 0;
diff --git a/queue-3.10/net-llc-fix-use-after-free-in-llc_ui_recvmsg.patch b/queue-3.10/net-llc-fix-use-after-free-in-llc_ui_recvmsg.patch
new file mode 100644 (file)
index 0000000..75032c3
--- /dev/null
@@ -0,0 +1,65 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Daniel Borkmann <dborkman@redhat.com>
+Date: Mon, 30 Dec 2013 23:40:50 +0100
+Subject: net: llc: fix use after free in llc_ui_recvmsg
+
+From: Daniel Borkmann <dborkman@redhat.com>
+
+[ Upstream commit 4d231b76eef6c4a6bd9c96769e191517765942cb ]
+
+While commit 30a584d944fb fixes datagram interface in LLC, a use
+after free bug has been introduced for SOCK_STREAM sockets that do
+not make use of MSG_PEEK.
+
+The flow is as follows ...
+
+  if (!(flags & MSG_PEEK)) {
+    ...
+    sk_eat_skb(sk, skb, false);
+    ...
+  }
+  ...
+  if (used + offset < skb->len)
+    continue;
+
+... where sk_eat_skb() calls __kfree_skb(). Therefore, cache
+original length and work on skb_len to check partial reads.
+
+Fixes: 30a584d944fb ("[LLX]: SOCK_DGRAM interface fixes")
+Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
+Cc: Stephen Hemminger <stephen@networkplumber.org>
+Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/llc/af_llc.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/net/llc/af_llc.c
++++ b/net/llc/af_llc.c
+@@ -715,7 +715,7 @@ static int llc_ui_recvmsg(struct kiocb *
+       unsigned long cpu_flags;
+       size_t copied = 0;
+       u32 peek_seq = 0;
+-      u32 *seq;
++      u32 *seq, skb_len;
+       unsigned long used;
+       int target;     /* Read at least this many bytes */
+       long timeo;
+@@ -812,6 +812,7 @@ static int llc_ui_recvmsg(struct kiocb *
+               }
+               continue;
+       found_ok_skb:
++              skb_len = skb->len;
+               /* Ok so how much can we use? */
+               used = skb->len - offset;
+               if (len < used)
+@@ -844,7 +845,7 @@ static int llc_ui_recvmsg(struct kiocb *
+               }
+               /* Partial read */
+-              if (used + offset < skb->len)
++              if (used + offset < skb_len)
+                       continue;
+       } while (len > 0);
diff --git a/queue-3.10/net-loosen-constraints-for-recalculating-checksum-in-skb_segment.patch b/queue-3.10/net-loosen-constraints-for-recalculating-checksum-in-skb_segment.patch
new file mode 100644 (file)
index 0000000..30379bc
--- /dev/null
@@ -0,0 +1,71 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Simon Horman <horms@verge.net.au>
+Date: Sun, 19 May 2013 15:46:49 +0000
+Subject: net: Loosen constraints for recalculating checksum in skb_segment()
+
+From: Simon Horman <horms@verge.net.au>
+
+[ Upstream commit 1cdbcb7957cf9e5f841dbcde9b38fd18a804208b ]
+
+This is a generic solution to resolve a specific problem that I have observed.
+
+If the encapsulation of an skb changes then ability to offload checksums
+may also change. In particular it may be necessary to perform checksumming
+in software.
+
+An example of such a case is where a non-GRE packet is received but
+is to be encapsulated and transmitted as GRE.
+
+Another example relates to my proposed support for packets
+that are non-MPLS when received but MPLS when transmitted.
+
+The cost of this change is that the value of the csum variable may be
+checked when it previously was not. In the case where the csum variable is
+true this is pure overhead. In the case where the csum variable is false it
+leads to software checksumming, which I believe also leads to correct
+checksums in transmitted packets for the cases described above.
+
+Further analysis:
+
+This patch relies on the return value of can_checksum_protocol()
+being correct and in turn the return value of skb_network_protocol(),
+used to provide the protocol parameter of can_checksum_protocol(),
+being correct. It also relies on the features passed to skb_segment()
+and in turn to can_checksum_protocol() being correct.
+
+I believe that this problem has not been observed for VLANs because it
+appears that almost all drivers, the exception being xgbe, set
+vlan_features such that the checksum offload support for VLAN packets
+is greater than or equal to that of non-VLAN packets.
+
+I wonder if the code in xgbe may be an oversight and the hardware does
+support checksumming of VLAN packets.  If so it may be worth updating the
+vlan_features of the driver as this patch will force such checksums to be
+performed in software rather than hardware.
+
+Signed-off-by: Simon Horman <horms@verge.net.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/skbuff.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -2854,7 +2854,7 @@ struct sk_buff *skb_segment(struct sk_bu
+                                                doffset + tnl_hlen);
+               if (fskb != skb_shinfo(skb)->frag_list)
+-                      continue;
++                      goto perform_csum_check;
+               if (!sg) {
+                       nskb->ip_summed = CHECKSUM_NONE;
+@@ -2918,6 +2918,7 @@ skip_fraglist:
+               nskb->len += nskb->data_len;
+               nskb->truesize += nskb->data_len;
++perform_csum_check:
+               if (!csum) {
+                       nskb->csum = skb_checksum(nskb, doffset,
+                                                 nskb->len - doffset, 0);
diff --git a/queue-3.10/net-rose-restore-old-recvmsg-behavior.patch b/queue-3.10/net-rose-restore-old-recvmsg-behavior.patch
new file mode 100644 (file)
index 0000000..e15ebb3
--- /dev/null
@@ -0,0 +1,61 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Florian Westphal <fw@strlen.de>
+Date: Mon, 23 Dec 2013 00:32:31 +0100
+Subject: net: rose: restore old recvmsg behavior
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit f81152e35001e91997ec74a7b4e040e6ab0acccf ]
+
+recvmsg handler in net/rose/af_rose.c performs size-check ->msg_namelen.
+
+After commit f3d3342602f8bcbf37d7c46641cb9bca7618eb1c
+(net: rework recvmsg handler msg_name and msg_namelen logic), we now
+always take the else branch due to namelen being initialized to 0.
+
+Digging in netdev-vger-cvs git repo shows that msg_namelen was
+initialized with a fixed-size since at least 1995, so the else branch
+was never taken.
+
+Compile tested only.
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/rose/af_rose.c |   16 ++++------------
+ 1 file changed, 4 insertions(+), 12 deletions(-)
+
+--- a/net/rose/af_rose.c
++++ b/net/rose/af_rose.c
+@@ -1253,6 +1253,7 @@ static int rose_recvmsg(struct kiocb *io
+       if (msg->msg_name) {
+               struct sockaddr_rose *srose;
++              struct full_sockaddr_rose *full_srose = msg->msg_name;
+               memset(msg->msg_name, 0, sizeof(struct full_sockaddr_rose));
+               srose = msg->msg_name;
+@@ -1260,18 +1261,9 @@ static int rose_recvmsg(struct kiocb *io
+               srose->srose_addr   = rose->dest_addr;
+               srose->srose_call   = rose->dest_call;
+               srose->srose_ndigis = rose->dest_ndigis;
+-              if (msg->msg_namelen >= sizeof(struct full_sockaddr_rose)) {
+-                      struct full_sockaddr_rose *full_srose = (struct full_sockaddr_rose *)msg->msg_name;
+-                      for (n = 0 ; n < rose->dest_ndigis ; n++)
+-                              full_srose->srose_digis[n] = rose->dest_digis[n];
+-                      msg->msg_namelen = sizeof(struct full_sockaddr_rose);
+-              } else {
+-                      if (rose->dest_ndigis >= 1) {
+-                              srose->srose_ndigis = 1;
+-                              srose->srose_digi = rose->dest_digis[0];
+-                      }
+-                      msg->msg_namelen = sizeof(struct sockaddr_rose);
+-              }
++              for (n = 0 ; n < rose->dest_ndigis ; n++)
++                      full_srose->srose_digis[n] = rose->dest_digis[n];
++              msg->msg_namelen = sizeof(struct full_sockaddr_rose);
+       }
+       skb_free_datagram(sk, skb);
diff --git a/queue-3.10/net-unix-allow-bind-to-fail-on-mutex-lock.patch b/queue-3.10/net-unix-allow-bind-to-fail-on-mutex-lock.patch
new file mode 100644 (file)
index 0000000..9d847c8
--- /dev/null
@@ -0,0 +1,47 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Sasha Levin <sasha.levin@oracle.com>
+Date: Fri, 13 Dec 2013 10:54:22 -0500
+Subject: net: unix: allow bind to fail on mutex lock
+
+From: Sasha Levin <sasha.levin@oracle.com>
+
+[ Upstream commit 37ab4fa7844a044dc21fde45e2a0fc2f3c3b6490 ]
+
+This is similar to the set_peek_off patch where calling bind while the
+socket is stuck in unix_dgram_recvmsg() will block and cause a hung task
+spew after a while.
+
+This is also the last place that did a straightforward mutex_lock(), so
+there shouldn't be any more of these patches.
+
+Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/unix/af_unix.c |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -717,7 +717,9 @@ static int unix_autobind(struct socket *
+       int err;
+       unsigned int retries = 0;
+-      mutex_lock(&u->readlock);
++      err = mutex_lock_interruptible(&u->readlock);
++      if (err)
++              return err;
+       err = 0;
+       if (u->addr)
+@@ -876,7 +878,9 @@ static int unix_bind(struct socket *sock
+               goto out;
+       addr_len = err;
+-      mutex_lock(&u->readlock);
++      err = mutex_lock_interruptible(&u->readlock);
++      if (err)
++              goto out;
+       err = -EINVAL;
+       if (u->addr)
diff --git a/queue-3.10/net-unix-allow-set_peek_off-to-fail.patch b/queue-3.10/net-unix-allow-set_peek_off-to-fail.patch
new file mode 100644 (file)
index 0000000..37db76f
--- /dev/null
@@ -0,0 +1,72 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Sasha Levin <sasha.levin@oracle.com>
+Date: Sat, 7 Dec 2013 17:26:27 -0500
+Subject: net: unix: allow set_peek_off to fail
+
+From: Sasha Levin <sasha.levin@oracle.com>
+
+[ Upstream commit 12663bfc97c8b3fdb292428105dd92d563164050 ]
+
+unix_dgram_recvmsg() will hold the readlock of the socket until recv
+is complete.
+
+At the same time, we may try to setsockopt(SO_PEEK_OFF), which will hang until
+unix_dgram_recvmsg() completes (which can take a while) without allowing
+us to break out of it, triggering a hung task spew.
+
+Instead, allow set_peek_off to fail, this way userspace will not hang.
+
+Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
+Acked-by: Pavel Emelyanov <xemul@parallels.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/net.h |    2 +-
+ net/core/sock.c     |    2 +-
+ net/unix/af_unix.c  |    8 ++++++--
+ 3 files changed, 8 insertions(+), 4 deletions(-)
+
+--- a/include/linux/net.h
++++ b/include/linux/net.h
+@@ -180,7 +180,7 @@ struct proto_ops {
+                                     int offset, size_t size, int flags);
+       ssize_t         (*splice_read)(struct socket *sock,  loff_t *ppos,
+                                      struct pipe_inode_info *pipe, size_t len, unsigned int flags);
+-      void            (*set_peek_off)(struct sock *sk, int val);
++      int             (*set_peek_off)(struct sock *sk, int val);
+ };
+ #define DECLARE_SOCKADDR(type, dst, src)      \
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -885,7 +885,7 @@ set_rcvbuf:
+       case SO_PEEK_OFF:
+               if (sock->ops->set_peek_off)
+-                      sock->ops->set_peek_off(sk, val);
++                      ret = sock->ops->set_peek_off(sk, val);
+               else
+                       ret = -EOPNOTSUPP;
+               break;
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -529,13 +529,17 @@ static int unix_seqpacket_sendmsg(struct
+ static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
+                                 struct msghdr *, size_t, int);
+-static void unix_set_peek_off(struct sock *sk, int val)
++static int unix_set_peek_off(struct sock *sk, int val)
+ {
+       struct unix_sock *u = unix_sk(sk);
+-      mutex_lock(&u->readlock);
++      if (mutex_lock_interruptible(&u->readlock))
++              return -EINTR;
++
+       sk->sk_peek_off = val;
+       mutex_unlock(&u->readlock);
++
++      return 0;
+ }
diff --git a/queue-3.10/netpoll-fix-missing-txq-unlock-and-and-oops.patch b/queue-3.10/netpoll-fix-missing-txq-unlock-and-and-oops.patch
new file mode 100644 (file)
index 0000000..feabd84
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: "David S. Miller" <davem@davemloft.net>
+Date: Thu, 2 Jan 2014 19:50:52 -0500
+Subject: netpoll: Fix missing TXQ unlock and and OOPS.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Upstream commit aca5f58f9ba803ec8c2e6bcf890db17589e8dfcc ]
+
+The VLAN tag handling code in netpoll_send_skb_on_dev() has two problems.
+
+1) It exits without unlocking the TXQ.
+
+2) It then tries to queue a NULL skb to npinfo->txq.
+
+Reported-by: Ahmed Tamrawi <atamrawi@iastate.edu>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/netpoll.c |   11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+--- a/net/core/netpoll.c
++++ b/net/core/netpoll.c
+@@ -386,8 +386,14 @@ void netpoll_send_skb_on_dev(struct netp
+                                           !vlan_hw_offload_capable(netif_skb_features(skb),
+                                                                    skb->vlan_proto)) {
+                                               skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb));
+-                                              if (unlikely(!skb))
+-                                                      break;
++                                              if (unlikely(!skb)) {
++                                                      /* This is actually a packet drop, but we
++                                                       * don't want the code at the end of this
++                                                       * function to try and re-queue a NULL skb.
++                                                       */
++                                                      status = NETDEV_TX_OK;
++                                                      goto unlock_txq;
++                                              }
+                                               skb->vlan_tci = 0;
+                                       }
+@@ -395,6 +401,7 @@ void netpoll_send_skb_on_dev(struct netp
+                                       if (status == NETDEV_TX_OK)
+                                               txq_trans_update(txq);
+                               }
++                      unlock_txq:
+                               __netif_tx_unlock(txq);
+                               if (status == NETDEV_TX_OK)
diff --git a/queue-3.10/netvsc-don-t-flush-peers-notifying-work-during-setting-mtu.patch b/queue-3.10/netvsc-don-t-flush-peers-notifying-work-during-setting-mtu.patch
new file mode 100644 (file)
index 0000000..76baed6
--- /dev/null
@@ -0,0 +1,90 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Jason Wang <jasowang@redhat.com>
+Date: Fri, 13 Dec 2013 17:21:27 +0800
+Subject: netvsc: don't flush peers notifying work during setting mtu
+
+From: Jason Wang <jasowang@redhat.com>
+
+[ Upstream commit 50dc875f2e6e2e04aed3b3033eb0ac99192d6d02 ]
+
+There's a possible deadlock if we flush the peers notifying work during setting
+mtu:
+
+[   22.991149] ======================================================
+[   22.991173] [ INFO: possible circular locking dependency detected ]
+[   22.991198] 3.10.0-54.0.1.el7.x86_64.debug #1 Not tainted
+[   22.991219] -------------------------------------------------------
+[   22.991243] ip/974 is trying to acquire lock:
+[   22.991261]  ((&(&net_device_ctx->dwork)->work)){+.+.+.}, at: [<ffffffff8108af95>] flush_work+0x5/0x2e0
+[   22.991307]
+but task is already holding lock:
+[   22.991330]  (rtnl_mutex){+.+.+.}, at: [<ffffffff81539deb>] rtnetlink_rcv+0x1b/0x40
+[   22.991367]
+which lock already depends on the new lock.
+
+[   22.991398]
+the existing dependency chain (in reverse order) is:
+[   22.991426]
+-> #1 (rtnl_mutex){+.+.+.}:
+[   22.991449]        [<ffffffff810dfdd9>] __lock_acquire+0xb19/0x1260
+[   22.991477]        [<ffffffff810e0d12>] lock_acquire+0xa2/0x1f0
+[   22.991501]        [<ffffffff81673659>] mutex_lock_nested+0x89/0x4f0
+[   22.991529]        [<ffffffff815392b7>] rtnl_lock+0x17/0x20
+[   22.991552]        [<ffffffff815230b2>] netdev_notify_peers+0x12/0x30
+[   22.991579]        [<ffffffffa0340212>] netvsc_send_garp+0x22/0x30 [hv_netvsc]
+[   22.991610]        [<ffffffff8108d251>] process_one_work+0x211/0x6e0
+[   22.991637]        [<ffffffff8108d83b>] worker_thread+0x11b/0x3a0
+[   22.991663]        [<ffffffff81095e5d>] kthread+0xed/0x100
+[   22.991686]        [<ffffffff81681c6c>] ret_from_fork+0x7c/0xb0
+[   22.991715]
+-> #0 ((&(&net_device_ctx->dwork)->work)){+.+.+.}:
+[   22.991715]        [<ffffffff810de817>] check_prevs_add+0x967/0x970
+[   22.991715]        [<ffffffff810dfdd9>] __lock_acquire+0xb19/0x1260
+[   22.991715]        [<ffffffff810e0d12>] lock_acquire+0xa2/0x1f0
+[   22.991715]        [<ffffffff8108afde>] flush_work+0x4e/0x2e0
+[   22.991715]        [<ffffffff8108e1b5>] __cancel_work_timer+0x95/0x130
+[   22.991715]        [<ffffffff8108e303>] cancel_delayed_work_sync+0x13/0x20
+[   22.991715]        [<ffffffffa03404e4>] netvsc_change_mtu+0x84/0x200 [hv_netvsc]
+[   22.991715]        [<ffffffff815233d4>] dev_set_mtu+0x34/0x80
+[   22.991715]        [<ffffffff8153bc2a>] do_setlink+0x23a/0xa00
+[   22.991715]        [<ffffffff8153d054>] rtnl_newlink+0x394/0x5e0
+[   22.991715]        [<ffffffff81539eac>] rtnetlink_rcv_msg+0x9c/0x260
+[   22.991715]        [<ffffffff8155cdd9>] netlink_rcv_skb+0xa9/0xc0
+[   22.991715]        [<ffffffff81539dfa>] rtnetlink_rcv+0x2a/0x40
+[   22.991715]        [<ffffffff8155c41d>] netlink_unicast+0xdd/0x190
+[   22.991715]        [<ffffffff8155c807>] netlink_sendmsg+0x337/0x750
+[   22.991715]        [<ffffffff8150d219>] sock_sendmsg+0x99/0xd0
+[   22.991715]        [<ffffffff8150d63e>] ___sys_sendmsg+0x39e/0x3b0
+[   22.991715]        [<ffffffff8150eba2>] __sys_sendmsg+0x42/0x80
+[   22.991715]        [<ffffffff8150ebf2>] SyS_sendmsg+0x12/0x20
+[   22.991715]        [<ffffffff81681d19>] system_call_fastpath+0x16/0x1b
+
+This is because we hold the rtnl_lock() before ndo_change_mtu() and try to flush
+the work in netvsc_change_mtu(); in the meantime, netdev_notify_peers() may be
+called from the worker and also try to take the rtnl_lock. As a result, the
+flush can never succeed. Solve this by not canceling and flushing the work;
+this is safe because the transmission done by NETDEV_NOTIFY_PEERS was
+synchronized with the netif_tx_disable() called by netvsc_change_mtu().
+
+Reported-by: Yaju Cao <yacao@redhat.com>
+Tested-by: Yaju Cao <yacao@redhat.com>
+Cc: K. Y. Srinivasan <kys@microsoft.com>
+Cc: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc_drv.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -328,7 +328,6 @@ static int netvsc_change_mtu(struct net_
+               return -EINVAL;
+       nvdev->start_remove = true;
+-      cancel_delayed_work_sync(&ndevctx->dwork);
+       cancel_work_sync(&ndevctx->work);
+       netif_tx_disable(ndev);
+       rndis_filter_device_remove(hdev);
diff --git a/queue-3.10/packet-fix-send-path-when-running-with-proto-0.patch b/queue-3.10/packet-fix-send-path-when-running-with-proto-0.patch
new file mode 100644 (file)
index 0000000..53947a6
--- /dev/null
@@ -0,0 +1,206 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Daniel Borkmann <dborkman@redhat.com>
+Date: Fri, 6 Dec 2013 11:36:15 +0100
+Subject: packet: fix send path when running with proto == 0
+
+From: Daniel Borkmann <dborkman@redhat.com>
+
+[ Upstream commit 66e56cd46b93ef407c60adcac62cf33b06119d50 ]
+
+Commit e40526cb20b5 introduced a cached dev pointer, that gets
+hooked into register_prot_hook(), __unregister_prot_hook() to
+update the device used for the send path.
+
+We need to fix this up, as otherwise this will not work with
+sockets created with protocol = 0, plus with sll_protocol = 0
+passed via sockaddr_ll when doing the bind.
+
+So instead, assign the pointer directly. The compiler can inline
+these helper functions automagically.
+
+While at it, also assume the cached dev fast-path as likely(),
+and document this variant of socket creation as it seems it is
+not widely used (seems not even the author of TX_RING was aware
+of that in his reference example [1]). Tested with reproducer
+from e40526cb20b5.
+
+ [1] http://wiki.ipxwarzone.com/index.php5?title=Linux_packet_mmap#Example
+
+Fixes: e40526cb20b5 ("packet: fix use after free race in send path when dev is released")
+Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
+Tested-by: Salam Noureddine <noureddine@aristanetworks.com>
+Tested-by: Jesper Dangaard Brouer <brouer@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/networking/packet_mmap.txt |   10 ++++
+ net/packet/af_packet.c                   |   65 +++++++++++++++++++------------
+ 2 files changed, 50 insertions(+), 25 deletions(-)
+
+--- a/Documentation/networking/packet_mmap.txt
++++ b/Documentation/networking/packet_mmap.txt
+@@ -123,6 +123,16 @@ Transmission process is similar to captu
+ [shutdown]  close() --------> destruction of the transmission socket and
+                               deallocation of all associated resources.
++Socket creation and destruction is also straight forward, and is done
++the same way as in capturing described in the previous paragraph:
++
++ int fd = socket(PF_PACKET, mode, 0);
++
++The protocol can optionally be 0 in case we only want to transmit
++via this socket, which avoids an expensive call to packet_rcv().
++In this case, you also need to bind(2) the TX_RING with sll_protocol = 0
++set. Otherwise, htons(ETH_P_ALL) or any other protocol, for example.
++
+ Binding the socket to your network interface is mandatory (with zero copy) to
+ know the header size of frames used in the circular buffer.
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -237,6 +237,30 @@ struct packet_skb_cb {
+ static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
+ static void __fanout_link(struct sock *sk, struct packet_sock *po);
++static struct net_device *packet_cached_dev_get(struct packet_sock *po)
++{
++      struct net_device *dev;
++
++      rcu_read_lock();
++      dev = rcu_dereference(po->cached_dev);
++      if (likely(dev))
++              dev_hold(dev);
++      rcu_read_unlock();
++
++      return dev;
++}
++
++static void packet_cached_dev_assign(struct packet_sock *po,
++                                   struct net_device *dev)
++{
++      rcu_assign_pointer(po->cached_dev, dev);
++}
++
++static void packet_cached_dev_reset(struct packet_sock *po)
++{
++      RCU_INIT_POINTER(po->cached_dev, NULL);
++}
++
+ /* register_prot_hook must be invoked with the po->bind_lock held,
+  * or from a context in which asynchronous accesses to the packet
+  * socket is not possible (packet_create()).
+@@ -246,12 +270,10 @@ static void register_prot_hook(struct so
+       struct packet_sock *po = pkt_sk(sk);
+       if (!po->running) {
+-              if (po->fanout) {
++              if (po->fanout)
+                       __fanout_link(sk, po);
+-              } else {
++              else
+                       dev_add_pack(&po->prot_hook);
+-                      rcu_assign_pointer(po->cached_dev, po->prot_hook.dev);
+-              }
+               sock_hold(sk);
+               po->running = 1;
+@@ -270,12 +292,11 @@ static void __unregister_prot_hook(struc
+       struct packet_sock *po = pkt_sk(sk);
+       po->running = 0;
+-      if (po->fanout) {
++
++      if (po->fanout)
+               __fanout_unlink(sk, po);
+-      } else {
++      else
+               __dev_remove_pack(&po->prot_hook);
+-              RCU_INIT_POINTER(po->cached_dev, NULL);
+-      }
+       __sock_put(sk);
+@@ -2048,19 +2069,6 @@ static int tpacket_fill_skb(struct packe
+       return tp_len;
+ }
+-static struct net_device *packet_cached_dev_get(struct packet_sock *po)
+-{
+-      struct net_device *dev;
+-
+-      rcu_read_lock();
+-      dev = rcu_dereference(po->cached_dev);
+-      if (dev)
+-              dev_hold(dev);
+-      rcu_read_unlock();
+-
+-      return dev;
+-}
+-
+ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
+ {
+       struct sk_buff *skb;
+@@ -2077,7 +2085,7 @@ static int tpacket_snd(struct packet_soc
+       mutex_lock(&po->pg_vec_lock);
+-      if (saddr == NULL) {
++      if (likely(saddr == NULL)) {
+               dev     = packet_cached_dev_get(po);
+               proto   = po->num;
+               addr    = NULL;
+@@ -2231,7 +2239,7 @@ static int packet_snd(struct socket *soc
+        *      Get and verify the address.
+        */
+-      if (saddr == NULL) {
++      if (likely(saddr == NULL)) {
+               dev     = packet_cached_dev_get(po);
+               proto   = po->num;
+               addr    = NULL;
+@@ -2440,6 +2448,8 @@ static int packet_release(struct socket
+       spin_lock(&po->bind_lock);
+       unregister_prot_hook(sk, false);
++      packet_cached_dev_reset(po);
++
+       if (po->prot_hook.dev) {
+               dev_put(po->prot_hook.dev);
+               po->prot_hook.dev = NULL;
+@@ -2495,14 +2505,17 @@ static int packet_do_bind(struct sock *s
+       spin_lock(&po->bind_lock);
+       unregister_prot_hook(sk, true);
++
+       po->num = protocol;
+       po->prot_hook.type = protocol;
+       if (po->prot_hook.dev)
+               dev_put(po->prot_hook.dev);
+-      po->prot_hook.dev = dev;
++      po->prot_hook.dev = dev;
+       po->ifindex = dev ? dev->ifindex : 0;
++      packet_cached_dev_assign(po, dev);
++
+       if (protocol == 0)
+               goto out_unlock;
+@@ -2615,7 +2628,8 @@ static int packet_create(struct net *net
+       po = pkt_sk(sk);
+       sk->sk_family = PF_PACKET;
+       po->num = proto;
+-      RCU_INIT_POINTER(po->cached_dev, NULL);
++
++      packet_cached_dev_reset(po);
+       sk->sk_destruct = packet_sock_destruct;
+       sk_refcnt_debug_inc(sk);
+@@ -3369,6 +3383,7 @@ static int packet_notifier(struct notifi
+                                               sk->sk_error_report(sk);
+                               }
+                               if (msg == NETDEV_UNREGISTER) {
++                                      packet_cached_dev_reset(po);
+                                       po->ifindex = -1;
+                                       if (po->prot_hook.dev)
+                                               dev_put(po->prot_hook.dev);
diff --git a/queue-3.10/rds-prevent-bug_on-triggered-on-congestion-update-to-loopback.patch b/queue-3.10/rds-prevent-bug_on-triggered-on-congestion-update-to-loopback.patch
new file mode 100644 (file)
index 0000000..ebd954e
--- /dev/null
@@ -0,0 +1,90 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Venkat Venkatsubra <venkat.x.venkatsubra@oracle.com>
+Date: Mon, 2 Dec 2013 15:41:39 -0800
+Subject: rds: prevent BUG_ON triggered on congestion update to loopback
+
+From: Venkat Venkatsubra <venkat.x.venkatsubra@oracle.com>
+
+[ Upstream commit 18fc25c94eadc52a42c025125af24657a93638c0 ]
+
+After congestion update on a local connection, when rds_ib_xmit returns
+fewer bytes than there are in the message, rds_send_xmit calls
+back rds_ib_xmit with an offset that causes BUG_ON(off & RDS_FRAG_SIZE)
+to trigger.
+
+For a 4Kb PAGE_SIZE rds_ib_xmit returns min(8240,4096)=4096 when actually
+the message contains 8240 bytes. rds_send_xmit thinks there is more to send
+and calls rds_ib_xmit again with a data offset "off" of 4096-48(rds header)
+=4048 bytes thus hitting the BUG_ON(off & RDS_FRAG_SIZE) [RDS_FRAG_SIZE=4k].
+
+The commit 6094628bfd94323fc1cea05ec2c6affd98c18f7f
+"rds: prevent BUG_ON triggering on congestion map updates" introduced
+this regression. That change was addressing the triggering of a different
+BUG_ON in rds_send_xmit() on PowerPC architecture with 64Kbytes PAGE_SIZE:
+       BUG_ON(ret != 0 &&
+                conn->c_xmit_sg == rm->data.op_nents);
+This was the sequence it was going through:
+(rds_ib_xmit)
+/* Do not send cong updates to IB loopback */
+if (conn->c_loopback
+   && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
+       rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
+       return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
+}
+rds_ib_xmit returns 8240
+rds_send_xmit:
+  c_xmit_data_off = 0 + 8240 - 48 (rds header accounted only the first time)
+                = 8192
+  c_xmit_data_off < 65536 (sg->length), so calls rds_ib_xmit again
+rds_ib_xmit returns 8240
+rds_send_xmit:
+  c_xmit_data_off = 8192 + 8240 = 16432, calls rds_ib_xmit again
+  and so on (c_xmit_data_off 24672,32912,41152,49392,57632)
+rds_ib_xmit returns 8240
+On this iteration this sequence causes the BUG_ON in rds_send_xmit:
+    while (ret) {
+       tmp = min_t(int, ret, sg->length - conn->c_xmit_data_off);
+       [tmp = 65536 - 57632 = 7904]
+       conn->c_xmit_data_off += tmp;
+       [c_xmit_data_off = 57632 + 7904 = 65536]
+       ret -= tmp;
+       [ret = 8240 - 7904 = 336]
+       if (conn->c_xmit_data_off == sg->length) {
+               conn->c_xmit_data_off = 0;
+               sg++;
+               conn->c_xmit_sg++;
+               BUG_ON(ret != 0 &&
+                       conn->c_xmit_sg == rm->data.op_nents);
+               [c_xmit_sg = 1, rm->data.op_nents = 1]
+
+What the current fix does:
+Since the congestion update over loopback is not actually transmitted
+as a message, all that rds_ib_xmit needs to do is let the caller think
+the full message has been transmitted and not return partial bytes.
+It will return 8240 (RDS_CONG_MAP_BYTES+48) when PAGE_SIZE is 4Kb.
+And 64Kb+48 when page size is 64Kb.
+
+Reported-by: Josh Hunt <joshhunt00@gmail.com>
+Tested-by: Honggang Li <honli@redhat.com>
+Acked-by: Bang Nguyen <bang.nguyen@oracle.com>
+Signed-off-by: Venkat Venkatsubra <venkat.x.venkatsubra@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/rds/ib_send.c |    5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/net/rds/ib_send.c
++++ b/net/rds/ib_send.c
+@@ -552,9 +552,8 @@ int rds_ib_xmit(struct rds_connection *c
+           && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
+               rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
+               scat = &rm->data.op_sg[sg];
+-              ret = sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
+-              ret = min_t(int, ret, scat->length - conn->c_xmit_data_off);
+-              return ret;
++              ret = max_t(int, RDS_CONG_MAP_BYTES, scat->length);
++              return sizeof(struct rds_header) + ret;
+       }
+       /* FIXME we may overallocate here */
diff --git a/queue-3.10/rds-prevent-dereference-of-a-null-device.patch b/queue-3.10/rds-prevent-dereference-of-a-null-device.patch
new file mode 100644 (file)
index 0000000..692f7a0
--- /dev/null
@@ -0,0 +1,77 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Sasha Levin <sasha.levin@oracle.com>
+Date: Wed, 18 Dec 2013 23:49:42 -0500
+Subject: rds: prevent dereference of a NULL device
+
+From: Sasha Levin <sasha.levin@oracle.com>
+
+[ Upstream commit c2349758acf1874e4c2b93fe41d072336f1a31d0 ]
+
+Binding might result in a NULL device, which is dereferenced
+causing this BUG:
+
+[ 1317.260548] BUG: unable to handle kernel NULL pointer dereference at 000000000000097
+4
+[ 1317.261847] IP: [<ffffffff84225f52>] rds_ib_laddr_check+0x82/0x110
+[ 1317.263315] PGD 418bcb067 PUD 3ceb21067 PMD 0
+[ 1317.263502] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
+[ 1317.264179] Dumping ftrace buffer:
+[ 1317.264774]    (ftrace buffer empty)
+[ 1317.265220] Modules linked in:
+[ 1317.265824] CPU: 4 PID: 836 Comm: trinity-child46 Tainted: G        W    3.13.0-rc4-
+next-20131218-sasha-00013-g2cebb9b-dirty #4159
+[ 1317.267415] task: ffff8803ddf33000 ti: ffff8803cd31a000 task.ti: ffff8803cd31a000
+[ 1317.268399] RIP: 0010:[<ffffffff84225f52>]  [<ffffffff84225f52>] rds_ib_laddr_check+
+0x82/0x110
+[ 1317.269670] RSP: 0000:ffff8803cd31bdf8  EFLAGS: 00010246
+[ 1317.270230] RAX: 0000000000000000 RBX: ffff88020b0dd388 RCX: 0000000000000000
+[ 1317.270230] RDX: ffffffff8439822e RSI: 00000000000c000a RDI: 0000000000000286
+[ 1317.270230] RBP: ffff8803cd31be38 R08: 0000000000000000 R09: 0000000000000000
+[ 1317.270230] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000
+[ 1317.270230] R13: 0000000054086700 R14: 0000000000a25de0 R15: 0000000000000031
+[ 1317.270230] FS:  00007ff40251d700(0000) GS:ffff88022e200000(0000) knlGS:000000000000
+0000
+[ 1317.270230] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
+[ 1317.270230] CR2: 0000000000000974 CR3: 00000003cd478000 CR4: 00000000000006e0
+[ 1317.270230] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[ 1317.270230] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000090602
+[ 1317.270230] Stack:
+[ 1317.270230]  0000000054086700 5408670000a25de0 5408670000000002 0000000000000000
+[ 1317.270230]  ffffffff84223542 00000000ea54c767 0000000000000000 ffffffff86d26160
+[ 1317.270230]  ffff8803cd31be68 ffffffff84223556 ffff8803cd31beb8 ffff8800c6765280
+[ 1317.270230] Call Trace:
+[ 1317.270230]  [<ffffffff84223542>] ? rds_trans_get_preferred+0x42/0xa0
+[ 1317.270230]  [<ffffffff84223556>] rds_trans_get_preferred+0x56/0xa0
+[ 1317.270230]  [<ffffffff8421c9c3>] rds_bind+0x73/0xf0
+[ 1317.270230]  [<ffffffff83e4ce62>] SYSC_bind+0x92/0xf0
+[ 1317.270230]  [<ffffffff812493f8>] ? context_tracking_user_exit+0xb8/0x1d0
+[ 1317.270230]  [<ffffffff8119313d>] ? trace_hardirqs_on+0xd/0x10
+[ 1317.270230]  [<ffffffff8107a852>] ? syscall_trace_enter+0x32/0x290
+[ 1317.270230]  [<ffffffff83e4cece>] SyS_bind+0xe/0x10
+[ 1317.270230]  [<ffffffff843a6ad0>] tracesys+0xdd/0xe2
+[ 1317.270230] Code: 00 8b 45 cc 48 8d 75 d0 48 c7 45 d8 00 00 00 00 66 c7 45 d0 02 00
+89 45 d4 48 89 df e8 78 49 76 ff 41 89 c4 85 c0 75 0c 48 8b 03 <80> b8 74 09 00 00 01 7
+4 06 41 bc 9d ff ff ff f6 05 2a b6 c2 02
+[ 1317.270230] RIP  [<ffffffff84225f52>] rds_ib_laddr_check+0x82/0x110
+[ 1317.270230]  RSP <ffff8803cd31bdf8>
+[ 1317.270230] CR2: 0000000000000974
+
+Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/rds/ib.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/rds/ib.c
++++ b/net/rds/ib.c
+@@ -338,7 +338,8 @@ static int rds_ib_laddr_check(__be32 add
+       ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
+       /* due to this, we will claim to support iWARP devices unless we
+          check node_type. */
+-      if (ret || cm_id->device->node_type != RDMA_NODE_IB_CA)
++      if (ret || !cm_id->device ||
++          cm_id->device->node_type != RDMA_NODE_IB_CA)
+               ret = -EADDRNOTAVAIL;
+       rdsdebug("addr %pI4 ret %d node type %d\n",
index 1fa5f8beab20565ee976413ca1b20e307a6145e7..ca90f026d6eadd094aec3da984dfd612eabe8672 100644 (file)
@@ -2,3 +2,36 @@ irqchip-renesas-irqc-fix-irqc_probe-error-handling.patch
 clocksource-em_sti-set-cpu_possible_mask-to-fix-smp-broadcast.patch
 gpio-rcar-r-car-gpio-irq-share-interrupt.patch
 hid-revert-revert-hid-fix-logitech-dj-missing-unifying-device-issue.patch
+ipv6-fixed-support-for-blackhole-and-prohibit-routes.patch
+net-do-not-pretend-fraglist-support.patch
+rds-prevent-bug_on-triggered-on-congestion-update-to-loopback.patch
+macvtap-do-not-double-count-received-packets.patch
+macvtap-update-file-current-position.patch
+tun-update-file-current-position.patch
+macvtap-signal-truncated-packets.patch
+virtio-delete-napi-structures-from-netdev-before-releasing-memory.patch
+packet-fix-send-path-when-running-with-proto-0.patch
+ipv6-don-t-count-addrconf-generated-routes-against-gc-limit.patch
+net-drop_monitor-fix-the-value-of-maxattr.patch
+net-unix-allow-set_peek_off-to-fail.patch
+tg3-initialize-reg_base_addr-at-pci-config-offset-120-to-0.patch
+netvsc-don-t-flush-peers-notifying-work-during-setting-mtu.patch
+ipv6-fix-illegal-mac_header-comparison-on-32bit.patch
+net-unix-allow-bind-to-fail-on-mutex-lock.patch
+ip_gre-fix-msg_name-parsing-for-recvfrom-recvmsg.patch
+net-inet_diag-zero-out-uninitialized-idiag_-src-dst-fields.patch
+drivers-net-hamradio-integer-overflow-in-hdlcdrv_ioctl.patch
+hamradio-yam-fix-info-leak-in-ioctl.patch
+net-fec-fix-potential-use-after-free.patch
+ipv6-always-set-the-new-created-dst-s-from-in-ip6_rt_copy.patch
+rds-prevent-dereference-of-a-null-device.patch
+net-rose-restore-old-recvmsg-behavior.patch
+vlan-fix-header-ops-passthru-when-doing-tx-vlan-offload.patch
+virtio_net-fix-error-handling-for-mergeable-buffers.patch
+virtio-net-make-all-rx-paths-handle-errors-consistently.patch
+virtio_net-don-t-leak-memory-or-block-when-too-many-frags.patch
+virtio-net-fix-refill-races-during-restore.patch
+net-llc-fix-use-after-free-in-llc_ui_recvmsg.patch
+netpoll-fix-missing-txq-unlock-and-and-oops.patch
+bridge-use-spin_lock_bh-in-br_multicast_set_hash_max.patch
+net-loosen-constraints-for-recalculating-checksum-in-skb_segment.patch
diff --git a/queue-3.10/tg3-initialize-reg_base_addr-at-pci-config-offset-120-to-0.patch b/queue-3.10/tg3-initialize-reg_base_addr-at-pci-config-offset-120-to-0.patch
new file mode 100644 (file)
index 0000000..d26f6b9
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Nat Gurumoorthy <natg@google.com>
+Date: Mon, 9 Dec 2013 10:43:21 -0800
+Subject: tg3: Initialize REG_BASE_ADDR at PCI config offset 120 to 0
+
+From: Nat Gurumoorthy <natg@google.com>
+
+[ Upstream commit 388d3335575f4c056dcf7138a30f1454e2145cd8 ]
+
+The new tg3 driver leaves REG_BASE_ADDR (PCI config offset 120)
+uninitialized. From power on reset this register may have garbage in it. The
+Register Base Address register defines the device local address of a
+register. The data pointed to by this location is read or written using
+the Register Data register (PCI config offset 128). When REG_BASE_ADDR has
+garbage any read or write of Register Data Register (PCI 128) will cause the
+PCI bus to lock up. The TCO watchdog will fire and bring down the system.
+
+Signed-off-by: Nat Gurumoorthy <natg@google.com>
+Acked-by: Michael Chan <mchan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/tg3.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/ethernet/broadcom/tg3.c
++++ b/drivers/net/ethernet/broadcom/tg3.c
+@@ -16297,6 +16297,9 @@ static int tg3_get_invariants(struct tg3
+       /* Clear this out for sanity. */
+       tw32(TG3PCI_MEM_WIN_BASE_ADDR, 0);
++      /* Clear TG3PCI_REG_BASE_ADDR to prevent hangs. */
++      tw32(TG3PCI_REG_BASE_ADDR, 0);
++
+       pci_read_config_dword(tp->pdev, TG3PCI_PCISTATE,
+                             &pci_state_reg);
+       if ((pci_state_reg & PCISTATE_CONV_PCI_MODE) == 0 &&
diff --git a/queue-3.10/tun-update-file-current-position.patch b/queue-3.10/tun-update-file-current-position.patch
new file mode 100644 (file)
index 0000000..056d7e2
--- /dev/null
@@ -0,0 +1,27 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
+Date: Fri, 6 Dec 2013 14:16:51 +0800
+Subject: tun: update file current position
+
+From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
+
+[ Upstream commit d0b7da8afa079ffe018ab3e92879b7138977fc8f ]
+
+Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -1412,6 +1412,8 @@ static ssize_t tun_chr_aio_read(struct k
+       ret = tun_do_read(tun, tfile, iocb, iv, len,
+                         file->f_flags & O_NONBLOCK);
+       ret = min_t(ssize_t, ret, len);
++      if (ret > 0)
++              iocb->ki_pos = ret;
+ out:
+       tun_put(tun);
+       return ret;
diff --git a/queue-3.10/virtio-delete-napi-structures-from-netdev-before-releasing-memory.patch b/queue-3.10/virtio-delete-napi-structures-from-netdev-before-releasing-memory.patch
new file mode 100644 (file)
index 0000000..c4c890d
--- /dev/null
@@ -0,0 +1,80 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Andrey Vagin <avagin@openvz.org>
+Date: Thu, 5 Dec 2013 18:36:21 +0400
+Subject: virtio: delete napi structures from netdev before releasing memory
+
+From: Andrey Vagin <avagin@openvz.org>
+
+[ Upstream commit d4fb84eefe5164f6a6ea51d0a9e26280c661a0dd ]
+
+free_netdev calls netif_napi_del too, but it's too late, because napi
+structures are placed on vi->rq. netif_napi_add() is called from
+virtnet_alloc_queues.
+
+general protection fault: 0000 [#1] SMP
+Dumping ftrace buffer:
+   (ftrace buffer empty)
+Modules linked in: ip6table_filter ip6_tables iptable_filter ip_tables virtio_balloon pcspkr virtio_net(-) i2c_pii
+CPU: 1 PID: 347 Comm: rmmod Not tainted 3.13.0-rc2+ #171
+Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
+task: ffff8800b779c420 ti: ffff8800379e0000 task.ti: ffff8800379e0000
+RIP: 0010:[<ffffffff81322e19>]  [<ffffffff81322e19>] __list_del_entry+0x29/0xd0
+RSP: 0018:ffff8800379e1dd0  EFLAGS: 00010a83
+RAX: 6b6b6b6b6b6b6b6b RBX: ffff8800379c2fd0 RCX: dead000000200200
+RDX: 6b6b6b6b6b6b6b6b RSI: 0000000000000001 RDI: ffff8800379c2fd0
+RBP: ffff8800379e1dd0 R08: 0000000000000001 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000001 R12: ffff8800379c2f90
+R13: ffff880037839160 R14: 0000000000000000 R15: 00000000013352f0
+FS:  00007f1400e34740(0000) GS:ffff8800bfb00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
+CR2: 00007f464124c763 CR3: 00000000b68cf000 CR4: 00000000000006e0
+Stack:
+ ffff8800379e1df0 ffffffff8155beab 6b6b6b6b6b6b6b2b ffff8800378391c0
+ ffff8800379e1e18 ffffffff8156499b ffff880037839be0 ffff880037839d20
+ ffff88003779d3f0 ffff8800379e1e38 ffffffffa003477c ffff88003779d388
+Call Trace:
+ [<ffffffff8155beab>] netif_napi_del+0x1b/0x80
+ [<ffffffff8156499b>] free_netdev+0x8b/0x110
+ [<ffffffffa003477c>] virtnet_remove+0x7c/0x90 [virtio_net]
+ [<ffffffff813ae323>] virtio_dev_remove+0x23/0x80
+ [<ffffffff813f62ef>] __device_release_driver+0x7f/0xf0
+ [<ffffffff813f6ca0>] driver_detach+0xc0/0xd0
+ [<ffffffff813f5f28>] bus_remove_driver+0x58/0xd0
+ [<ffffffff813f72ec>] driver_unregister+0x2c/0x50
+ [<ffffffff813ae65e>] unregister_virtio_driver+0xe/0x10
+ [<ffffffffa0036942>] virtio_net_driver_exit+0x10/0x6ce [virtio_net]
+ [<ffffffff810d7cf2>] SyS_delete_module+0x172/0x220
+ [<ffffffff810a732d>] ? trace_hardirqs_on+0xd/0x10
+ [<ffffffff810f5d4c>] ? __audit_syscall_entry+0x9c/0xf0
+ [<ffffffff81677f69>] system_call_fastpath+0x16/0x1b
+Code: 00 00 55 48 8b 17 48 b9 00 01 10 00 00 00 ad de 48 8b 47 08 48 89 e5 48 39 ca 74 29 48 b9 00 02 20 00 00 00
+RIP  [<ffffffff81322e19>] __list_del_entry+0x29/0xd0
+ RSP <ffff8800379e1dd0>
+---[ end trace d5931cd3f87c9763 ]---
+
+Fixes: 986a4f4d452d (virtio_net: multiqueue support)
+Cc: Rusty Russell <rusty@rustcorp.com.au>
+Cc: "Michael S. Tsirkin" <mst@redhat.com>
+Signed-off-by: Andrey Vagin <avagin@openvz.org>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -1285,6 +1285,11 @@ static void virtnet_config_changed(struc
+ static void virtnet_free_queues(struct virtnet_info *vi)
+ {
++      int i;
++
++      for (i = 0; i < vi->max_queue_pairs; i++)
++              netif_napi_del(&vi->rq[i].napi);
++
+       kfree(vi->rq);
+       kfree(vi->sq);
+ }
diff --git a/queue-3.10/virtio-net-fix-refill-races-during-restore.patch b/queue-3.10/virtio-net-fix-refill-races-during-restore.patch
new file mode 100644 (file)
index 0000000..0e70dd3
--- /dev/null
@@ -0,0 +1,52 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: Jason Wang <jasowang@redhat.com>
+Date: Mon, 30 Dec 2013 11:34:40 +0800
+Subject: virtio-net: fix refill races during restore
+
+From: Jason Wang <jasowang@redhat.com>
+
+[ Upstream commit 6cd4ce0099da7702f885b6fa9ebb49e3831d90b4 ]
+
+During restore, try_fill_recv() was called with neither the napi lock held nor
+napi disabled. This can lead to two try_fill_recv() calls running at once. Fix
+this by refilling before trying to enable napi.
+
+Fixes 0741bcb5584f9e2390ae6261573c4de8314999f2
+(virtio: net: Add freeze, restore handlers to support S4).
+
+Cc: Amit Shah <amit.shah@redhat.com>
+Cc: Rusty Russell <rusty@rustcorp.com.au>
+Cc: Michael S. Tsirkin <mst@redhat.com>
+Cc: Eric Dumazet <eric.dumazet@gmail.com>
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c |   11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -1745,16 +1745,17 @@ static int virtnet_restore(struct virtio
+       if (err)
+               return err;
+-      if (netif_running(vi->dev))
++      if (netif_running(vi->dev)) {
++              for (i = 0; i < vi->curr_queue_pairs; i++)
++                      if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
++                              schedule_delayed_work(&vi->refill, 0);
++
+               for (i = 0; i < vi->max_queue_pairs; i++)
+                       virtnet_napi_enable(&vi->rq[i]);
++      }
+       netif_device_attach(vi->dev);
+-      for (i = 0; i < vi->curr_queue_pairs; i++)
+-              if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
+-                      schedule_delayed_work(&vi->refill, 0);
+-
+       mutex_lock(&vi->config_lock);
+       vi->config_enable = true;
+       mutex_unlock(&vi->config_lock);
diff --git a/queue-3.10/virtio-net-make-all-rx-paths-handle-errors-consistently.patch b/queue-3.10/virtio-net-make-all-rx-paths-handle-errors-consistently.patch
new file mode 100644 (file)
index 0000000..ce04176
--- /dev/null
@@ -0,0 +1,102 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: "Michael S. Tsirkin" <mst@redhat.com>
+Date: Thu, 26 Dec 2013 15:32:51 +0200
+Subject: virtio-net: make all RX paths handle errors consistently
+
+From: "Michael S. Tsirkin" <mst@redhat.com>
+
+receive mergeable now handles errors internally.
+Do same for big and small packet paths, otherwise
+the logic is too hard to follow.
+
+Cc: Jason Wang <jasowang@redhat.com>
+Cc: David S. Miller <davem@davemloft.net>
+Acked-by: Michael Dalton <mwdalton@google.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit f121159d72091f25afb22007c833e60a6845e912)
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c |   56 ++++++++++++++++++++++++++++++-----------------
+ 1 file changed, 36 insertions(+), 20 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -294,6 +294,34 @@ static struct sk_buff *page_to_skb(struc
+       return skb;
+ }
++static struct sk_buff *receive_small(void *buf, unsigned int len)
++{
++      struct sk_buff * skb = buf;
++
++      len -= sizeof(struct virtio_net_hdr);
++      skb_trim(skb, len);
++
++      return skb;
++}
++
++static struct sk_buff *receive_big(struct net_device *dev,
++                                 struct receive_queue *rq,
++                                 void *buf, unsigned int len)
++{
++      struct page *page = buf;
++      struct sk_buff *skb = page_to_skb(rq, page, len);
++
++      if (unlikely(!skb))
++              goto err;
++
++      return skb;
++
++err:
++      dev->stats.rx_dropped++;
++      give_pages(rq, page);
++      return NULL;
++}
++
+ static struct sk_buff *receive_mergeable(struct net_device *dev,
+                                        struct receive_queue *rq,
+                                        void *buf,
+@@ -357,7 +385,6 @@ static void receive_buf(struct receive_q
+       struct net_device *dev = vi->dev;
+       struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
+       struct sk_buff *skb;
+-      struct page *page;
+       struct skb_vnet_hdr *hdr;
+       if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
+@@ -369,26 +396,15 @@ static void receive_buf(struct receive_q
+                       dev_kfree_skb(buf);
+               return;
+       }
++      if (vi->mergeable_rx_bufs)
++              skb = receive_mergeable(dev, rq, buf, len);
++      else if (vi->big_packets)
++              skb = receive_big(dev, rq, buf, len);
++      else
++              skb = receive_small(buf, len);
+-      if (!vi->mergeable_rx_bufs && !vi->big_packets) {
+-              skb = buf;
+-              len -= sizeof(struct virtio_net_hdr);
+-              skb_trim(skb, len);
+-      } else {
+-              page = buf;
+-              if (vi->mergeable_rx_bufs) {
+-                      skb = receive_mergeable(dev, rq, page, len);
+-                      if (unlikely(!skb))
+-                              return;
+-              } else {
+-                      skb = page_to_skb(rq, page, len);
+-                      if (unlikely(!skb)) {
+-                              dev->stats.rx_dropped++;
+-                              give_pages(rq, page);
+-                              return;
+-                      }
+-              }
+-      }
++      if (unlikely(!skb))
++              return;
+       hdr = skb_vnet_hdr(skb);
diff --git a/queue-3.10/virtio_net-don-t-leak-memory-or-block-when-too-many-frags.patch b/queue-3.10/virtio_net-don-t-leak-memory-or-block-when-too-many-frags.patch
new file mode 100644 (file)
index 0000000..68ffe70
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: "Michael S. Tsirkin" <mst@redhat.com>
+Date: Thu, 26 Dec 2013 15:32:55 +0200
+Subject: virtio_net: don't leak memory or block when too many frags
+
+From: "Michael S. Tsirkin" <mst@redhat.com>
+
+We leak an skb when there are too many frags,
+we also stop processing the packet in the middle,
+the result is almost sure to be loss of networking.
+
+Reported-by: Michael Dalton <mwdalton@google.com>
+Acked-by: Michael Dalton <mwdalton@google.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -341,7 +341,7 @@ static struct sk_buff *receive_mergeable
+               if (i >= MAX_SKB_FRAGS) {
+                       pr_debug("%s: packet too long\n", skb->dev->name);
+                       skb->dev->stats.rx_length_errors++;
+-                      return NULL;
++                      goto err_frags;
+               }
+               page = virtqueue_get_buf(rq->vq, &len);
+               if (!page) {
+@@ -362,6 +362,7 @@ static struct sk_buff *receive_mergeable
+ err_skb:
+       give_pages(rq, page);
+       while (--num_buf) {
++err_frags:
+               buf = virtqueue_get_buf(rq->vq, &len);
+               if (unlikely(!buf)) {
+                       pr_debug("%s: rx error: %d buffers missing\n",
diff --git a/queue-3.10/virtio_net-fix-error-handling-for-mergeable-buffers.patch b/queue-3.10/virtio_net-fix-error-handling-for-mergeable-buffers.patch
new file mode 100644 (file)
index 0000000..da0db3a
--- /dev/null
@@ -0,0 +1,129 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: "Michael S. Tsirkin" <mst@redhat.com>
+Date: Thu, 26 Dec 2013 15:32:47 +0200
+Subject: virtio_net: fix error handling for mergeable buffers
+
+From: "Michael S. Tsirkin" <mst@redhat.com>
+
+Eric Dumazet noticed that if we encounter an error
+when processing a mergeable buffer, we don't
+dequeue all of the buffers from this packet,
+the result is almost sure to be loss of networking.
+
+Fix this issue.
+
+Cc: Rusty Russell <rusty@rustcorp.com.au>
+Cc: Michael Dalton <mwdalton@google.com>
+Acked-by: Michael Dalton <mwdalton@google.com>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Jason Wang <jasowang@redhat.com>
+Cc: David S. Miller <davem@davemloft.net>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit 8fc3b9e9a229778e5af3aa453c44f1a3857ba769)
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c |   66 ++++++++++++++++++++++++++++++++---------------
+ 1 file changed, 46 insertions(+), 20 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -294,26 +294,33 @@ static struct sk_buff *page_to_skb(struc
+       return skb;
+ }
+-static int receive_mergeable(struct receive_queue *rq, struct sk_buff *skb)
++static struct sk_buff *receive_mergeable(struct net_device *dev,
++                                       struct receive_queue *rq,
++                                       void *buf,
++                                       unsigned int len)
+ {
+-      struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
+-      struct page *page;
+-      int num_buf, i, len;
++      struct skb_vnet_hdr *hdr = page_address(buf);
++      int num_buf = hdr->mhdr.num_buffers;
++      struct page *page = buf;
++      struct sk_buff *skb = page_to_skb(rq, page, len);
++      int i;
++
++      if (unlikely(!skb))
++              goto err_skb;
+-      num_buf = hdr->mhdr.num_buffers;
+       while (--num_buf) {
+               i = skb_shinfo(skb)->nr_frags;
+               if (i >= MAX_SKB_FRAGS) {
+                       pr_debug("%s: packet too long\n", skb->dev->name);
+                       skb->dev->stats.rx_length_errors++;
+-                      return -EINVAL;
++                      return NULL;
+               }
+               page = virtqueue_get_buf(rq->vq, &len);
+               if (!page) {
+-                      pr_debug("%s: rx error: %d buffers missing\n",
+-                               skb->dev->name, hdr->mhdr.num_buffers);
+-                      skb->dev->stats.rx_length_errors++;
+-                      return -EINVAL;
++                      pr_debug("%s: rx error: %d buffers %d missing\n",
++                               dev->name, hdr->mhdr.num_buffers, num_buf);
++                      dev->stats.rx_length_errors++;
++                      goto err_buf;
+               }
+               if (len > PAGE_SIZE)
+@@ -323,7 +330,25 @@ static int receive_mergeable(struct rece
+               --rq->num;
+       }
+-      return 0;
++      return skb;
++err_skb:
++      give_pages(rq, page);
++      while (--num_buf) {
++              buf = virtqueue_get_buf(rq->vq, &len);
++              if (unlikely(!buf)) {
++                      pr_debug("%s: rx error: %d buffers missing\n",
++                               dev->name, num_buf);
++                      dev->stats.rx_length_errors++;
++                      break;
++              }
++              page = buf;
++              give_pages(rq, page);
++              --rq->num;
++      }
++err_buf:
++      dev->stats.rx_dropped++;
++      dev_kfree_skb(skb);
++      return NULL;
+ }
+ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
+@@ -351,17 +376,18 @@ static void receive_buf(struct receive_q
+               skb_trim(skb, len);
+       } else {
+               page = buf;
+-              skb = page_to_skb(rq, page, len);
+-              if (unlikely(!skb)) {
+-                      dev->stats.rx_dropped++;
+-                      give_pages(rq, page);
+-                      return;
+-              }
+-              if (vi->mergeable_rx_bufs)
+-                      if (receive_mergeable(rq, skb)) {
+-                              dev_kfree_skb(skb);
++              if (vi->mergeable_rx_bufs) {
++                      skb = receive_mergeable(dev, rq, page, len);
++                      if (unlikely(!skb))
++                              return;
++              } else {
++                      skb = page_to_skb(rq, page, len);
++                      if (unlikely(!skb)) {
++                              dev->stats.rx_dropped++;
++                              give_pages(rq, page);
+                               return;
+                       }
++              }
+       }
+       hdr = skb_vnet_hdr(skb);
diff --git a/queue-3.10/vlan-fix-header-ops-passthru-when-doing-tx-vlan-offload.patch b/queue-3.10/vlan-fix-header-ops-passthru-when-doing-tx-vlan-offload.patch
new file mode 100644 (file)
index 0000000..6f22041
--- /dev/null
@@ -0,0 +1,105 @@
+From foo@baz Mon Jan 13 09:39:01 PST 2014
+From: "David S. Miller" <davem@davemloft.net>
+Date: Tue, 31 Dec 2013 16:23:35 -0500
+Subject: vlan: Fix header ops passthru when doing TX VLAN offload.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Upstream commit 2205369a314e12fcec4781cc73ac9c08fc2b47de ]
+
+When the vlan code detects that the real device can do TX VLAN offloads
+in hardware, it tries to arrange for the real device's header_ops to
+be invoked directly.
+
+But it does so illegally, by simply hooking the real device's
+header_ops up to the VLAN device.
+
+This doesn't work because we will end up invoking a set of header_ops
+routines which expect a device type which matches the real device, but
+will see a VLAN device instead.
+
+Fix this by providing a pass-thru set of header_ops which will arrange
+to pass the proper real device instead.
+
+To facilitate this add a dev_rebuild_header().  There are
+implementations which provide a ->cache and ->create but not a
+->rebuild (e.g. PLIP).  So we need a helper function just like
+dev_hard_header() to avoid crashes.
+
+Use this helper in the one existing place where the
+header_ops->rebuild was being invoked, the neighbour code.
+
+With lots of help from Florian Westphal.
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/netdevice.h |    9 +++++++++
+ net/8021q/vlan_dev.c      |   19 ++++++++++++++++++-
+ net/core/neighbour.c      |    2 +-
+ 3 files changed, 28 insertions(+), 2 deletions(-)
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -1772,6 +1772,15 @@ static inline int dev_parse_header(const
+       return dev->header_ops->parse(skb, haddr);
+ }
++static inline int dev_rebuild_header(struct sk_buff *skb)
++{
++      const struct net_device *dev = skb->dev;
++
++      if (!dev->header_ops || !dev->header_ops->rebuild)
++              return 0;
++      return dev->header_ops->rebuild(skb);
++}
++
+ typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len);
+ extern int            register_gifconf(unsigned int family, gifconf_func_t * gifconf);
+ static inline int unregister_gifconf(unsigned int family)
+--- a/net/8021q/vlan_dev.c
++++ b/net/8021q/vlan_dev.c
+@@ -549,6 +549,23 @@ static const struct header_ops vlan_head
+       .parse   = eth_header_parse,
+ };
++static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev,
++                                   unsigned short type,
++                                   const void *daddr, const void *saddr,
++                                   unsigned int len)
++{
++      struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
++      struct net_device *real_dev = vlan->real_dev;
++
++      return dev_hard_header(skb, real_dev, type, daddr, saddr, len);
++}
++
++static const struct header_ops vlan_passthru_header_ops = {
++      .create  = vlan_passthru_hard_header,
++      .rebuild = dev_rebuild_header,
++      .parse   = eth_header_parse,
++};
++
+ static struct device_type vlan_type = {
+       .name   = "vlan",
+ };
+@@ -592,7 +609,7 @@ static int vlan_dev_init(struct net_devi
+       dev->needed_headroom = real_dev->needed_headroom;
+       if (real_dev->features & NETIF_F_HW_VLAN_CTAG_TX) {
+-              dev->header_ops      = real_dev->header_ops;
++              dev->header_ops      = &vlan_passthru_header_ops;
+               dev->hard_header_len = real_dev->hard_header_len;
+       } else {
+               dev->header_ops      = &vlan_header_ops;
+--- a/net/core/neighbour.c
++++ b/net/core/neighbour.c
+@@ -1274,7 +1274,7 @@ int neigh_compat_output(struct neighbour
+       if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
+                           skb->len) < 0 &&
+-          dev->header_ops->rebuild(skb))
++          dev_rebuild_header(skb))
+               return 0;
+       return dev_queue_xmit(skb);