]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.4-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 29 May 2014 03:43:43 +0000 (20:43 -0700)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 29 May 2014 03:43:43 +0000 (20:43 -0700)
added patches:
act_mirred-do-not-drop-packets-when-fails-to-mirror-it.patch
bonding-remove-debug_fs-files-when-module-init-fails.patch
filter-prevent-nla-extensions-to-peek-beyond-the-end-of-the-message.patch
ipv4-fib_semantics-increment-fib_info_cnt-after-fib_info-allocation.patch
ipv4-initialise-the-itag-variable-in-__mkroute_input.patch
ipv6-limit-mtu-to-65575-bytes.patch
l2tp-take-pmtu-from-tunnel-udp-socket.patch
list-introduce-list_next_entry-and-list_prev_entry.patch
net-core-don-t-account-for-udp-header-size-when-computing-seglen.patch
net-gro-reset-skb-truesize-in-napi_reuse_skb.patch
net-ipv4-current-group_info-should-be-put-after-using.patch
net-ipv4-ip_forward-fix-inverted-local_df-test.patch
net-sctp-test-if-association-is-dead-in-sctp_wake_up_waiters.patch
net-sctp-wake-up-all-assocs-if-sndbuf-policy-is-per-socket.patch
revert-macvlan-fix-checksums-error-when-we-are-in-bridge-mode.patch
rtnetlink-only-supply-ifla_vf_ports-information-when-rtext_filter_vf-is-set.patch
rtnetlink-warn-when-interface-s-information-won-t-fit-in-our-packet.patch
skb-add-inline-helper-for-getting-the-skb-end-offset-from-head.patch
tcp_cubic-fix-the-range-of-delayed_ack.patch
tg3-update-rx_jumbo_pending-ring-param-only-when-jumbo-frames-are-enabled.patch

21 files changed:
queue-3.4/act_mirred-do-not-drop-packets-when-fails-to-mirror-it.patch [new file with mode: 0644]
queue-3.4/bonding-remove-debug_fs-files-when-module-init-fails.patch [new file with mode: 0644]
queue-3.4/filter-prevent-nla-extensions-to-peek-beyond-the-end-of-the-message.patch [new file with mode: 0644]
queue-3.4/ipv4-fib_semantics-increment-fib_info_cnt-after-fib_info-allocation.patch [new file with mode: 0644]
queue-3.4/ipv4-initialise-the-itag-variable-in-__mkroute_input.patch [new file with mode: 0644]
queue-3.4/ipv6-limit-mtu-to-65575-bytes.patch [new file with mode: 0644]
queue-3.4/l2tp-take-pmtu-from-tunnel-udp-socket.patch [new file with mode: 0644]
queue-3.4/list-introduce-list_next_entry-and-list_prev_entry.patch [new file with mode: 0644]
queue-3.4/net-core-don-t-account-for-udp-header-size-when-computing-seglen.patch [new file with mode: 0644]
queue-3.4/net-gro-reset-skb-truesize-in-napi_reuse_skb.patch [new file with mode: 0644]
queue-3.4/net-ipv4-current-group_info-should-be-put-after-using.patch [new file with mode: 0644]
queue-3.4/net-ipv4-ip_forward-fix-inverted-local_df-test.patch [new file with mode: 0644]
queue-3.4/net-sctp-test-if-association-is-dead-in-sctp_wake_up_waiters.patch [new file with mode: 0644]
queue-3.4/net-sctp-wake-up-all-assocs-if-sndbuf-policy-is-per-socket.patch [new file with mode: 0644]
queue-3.4/revert-macvlan-fix-checksums-error-when-we-are-in-bridge-mode.patch [new file with mode: 0644]
queue-3.4/rtnetlink-only-supply-ifla_vf_ports-information-when-rtext_filter_vf-is-set.patch [new file with mode: 0644]
queue-3.4/rtnetlink-warn-when-interface-s-information-won-t-fit-in-our-packet.patch [new file with mode: 0644]
queue-3.4/series
queue-3.4/skb-add-inline-helper-for-getting-the-skb-end-offset-from-head.patch [new file with mode: 0644]
queue-3.4/tcp_cubic-fix-the-range-of-delayed_ack.patch [new file with mode: 0644]
queue-3.4/tg3-update-rx_jumbo_pending-ring-param-only-when-jumbo-frames-are-enabled.patch [new file with mode: 0644]

diff --git a/queue-3.4/act_mirred-do-not-drop-packets-when-fails-to-mirror-it.patch b/queue-3.4/act_mirred-do-not-drop-packets-when-fails-to-mirror-it.patch
new file mode 100644 (file)
index 0000000..31d9c20
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Wed May 28 20:24:34 PDT 2014
+From: Jason Wang <jasowang@redhat.com>
+Date: Wed, 15 Aug 2012 20:44:27 +0000
+Subject: act_mirred: do not drop packets when fails to mirror it
+
+From: Jason Wang <jasowang@redhat.com>
+
+[ Upstream commit 16c0b164bd24d44db137693a36b428ba28970c62 ]
+
+We drop packet unconditionally when we fail to mirror it. This is not intended
+in some cases. Consider for kvm guest, we may mirror the traffic of the bridge
+to a tap device used by a VM. When kernel fails to mirror the packet in
+conditions such as when qemu crashes or stop polling the tap, it's hard for the
+management software to detect such condition and clean the mirroring
+before. This would lead all packets to the bridge to be dropped and break the
+network of other virtual machines.
+
+To solve the issue, the patch does not drop packets when kernel fails to mirror
+it, and only drop the redirected packets.
+
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/act_mirred.c |   11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+--- a/net/sched/act_mirred.c
++++ b/net/sched/act_mirred.c
+@@ -201,13 +201,12 @@ static int tcf_mirred(struct sk_buff *sk
+ out:
+       if (err) {
+               m->tcf_qstats.overlimits++;
+-              /* should we be asking for packet to be dropped?
+-               * may make sense for redirect case only
+-               */
+-              retval = TC_ACT_SHOT;
+-      } else {
++              if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
++                      retval = TC_ACT_SHOT;
++              else
++                      retval = m->tcf_action;
++      } else
+               retval = m->tcf_action;
+-      }
+       spin_unlock(&m->tcf_lock);
+       return retval;
diff --git a/queue-3.4/bonding-remove-debug_fs-files-when-module-init-fails.patch b/queue-3.4/bonding-remove-debug_fs-files-when-module-init-fails.patch
new file mode 100644 (file)
index 0000000..91bf313
--- /dev/null
@@ -0,0 +1,32 @@
+From foo@baz Wed May 28 20:24:34 PDT 2014
+From: Thomas Richter <tmricht@linux.vnet.ibm.com>
+Date: Wed, 9 Apr 2014 12:52:59 +0200
+Subject: bonding: Remove debug_fs files when module init fails
+
+From: Thomas Richter <tmricht@linux.vnet.ibm.com>
+
+[ Upstream commit db29868653394937037d71dc3545768302dda643 ]
+
+Remove the bonding debug_fs entries when the
+module initialization fails. The debug_fs
+entries should be removed together with all other
+already allocated resources.
+
+Signed-off-by: Thomas Richter <tmricht@linux.vnet.ibm.com>
+Signed-off-by: Jay Vosburgh <j.vosburgh@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_main.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -4930,6 +4930,7 @@ static int __init bonding_init(void)
+ out:
+       return res;
+ err:
++      bond_destroy_debugfs();
+       rtnl_link_unregister(&bond_link_ops);
+ err_link:
+       unregister_pernet_subsys(&bond_net_ops);
diff --git a/queue-3.4/filter-prevent-nla-extensions-to-peek-beyond-the-end-of-the-message.patch b/queue-3.4/filter-prevent-nla-extensions-to-peek-beyond-the-end-of-the-message.patch
new file mode 100644 (file)
index 0000000..16ea497
--- /dev/null
@@ -0,0 +1,81 @@
+From foo@baz Wed May 28 20:24:34 PDT 2014
+From: Mathias Krause <minipli@googlemail.com>
+Date: Sun, 13 Apr 2014 18:23:33 +0200
+Subject: filter: prevent nla extensions to peek beyond the end of the message
+
+From: Mathias Krause <minipli@googlemail.com>
+
+[ Upstream commit 05ab8f2647e4221cbdb3856dd7d32bd5407316b3 ]
+
+The BPF_S_ANC_NLATTR and BPF_S_ANC_NLATTR_NEST extensions fail to check
+for a minimal message length before testing the supplied offset to be
+within the bounds of the message. This allows the subtraction of the nla
+header to underflow and therefore -- as the data type is unsigned --
+allowing far too big offset and length values for the search of the
+netlink attribute.
+
+The remainder calculation for the BPF_S_ANC_NLATTR_NEST extension is
+also wrong. It has the minuend and subtrahend mixed up, therefore
+calculates a huge length value, allowing to overrun the end of the
+message while looking for the netlink attribute.
+
+The following three BPF snippets will trigger the bugs when attached to
+a UNIX datagram socket and parsing a message with length 1, 2 or 3.
+
+ ,-[ PoC for missing size check in BPF_S_ANC_NLATTR ]--
+ | ld  #0x87654321
+ | ldx #42
+ | ld  #nla
+ | ret a
+ `---
+
+ ,-[ PoC for the same bug in BPF_S_ANC_NLATTR_NEST ]--
+ | ld  #0x87654321
+ | ldx #42
+ | ld  #nlan
+ | ret a
+ `---
+
+ ,-[ PoC for wrong remainder calculation in BPF_S_ANC_NLATTR_NEST ]--
+ | ; (needs a fake netlink header at offset 0)
+ | ld  #0
+ | ldx #42
+ | ld  #nlan
+ | ret a
+ `---
+
+Fix the first issue by ensuring the message length fulfills the minimal
+size constraints of a nla header. Fix the second bug by getting the math
+for the remainder calculation right.
+
+Fixes: 4738c1db15 ("[SKFILTER]: Add SKF_ADF_NLATTR instruction")
+Fixes: d214c7537b ("filter: add SKF_AD_NLATTR_NEST to look for nested..")
+Cc: Patrick McHardy <kaber@trash.net>
+Cc: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Mathias Krause <minipli@googlemail.com>
+Acked-by: Daniel Borkmann <dborkman@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/filter.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -338,11 +338,15 @@ load_b:
+                       if (skb_is_nonlinear(skb))
+                               return 0;
++                      if (skb->len < sizeof(struct nlattr))
++                              return 0;
++                      if (skb->len < sizeof(struct nlattr))
++                              return 0;
+                       if (A > skb->len - sizeof(struct nlattr))
+                               return 0;
+                       nla = (struct nlattr *)&skb->data[A];
+-                      if (nla->nla_len > A - skb->len)
++                      if (nla->nla_len > skb->len - A)
+                               return 0;
+                       nla = nla_find_nested(nla, X);
diff --git a/queue-3.4/ipv4-fib_semantics-increment-fib_info_cnt-after-fib_info-allocation.patch b/queue-3.4/ipv4-fib_semantics-increment-fib_info_cnt-after-fib_info-allocation.patch
new file mode 100644 (file)
index 0000000..d34b739
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Wed May 28 20:24:34 PDT 2014
+From: Sergey Popovich <popovich_sergei@mail.ru>
+Date: Tue, 6 May 2014 18:23:08 +0300
+Subject: ipv4: fib_semantics: increment fib_info_cnt after fib_info allocation
+
+From: Sergey Popovich <popovich_sergei@mail.ru>
+
+[ Upstream commit aeefa1ecfc799b0ea2c4979617f14cecd5cccbfd ]
+
+Increment fib_info_cnt in fib_create_info() right after successfully
+allocating fib_info structure, otherwise fib_metrics allocation failure
+leads to fib_info_cnt incorrectly decremented in free_fib_info(), called
+on error path from fib_create_info().
+
+Signed-off-by: Sergey Popovich <popovich_sergei@mail.ru>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_semantics.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/fib_semantics.c
++++ b/net/ipv4/fib_semantics.c
+@@ -751,13 +751,13 @@ struct fib_info *fib_create_info(struct
+       fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
+       if (fi == NULL)
+               goto failure;
++      fib_info_cnt++;
+       if (cfg->fc_mx) {
+               fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
+               if (!fi->fib_metrics)
+                       goto failure;
+       } else
+               fi->fib_metrics = (u32 *) dst_default_metrics;
+-      fib_info_cnt++;
+       fi->fib_net = hold_net(net);
+       fi->fib_protocol = cfg->fc_protocol;
diff --git a/queue-3.4/ipv4-initialise-the-itag-variable-in-__mkroute_input.patch b/queue-3.4/ipv4-initialise-the-itag-variable-in-__mkroute_input.patch
new file mode 100644 (file)
index 0000000..d9c01eb
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Wed May 28 20:24:34 PDT 2014
+From: Li RongQing <roy.qing.li@gmail.com>
+Date: Thu, 22 May 2014 16:36:55 +0800
+Subject: ipv4: initialise the itag variable in __mkroute_input
+
+From: Li RongQing <roy.qing.li@gmail.com>
+
+[ Upstream commit fbdc0ad095c0a299e9abf5d8ac8f58374951149a ]
+
+the value of itag is a random value from stack, and may not be initialised by
+fib_validate_source, which calls fib_combine_itag if CONFIG_IP_ROUTE_CLASSID
+is not set
+
+This will make the cached dst uncertain
+
+Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
+Acked-by: Alexei Starovoitov <ast@plumgrid.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/route.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -2129,7 +2129,7 @@ static int __mkroute_input(struct sk_buf
+       struct in_device *out_dev;
+       unsigned int flags = 0;
+       __be32 spec_dst;
+-      u32 itag;
++      u32 itag = 0;
+       /* get a working reference to the output device */
+       out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
diff --git a/queue-3.4/ipv6-limit-mtu-to-65575-bytes.patch b/queue-3.4/ipv6-limit-mtu-to-65575-bytes.patch
new file mode 100644 (file)
index 0000000..7fe64f6
--- /dev/null
@@ -0,0 +1,71 @@
+From foo@baz Wed May 28 20:24:34 PDT 2014
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 10 Apr 2014 21:23:36 -0700
+Subject: ipv6: Limit mtu to 65575 bytes
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 30f78d8ebf7f514801e71b88a10c948275168518 ]
+
+Francois reported that setting big mtu on loopback device could prevent
+tcp sessions making progress.
+
+We do not support (yet ?) IPv6 Jumbograms and cook corrupted packets.
+
+We must limit the IPv6 MTU to (65535 + 40) bytes in theory.
+
+Tested:
+
+ifconfig lo mtu 70000
+netperf -H ::1
+
+Before patch : Throughput :   0.05 Mbits
+
+After patch : Throughput : 35484 Mbits
+
+Reported-by: Francois WELLENREITER <f.wellenreiter@gmail.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/ip6_route.h |    5 +++++
+ net/ipv6/route.c        |    5 +++--
+ 2 files changed, 8 insertions(+), 2 deletions(-)
+
+--- a/include/net/ip6_route.h
++++ b/include/net/ip6_route.h
+@@ -34,6 +34,11 @@ struct route_info {
+ #define RT6_LOOKUP_F_SRCPREF_PUBLIC   0x00000010
+ #define RT6_LOOKUP_F_SRCPREF_COA      0x00000020
++/* We do not (yet ?) support IPv6 jumbograms (RFC 2675)
++ * Unlike IPv4, hdr->seg_len doesn't include the IPv6 header
++ */
++#define IP6_MAX_MTU (0xFFFF + sizeof(struct ipv6hdr))
++
+ /*
+  * rt6_srcprefs2flags() and rt6_flags2srcprefs() translate
+  * between IPV6_ADDR_PREFERENCES socket option values
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -1092,7 +1092,7 @@ static unsigned int ip6_mtu(const struct
+       unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+       if (mtu)
+-              return mtu;
++              goto out;
+       mtu = IPV6_MIN_MTU;
+@@ -1102,7 +1102,8 @@ static unsigned int ip6_mtu(const struct
+               mtu = idev->cnf.mtu6;
+       rcu_read_unlock();
+-      return mtu;
++out:
++      return min_t(unsigned int, mtu, IP6_MAX_MTU);
+ }
+ static struct dst_entry *icmp6_dst_gc_list;
diff --git a/queue-3.4/l2tp-take-pmtu-from-tunnel-udp-socket.patch b/queue-3.4/l2tp-take-pmtu-from-tunnel-udp-socket.patch
new file mode 100644 (file)
index 0000000..b42a817
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Wed May 28 20:24:34 PDT 2014
+From: Dmitry Petukhov <dmgenp@gmail.com>
+Date: Wed, 9 Apr 2014 02:23:20 +0600
+Subject: l2tp: take PMTU from tunnel UDP socket
+
+From: Dmitry Petukhov <dmgenp@gmail.com>
+
+[ Upstream commit f34c4a35d87949fbb0e0f31eba3c054e9f8199ba ]
+
+When l2tp driver tries to get PMTU for the tunnel destination, it uses
+the pointer to struct sock that represents PPPoX socket, while it
+should use the pointer that represents UDP socket of the tunnel.
+
+Signed-off-by: Dmitry Petukhov <dmgenp@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/l2tp/l2tp_ppp.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/l2tp/l2tp_ppp.c
++++ b/net/l2tp/l2tp_ppp.c
+@@ -772,9 +772,9 @@ static int pppol2tp_connect(struct socke
+       session->deref = pppol2tp_session_sock_put;
+       /* If PMTU discovery was enabled, use the MTU that was discovered */
+-      dst = sk_dst_get(sk);
++      dst = sk_dst_get(tunnel->sock);
+       if (dst != NULL) {
+-              u32 pmtu = dst_mtu(__sk_dst_get(sk));
++              u32 pmtu = dst_mtu(__sk_dst_get(tunnel->sock));
+               if (pmtu != 0)
+                       session->mtu = session->mru = pmtu -
+                               PPPOL2TP_HEADER_OVERHEAD;
diff --git a/queue-3.4/list-introduce-list_next_entry-and-list_prev_entry.patch b/queue-3.4/list-introduce-list_next_entry-and-list_prev_entry.patch
new file mode 100644 (file)
index 0000000..016b01f
--- /dev/null
@@ -0,0 +1,76 @@
+From foo@baz Wed May 28 20:24:34 PDT 2014
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Tue, 12 Nov 2013 15:10:01 -0800
+Subject: list: introduce list_next_entry() and list_prev_entry()
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit 008208c6b26f21c2648c250a09c55e737c02c5f8 ]
+
+Add two trivial helpers list_next_entry() and list_prev_entry(), they
+can have a lot of users including list.h itself.  In fact the 1st one is
+already defined in events/core.c and bnx2x_sp.c, so the patch simply
+moves the definition to list.h.
+
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Cc: Eilon Greenstein <eilong@broadcom.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c |    3 ---
+ include/linux/list.h                           |   16 ++++++++++++++++
+ kernel/events/core.c                           |    3 ---
+ 3 files changed, 16 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
+@@ -1030,9 +1030,6 @@ static void bnx2x_set_one_vlan_mac_e1h(s
+                                    ETH_VLAN_FILTER_CLASSIFY, config);
+ }
+-#define list_next_entry(pos, member) \
+-      list_entry((pos)->member.next, typeof(*(pos)), member)
+-
+ /**
+  * bnx2x_vlan_mac_restore - reconfigure next MAC/VLAN/VLAN-MAC element
+  *
+--- a/include/linux/list.h
++++ b/include/linux/list.h
+@@ -362,6 +362,22 @@ static inline void list_splice_tail_init
+       list_entry((ptr)->next, type, member)
+ /**
++ * list_next_entry - get the next element in list
++ * @pos:      the type * to cursor
++ * @member:   the name of the list_struct within the struct.
++ */
++#define list_next_entry(pos, member) \
++      list_entry((pos)->member.next, typeof(*(pos)), member)
++
++/**
++ * list_prev_entry - get the prev element in list
++ * @pos:      the type * to cursor
++ * @member:   the name of the list_struct within the struct.
++ */
++#define list_prev_entry(pos, member) \
++      list_entry((pos)->member.prev, typeof(*(pos)), member)
++
++/**
+  * list_for_each      -       iterate over a list
+  * @pos:      the &struct list_head to use as a loop cursor.
+  * @head:     the head for your list.
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -1973,9 +1973,6 @@ static void __perf_event_sync_stat(struc
+       perf_event_update_userpage(next_event);
+ }
+-#define list_next_entry(pos, member) \
+-      list_entry(pos->member.next, typeof(*pos), member)
+-
+ static void perf_event_sync_stat(struct perf_event_context *ctx,
+                                  struct perf_event_context *next_ctx)
+ {
diff --git a/queue-3.4/net-core-don-t-account-for-udp-header-size-when-computing-seglen.patch b/queue-3.4/net-core-don-t-account-for-udp-header-size-when-computing-seglen.patch
new file mode 100644 (file)
index 0000000..1550483
--- /dev/null
@@ -0,0 +1,50 @@
+From foo@baz Wed May 28 20:24:34 PDT 2014
+From: Florian Westphal <fw@strlen.de>
+Date: Wed, 9 Apr 2014 10:28:50 +0200
+Subject: net: core: don't account for udp header size when computing seglen
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 6d39d589bb76ee8a1c6cde6822006ae0053decff ]
+
+In case of tcp, gso_size contains the tcpmss.
+
+For UFO (udp fragmentation offloading) skbs, gso_size is the fragment
+payload size, i.e. we must not account for udp header size.
+
+Otherwise, when using virtio drivers, a to-be-forwarded UFO GSO packet
+will be needlessly fragmented in the forward path, because we think its
+individual segments are too large for the outgoing link.
+
+Fixes: fe6cc55f3a9a053 ("net: ip, ipv6: handle gso skbs in forwarding path")
+Cc: Eric Dumazet <eric.dumazet@gmail.com>
+Reported-by: Tobias Brunner <tobias@strongswan.org>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/skbuff.c |   12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -3297,12 +3297,14 @@ EXPORT_SYMBOL(__skb_warn_lro_forwarding)
+ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
+ {
+       const struct skb_shared_info *shinfo = skb_shinfo(skb);
+-      unsigned int hdr_len;
+       if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+-              hdr_len = tcp_hdrlen(skb);
+-      else
+-              hdr_len = sizeof(struct udphdr);
+-      return hdr_len + shinfo->gso_size;
++              return tcp_hdrlen(skb) + shinfo->gso_size;
++
++      /* UFO sets gso_size to the size of the fragmentation
++       * payload, i.e. the size of the L4 (UDP) header is already
++       * accounted for.
++       */
++      return shinfo->gso_size;
+ }
+ EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
diff --git a/queue-3.4/net-gro-reset-skb-truesize-in-napi_reuse_skb.patch b/queue-3.4/net-gro-reset-skb-truesize-in-napi_reuse_skb.patch
new file mode 100644 (file)
index 0000000..902083b
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Wed May 28 20:24:34 PDT 2014
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 3 Apr 2014 09:28:10 -0700
+Subject: net-gro: reset skb->truesize in napi_reuse_skb()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit e33d0ba8047b049c9262fdb1fcafb93cb52ceceb ]
+
+Recycling skb always had been very tough...
+
+This time it appears GRO layer can accumulate skb->truesize
+adjustments made by drivers when they attach a fragment to skb.
+
+skb_gro_receive() can only subtract from skb->truesize the used part
+of a fragment.
+
+I spotted this problem seeing TcpExtPruneCalled and
+TcpExtTCPRcvCollapsed that were unexpected with a recent kernel, where
+TCP receive window should be sized properly to accept traffic coming
+from a driver not overshooting skb->truesize.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -3574,6 +3574,7 @@ static void napi_reuse_skb(struct napi_s
+       skb->vlan_tci = 0;
+       skb->dev = napi->dev;
+       skb->skb_iif = 0;
++      skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
+       napi->skb = skb;
+ }
diff --git a/queue-3.4/net-ipv4-current-group_info-should-be-put-after-using.patch b/queue-3.4/net-ipv4-current-group_info-should-be-put-after-using.patch
new file mode 100644 (file)
index 0000000..5eb4e0d
--- /dev/null
@@ -0,0 +1,64 @@
+From foo@baz Wed May 28 20:24:34 PDT 2014
+From: "Wang, Xiaoming" <xiaoming.wang@intel.com>
+Date: Mon, 14 Apr 2014 12:30:45 -0400
+Subject: net: ipv4: current group_info should be put after using.
+
+From: "Wang, Xiaoming" <xiaoming.wang@intel.com>
+
+[ Upstream commit b04c46190219a4f845e46a459e3102137b7f6cac ]
+
+Plug a group_info refcount leak in ping_init.
+group_info is only needed during initialization and
+the code failed to release the reference on exit.
+While here move grabbing the reference to a place
+where it is actually needed.
+
+Signed-off-by: Chuansheng Liu <chuansheng.liu@intel.com>
+Signed-off-by: Zhang Dongxing <dongxing.zhang@intel.com>
+Signed-off-by: xiaoming wang <xiaoming.wang@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ping.c |   15 +++++++++++----
+ 1 file changed, 11 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/ping.c
++++ b/net/ipv4/ping.c
+@@ -203,26 +203,33 @@ static int ping_init_sock(struct sock *s
+       struct net *net = sock_net(sk);
+       gid_t group = current_egid();
+       gid_t range[2];
+-      struct group_info *group_info = get_current_groups();
+-      int i, j, count = group_info->ngroups;
++      struct group_info *group_info;
++      int i, j, count;
++      int ret = 0;
+       inet_get_ping_group_range_net(net, range, range+1);
+       if (range[0] <= group && group <= range[1])
+               return 0;
++      group_info = get_current_groups();
++      count = group_info->ngroups;
+       for (i = 0; i < group_info->nblocks; i++) {
+               int cp_count = min_t(int, NGROUPS_PER_BLOCK, count);
+               for (j = 0; j < cp_count; j++) {
+                       group = group_info->blocks[i][j];
+                       if (range[0] <= group && group <= range[1])
+-                              return 0;
++                              goto out_release_group;
+               }
+               count -= cp_count;
+       }
+-      return -EACCES;
++      ret = -EACCES;
++
++out_release_group:
++      put_group_info(group_info);
++      return ret;
+ }
+ static void ping_close(struct sock *sk, long timeout)
diff --git a/queue-3.4/net-ipv4-ip_forward-fix-inverted-local_df-test.patch b/queue-3.4/net-ipv4-ip_forward-fix-inverted-local_df-test.patch
new file mode 100644 (file)
index 0000000..e1b4717
--- /dev/null
@@ -0,0 +1,47 @@
+From foo@baz Wed May 28 20:24:34 PDT 2014
+From: Florian Westphal <fw@strlen.de>
+Date: Sun, 4 May 2014 23:24:31 +0200
+Subject: net: ipv4: ip_forward: fix inverted local_df test
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit ca6c5d4ad216d5942ae544bbf02503041bd802aa ]
+
+local_df means 'ignore DF bit if set', so if its set we're
+allowed to perform ip fragmentation.
+
+This wasn't noticed earlier because the output path also drops such skbs
+(and emits needed icmp error) and because netfilter ip defrag did not
+set local_df until couple of days ago.
+
+Only difference is that DF-packets-larger-than MTU now discarded
+earlier (f.e. we avoid pointless netfilter postrouting trip).
+
+While at it, drop the repeated test ip_exceeds_mtu, checking it once
+is enough...
+
+Fixes: fe6cc55f3a9 ("net: ip, ipv6: handle gso skbs in forwarding path")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_forward.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/ip_forward.c
++++ b/net/ipv4/ip_forward.c
+@@ -42,12 +42,12 @@
+ static bool ip_may_fragment(const struct sk_buff *skb)
+ {
+       return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ||
+-             !skb->local_df;
++              skb->local_df;
+ }
+ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+ {
+-      if (skb->len <= mtu || skb->local_df)
++      if (skb->len <= mtu)
+               return false;
+       if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
diff --git a/queue-3.4/net-sctp-test-if-association-is-dead-in-sctp_wake_up_waiters.patch b/queue-3.4/net-sctp-test-if-association-is-dead-in-sctp_wake_up_waiters.patch
new file mode 100644 (file)
index 0000000..c8a7be6
--- /dev/null
@@ -0,0 +1,81 @@
+From foo@baz Wed May 28 20:24:34 PDT 2014
+From: Daniel Borkmann <dborkman@redhat.com>
+Date: Wed, 9 Apr 2014 16:10:20 +0200
+Subject: net: sctp: test if association is dead in sctp_wake_up_waiters
+
+From: Daniel Borkmann <dborkman@redhat.com>
+
+[ Upstream commit 1e1cdf8ac78793e0875465e98a648df64694a8d0 ]
+
+In function sctp_wake_up_waiters(), we need to involve a test
+if the association is declared dead. If so, we don't have any
+reference to a possible sibling association anymore and need
+to invoke sctp_write_space() instead, and normally walk the
+socket's associations and notify them of new wmem space. The
+reason for special casing is that otherwise, we could run
+into the following issue when a sctp_primitive_SEND() call
+from sctp_sendmsg() fails, and tries to flush an association's
+outq, i.e. in the following way:
+
+sctp_association_free()
+`-> list_del(&asoc->asocs)         <-- poisons list pointer
+    asoc->base.dead = true
+    sctp_outq_free(&asoc->outqueue)
+    `-> __sctp_outq_teardown()
+     `-> sctp_chunk_free()
+      `-> consume_skb()
+       `-> sctp_wfree()
+        `-> sctp_wake_up_waiters() <-- dereferences poisoned pointers
+                                       if asoc->ep->sndbuf_policy=0
+
+Therefore, only walk the list in an 'optimized' way if we find
+that the current association is still active. We could also use
+list_del_init() in addition when we call sctp_association_free(),
+but as Vlad suggests, we want to trap such bugs and thus leave
+it poisoned as is.
+
+Why is it safe to resolve the issue by testing for asoc->base.dead?
+Parallel calls to sctp_sendmsg() are protected under socket lock,
+that is lock_sock()/release_sock(). Only within that path under
+lock held, we're setting skb/chunk owner via sctp_set_owner_w().
+Eventually, chunks are freed directly by an association still
+under that lock. So when traversing association list on destruction
+time from sctp_wake_up_waiters() via sctp_wfree(), a different
+CPU can't be running sctp_wfree() while another one calls
+sctp_association_free() as both happens under the same lock.
+Therefore, this can also not race with setting/testing against
+asoc->base.dead as we are guaranteed for this to happen in order,
+under lock. Further, Vlad says: the times we check asoc->base.dead
+is when we've cached an association pointer for later processing.
+In between cache and processing, the association may have been
+freed and is simply still around due to reference counts. We check
+asoc->base.dead under a lock, so it should always be safe to check
+and not race against sctp_association_free(). Stress-testing seems
+fine now, too.
+
+Fixes: cd253f9f357d ("net: sctp: wake up all assocs if sndbuf policy is per socket")
+Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
+Cc: Vlad Yasevich <vyasevic@redhat.com>
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Acked-by: Vlad Yasevich <vyasevic@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/socket.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -6380,6 +6380,12 @@ static void sctp_wake_up_waiters(struct
+       if (asoc->ep->sndbuf_policy)
+               return __sctp_write_space(asoc);
++      /* If association goes down and is just flushing its
++       * outq, then just normally notify others.
++       */
++      if (asoc->base.dead)
++              return sctp_write_space(sk);
++
+       /* Accounting for the sndbuf space is per socket, so we
+        * need to wake up others, try to be fair and in case of
+        * other associations, let them have a go first instead
diff --git a/queue-3.4/net-sctp-wake-up-all-assocs-if-sndbuf-policy-is-per-socket.patch b/queue-3.4/net-sctp-wake-up-all-assocs-if-sndbuf-policy-is-per-socket.patch
new file mode 100644 (file)
index 0000000..0eed9e6
--- /dev/null
@@ -0,0 +1,116 @@
+From foo@baz Wed May 28 20:24:34 PDT 2014
+From: Daniel Borkmann <dborkman@redhat.com>
+Date: Tue, 8 Apr 2014 17:26:13 +0200
+Subject: net: sctp: wake up all assocs if sndbuf policy is per socket
+
+From: Daniel Borkmann <dborkman@redhat.com>
+
+[ Upstream commit 52c35befb69b005c3fc5afdaae3a5717ad013411 ]
+
+SCTP charges chunks for wmem accounting via skb->truesize in
+sctp_set_owner_w(), and sctp_wfree() respectively as the
+reverse operation. If a sender runs out of wmem, it needs to
+wait via sctp_wait_for_sndbuf(), and gets woken up by a call
+to __sctp_write_space() mostly via sctp_wfree().
+
+__sctp_write_space() is being called per association. Although
+we assign sk->sk_write_space() to sctp_write_space(), which
+is then being done per socket, it is only used if send space
+is increased per socket option (SO_SNDBUF), as SOCK_USE_WRITE_QUEUE
+is set and therefore not invoked in sock_wfree().
+
+Commit 4c3a5bdae293 ("sctp: Don't charge for data in sndbuf
+again when transmitting packet") fixed an issue where in case
+sctp_packet_transmit() manages to queue up more than sndbuf
+bytes, sctp_wait_for_sndbuf() will never be woken up again
+unless it is interrupted by a signal. However, a still
+remaining issue is that if net.sctp.sndbuf_policy=0, that is
+accounting per socket, and one-to-many sockets are in use,
+the reclaimed write space from sctp_wfree() is 'unfairly'
+handed back on the server to the association that is the lucky
+one to be woken up again via __sctp_write_space(), while
+the remaining associations are never woken up again
+(unless by a signal).
+
+The effect disappears with net.sctp.sndbuf_policy=1, that
+is wmem accounting per association, as it guarantees a fair
+share of wmem among associations.
+
+Therefore, if we have reclaimed memory in case of per socket
+accounting, wake all related associations to a socket in a
+fair manner, that is, traverse the socket association list
+starting from the current neighbour of the association and
+issue a __sctp_write_space() to everyone until we end up
+waking ourselves. This guarantees that no association is
+preferred over another and even if more associations are
+taken into the one-to-many session, all receivers will get
+messages from the server and are not stalled forever on
+high load. This setting still leaves the advantage of per
+socket accounting in touch as an association can still use
+up global limits if unused by others.
+
+Fixes: 4eb701dfc618 ("[SCTP] Fix SCTP sendbuffer accouting.")
+Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
+Cc: Thomas Graf <tgraf@suug.ch>
+Cc: Neil Horman <nhorman@tuxdriver.com>
+Cc: Vlad Yasevich <vyasevic@redhat.com>
+Acked-by: Vlad Yasevich <vyasevic@redhat.com>
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/socket.c |   36 +++++++++++++++++++++++++++++++++++-
+ 1 file changed, 35 insertions(+), 1 deletion(-)
+
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -6369,6 +6369,40 @@ static void __sctp_write_space(struct sc
+       }
+ }
++static void sctp_wake_up_waiters(struct sock *sk,
++                               struct sctp_association *asoc)
++{
++      struct sctp_association *tmp = asoc;
++
++      /* We do accounting for the sndbuf space per association,
++       * so we only need to wake our own association.
++       */
++      if (asoc->ep->sndbuf_policy)
++              return __sctp_write_space(asoc);
++
++      /* Accounting for the sndbuf space is per socket, so we
++       * need to wake up others, try to be fair and in case of
++       * other associations, let them have a go first instead
++       * of just doing a sctp_write_space() call.
++       *
++       * Note that we reach sctp_wake_up_waiters() only when
++       * associations free up queued chunks, thus we are under
++       * lock and the list of associations on a socket is
++       * guaranteed not to change.
++       */
++      for (tmp = list_next_entry(tmp, asocs); 1;
++           tmp = list_next_entry(tmp, asocs)) {
++              /* Manually skip the head element. */
++              if (&tmp->asocs == &((sctp_sk(sk))->ep->asocs))
++                      continue;
++              /* Wake up association. */
++              __sctp_write_space(tmp);
++              /* We've reached the end. */
++              if (tmp == asoc)
++                      break;
++      }
++}
++
+ /* Do accounting for the sndbuf space.
+  * Decrement the used sndbuf space of the corresponding association by the
+  * data size which was just transmitted(freed).
+@@ -6396,7 +6430,7 @@ static void sctp_wfree(struct sk_buff *s
+       sk_mem_uncharge(sk, skb->truesize);
+       sock_wfree(skb);
+-      __sctp_write_space(asoc);
++      sctp_wake_up_waiters(sk, asoc);
+       sctp_association_put(asoc);
+ }
diff --git a/queue-3.4/revert-macvlan-fix-checksums-error-when-we-are-in-bridge-mode.patch b/queue-3.4/revert-macvlan-fix-checksums-error-when-we-are-in-bridge-mode.patch
new file mode 100644 (file)
index 0000000..b550599
--- /dev/null
@@ -0,0 +1,59 @@
+From foo@baz Wed May 28 20:24:34 PDT 2014
+From: Vlad Yasevich <vyasevic@redhat.com>
+Date: Tue, 29 Apr 2014 10:09:51 -0400
+Subject: Revert "macvlan : fix checksums error when we are in bridge mode"
+
+From: Vlad Yasevich <vyasevic@redhat.com>
+
+[ Upstream commit f114890cdf84d753f6b41cd0cc44ba51d16313da ]
+
+This reverts commit 12a2856b604476c27d85a5f9a57ae1661fc46019.
+The commit above doesn't appear to be necessary any more as the
+checksums appear to be correctly computed/validated.
+
+Additionally the above commit breaks kvm configurations where
+one VM is using a device that support checksum offload (virtio) and
+the other VM does not.
+In this case, packets leaving virtio device will have CHECKSUM_PARTIAL
+set.  The packet is forwarded to a macvtap that has offload features
+turned off.  Since we use CHECKSUM_UNNECESSARY, the host does not
+update the checksum and thus a bad checksum is passed up to
+the guest.
+
+CC: Daniel Lezcano <daniel.lezcano@free.fr>
+CC: Patrick McHardy <kaber@trash.net>
+CC: Andrian Nord <nightnord@gmail.com>
+CC: Eric Dumazet <eric.dumazet@gmail.com>
+CC: Michael S. Tsirkin <mst@redhat.com>
+CC: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/macvlan.c |    3 ---
+ 1 file changed, 3 deletions(-)
+
+--- a/drivers/net/macvlan.c
++++ b/drivers/net/macvlan.c
+@@ -237,11 +237,9 @@ static int macvlan_queue_xmit(struct sk_
+       const struct macvlan_dev *vlan = netdev_priv(dev);
+       const struct macvlan_port *port = vlan->port;
+       const struct macvlan_dev *dest;
+-      __u8 ip_summed = skb->ip_summed;
+       if (vlan->mode == MACVLAN_MODE_BRIDGE) {
+               const struct ethhdr *eth = (void *)skb->data;
+-              skb->ip_summed = CHECKSUM_UNNECESSARY;
+               /* send to other bridge ports directly */
+               if (is_multicast_ether_addr(eth->h_dest)) {
+@@ -259,7 +257,6 @@ static int macvlan_queue_xmit(struct sk_
+       }
+ xmit_world:
+-      skb->ip_summed = ip_summed;
+       skb->dev = vlan->lowerdev;
+       return dev_queue_xmit(skb);
+ }
diff --git a/queue-3.4/rtnetlink-only-supply-ifla_vf_ports-information-when-rtext_filter_vf-is-set.patch b/queue-3.4/rtnetlink-only-supply-ifla_vf_ports-information-when-rtext_filter_vf-is-set.patch
new file mode 100644 (file)
index 0000000..2c36726
--- /dev/null
@@ -0,0 +1,98 @@
+From foo@baz Wed May 28 20:24:34 PDT 2014
+From: David Gibson <david@gibson.dropbear.id.au>
+Date: Thu, 24 Apr 2014 10:22:36 +1000
+Subject: rtnetlink: Only supply IFLA_VF_PORTS information when RTEXT_FILTER_VF is set
+
+From: David Gibson <david@gibson.dropbear.id.au>
+
+[ Upstream commit c53864fd60227de025cb79e05493b13f69843971 ]
+
+Since 115c9b81928360d769a76c632bae62d15206a94a (rtnetlink: Fix problem with
+buffer allocation), RTM_NEWLINK messages only contain the IFLA_VFINFO_LIST
+attribute if they were solicited by a GETLINK message containing an
+IFLA_EXT_MASK attribute with the RTEXT_FILTER_VF flag.
+
+That was done because some user programs broke when they received more data
+than expected - because IFLA_VFINFO_LIST contains information for each VF
+it can become large if there are many VFs.
+
+However, the IFLA_VF_PORTS attribute, supplied for devices which implement
+ndo_get_vf_port (currently the 'enic' driver only), has the same problem.
+It supplies per-VF information and can therefore become large, but it is
+not currently conditional on the IFLA_EXT_MASK value.
+
+Worse, it interacts badly with the existing EXT_MASK handling.  When
+IFLA_EXT_MASK is not supplied, the buffer for netlink replies is fixed at
+NLMSG_GOODSIZE.  If the information for IFLA_VF_PORTS exceeds this, then
+rtnl_fill_ifinfo() returns -EMSGSIZE on the first message in a packet.
+netlink_dump() will misinterpret this as having finished the listing and
+omit data for this interface and all subsequent ones.  That can cause
+getifaddrs(3) to enter an infinite loop.
+
+This patch addresses the problem by only supplying IFLA_VF_PORTS when
+IFLA_EXT_MASK is supplied with the RTEXT_FILTER_VF flag set.
+
+Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
+Reviewed-by: Jiri Pirko <jiri@resnulli.us>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c |   16 ++++++++++------
+ 1 file changed, 10 insertions(+), 6 deletions(-)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -746,7 +746,8 @@ static inline int rtnl_vfinfo_size(const
+               return 0;
+ }
+-static size_t rtnl_port_size(const struct net_device *dev)
++static size_t rtnl_port_size(const struct net_device *dev,
++                           u32 ext_filter_mask)
+ {
+       size_t port_size = nla_total_size(4)            /* PORT_VF */
+               + nla_total_size(PORT_PROFILE_MAX)      /* PORT_PROFILE */
+@@ -762,7 +763,8 @@ static size_t rtnl_port_size(const struc
+       size_t port_self_size = nla_total_size(sizeof(struct nlattr))
+               + port_size;
+-      if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent)
++      if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent ||
++          !(ext_filter_mask & RTEXT_FILTER_VF))
+               return 0;
+       if (dev_num_vf(dev->dev.parent))
+               return port_self_size + vf_ports_size +
+@@ -793,7 +795,7 @@ static noinline size_t if_nlmsg_size(con
+              + nla_total_size(ext_filter_mask
+                               & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
+              + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
+-             + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
++             + rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
+              + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
+              + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */
+ }
+@@ -853,11 +855,13 @@ static int rtnl_port_self_fill(struct sk
+       return 0;
+ }
+-static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev)
++static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev,
++                        u32 ext_filter_mask)
+ {
+       int err;
+-      if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent)
++      if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent ||
++          !(ext_filter_mask & RTEXT_FILTER_VF))
+               return 0;
+       err = rtnl_port_self_fill(skb, dev);
+@@ -1004,7 +1008,7 @@ static int rtnl_fill_ifinfo(struct sk_bu
+               nla_nest_end(skb, vfinfo);
+       }
+-      if (rtnl_port_fill(skb, dev))
++      if (rtnl_port_fill(skb, dev, ext_filter_mask))
+               goto nla_put_failure;
+       if (dev->rtnl_link_ops) {
diff --git a/queue-3.4/rtnetlink-warn-when-interface-s-information-won-t-fit-in-our-packet.patch b/queue-3.4/rtnetlink-warn-when-interface-s-information-won-t-fit-in-our-packet.patch
new file mode 100644 (file)
index 0000000..aea69dd
--- /dev/null
@@ -0,0 +1,63 @@
+From foo@baz Wed May 28 20:24:34 PDT 2014
+From: David Gibson <david@gibson.dropbear.id.au>
+Date: Thu, 24 Apr 2014 10:22:35 +1000
+Subject: rtnetlink: Warn when interface's information won't fit in our packet
+
+From: David Gibson <david@gibson.dropbear.id.au>
+
+[ Upstream commit 973462bbde79bb827824c73b59027a0aed5c9ca6 ]
+
+Without IFLA_EXT_MASK specified, the information reported for a single
+interface in response to RTM_GETLINK is expected to fit within a netlink
+packet of NLMSG_GOODSIZE.
+
+If it doesn't, however, things will go badly wrong.  When listing all
+interfaces, netlink_dump() will incorrectly treat -EMSGSIZE on the first
+message in a packet as the end of the listing and omit information for
+that interface and all subsequent ones.  This can cause getifaddrs(3) to
+enter an infinite loop.
+
+This patch won't fix the problem, but it will WARN_ON() making it easier to
+track down what's going wrong.
+
+Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
+Reviewed-by: Jiri Pirko <jpirko@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c |   17 ++++++++++++-----
+ 1 file changed, 12 insertions(+), 5 deletions(-)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -1059,6 +1059,7 @@ static int rtnl_dump_ifinfo(struct sk_bu
+       struct hlist_node *node;
+       struct nlattr *tb[IFLA_MAX+1];
+       u32 ext_filter_mask = 0;
++      int err;
+       s_h = cb->args[0];
+       s_idx = cb->args[1];
+@@ -1079,11 +1080,17 @@ static int rtnl_dump_ifinfo(struct sk_bu
+               hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
+                       if (idx < s_idx)
+                               goto cont;
+-                      if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
+-                                           NETLINK_CB(cb->skb).pid,
+-                                           cb->nlh->nlmsg_seq, 0,
+-                                           NLM_F_MULTI,
+-                                           ext_filter_mask) <= 0)
++                      err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
++                                             NETLINK_CB(cb->skb).pid,
++                                             cb->nlh->nlmsg_seq, 0,
++                                             NLM_F_MULTI,
++                                             ext_filter_mask);
++                      /* If we ran out of room on the first message,
++                       * we're in trouble
++                       */
++                      WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
++
++                      if (err <= 0)
+                               goto out;
+                       nl_dump_check_consistent(cb, nlmsg_hdr(skb));
index 728eae2244b510668918db20f29398bc7c166db5..35efbbf6c24efcd4c502a1dd991f7851e708fb5c 100644 (file)
@@ -11,3 +11,23 @@ usb-option-add-alcatel-l800ma.patch
 usb-option-add-and-update-a-number-of-cmotech-devices.patch
 drm-vmwgfx-correct-fb_fix_screeninfo.line_length.patch
 drm-radeon-call-drm_edid_to_eld-when-we-update-the-edid.patch
+list-introduce-list_next_entry-and-list_prev_entry.patch
+net-sctp-wake-up-all-assocs-if-sndbuf-policy-is-per-socket.patch
+net-sctp-test-if-association-is-dead-in-sctp_wake_up_waiters.patch
+l2tp-take-pmtu-from-tunnel-udp-socket.patch
+net-core-don-t-account-for-udp-header-size-when-computing-seglen.patch
+bonding-remove-debug_fs-files-when-module-init-fails.patch
+ipv6-limit-mtu-to-65575-bytes.patch
+net-ipv4-current-group_info-should-be-put-after-using.patch
+filter-prevent-nla-extensions-to-peek-beyond-the-end-of-the-message.patch
+tg3-update-rx_jumbo_pending-ring-param-only-when-jumbo-frames-are-enabled.patch
+rtnetlink-warn-when-interface-s-information-won-t-fit-in-our-packet.patch
+rtnetlink-only-supply-ifla_vf_ports-information-when-rtext_filter_vf-is-set.patch
+revert-macvlan-fix-checksums-error-when-we-are-in-bridge-mode.patch
+tcp_cubic-fix-the-range-of-delayed_ack.patch
+net-ipv4-ip_forward-fix-inverted-local_df-test.patch
+ipv4-fib_semantics-increment-fib_info_cnt-after-fib_info-allocation.patch
+act_mirred-do-not-drop-packets-when-fails-to-mirror-it.patch
+ipv4-initialise-the-itag-variable-in-__mkroute_input.patch
+skb-add-inline-helper-for-getting-the-skb-end-offset-from-head.patch
+net-gro-reset-skb-truesize-in-napi_reuse_skb.patch
diff --git a/queue-3.4/skb-add-inline-helper-for-getting-the-skb-end-offset-from-head.patch b/queue-3.4/skb-add-inline-helper-for-getting-the-skb-end-offset-from-head.patch
new file mode 100644 (file)
index 0000000..4477243
--- /dev/null
@@ -0,0 +1,143 @@
+From foo@baz Wed May 28 20:24:34 PDT 2014
+From: Alexander Duyck <alexander.h.duyck@intel.com>
+Date: Fri, 4 May 2012 14:26:56 +0000
+Subject: skb: Add inline helper for getting the skb end offset from head
+
+From: Alexander Duyck <alexander.h.duyck@intel.com>
+
+[ Upstream commit ec47ea82477404631d49b8e568c71826c9b663ac ]
+
+With the recent changes for how we compute the skb truesize it occurs to me
+we are probably going to have a lot of calls to skb_end_pointer -
+skb->head.  Instead of running all over the place doing that it would make
+more sense to just make it a separate inline skb_end_offset(skb) that way
+we can return the correct value without having gcc do all the
+optimization to cancel out skb->head - skb->head.
+
+Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/atm/ambassador.c             |    2 +-
+ drivers/atm/idt77252.c               |    2 +-
+ drivers/net/wimax/i2400m/usb-rx.c    |    2 +-
+ drivers/staging/octeon/ethernet-tx.c |    2 +-
+ include/linux/skbuff.h               |   12 +++++++++++-
+ net/core/skbuff.c                    |    9 ++++-----
+ 6 files changed, 19 insertions(+), 10 deletions(-)
+
+--- a/drivers/atm/ambassador.c
++++ b/drivers/atm/ambassador.c
+@@ -802,7 +802,7 @@ static void fill_rx_pool (amb_dev * dev,
+     }
+     // cast needed as there is no %? for pointer differences
+     PRINTD (DBG_SKB, "allocated skb at %p, head %p, area %li",
+-          skb, skb->head, (long) (skb_end_pointer(skb) - skb->head));
++          skb, skb->head, (long) skb_end_offset(skb));
+     rx.handle = virt_to_bus (skb);
+     rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));
+     if (rx_give (dev, &rx, pool))
+--- a/drivers/atm/idt77252.c
++++ b/drivers/atm/idt77252.c
+@@ -1258,7 +1258,7 @@ idt77252_rx_raw(struct idt77252_dev *car
+       tail = readl(SAR_REG_RAWCT);
+       pci_dma_sync_single_for_cpu(card->pcidev, IDT77252_PRV_PADDR(queue),
+-                                  skb_end_pointer(queue) - queue->head - 16,
++                                  skb_end_offset(queue) - 16,
+                                   PCI_DMA_FROMDEVICE);
+       while (head != tail) {
+--- a/drivers/net/wimax/i2400m/usb-rx.c
++++ b/drivers/net/wimax/i2400m/usb-rx.c
+@@ -277,7 +277,7 @@ retry:
+               d_printf(1, dev, "RX: size changed to %d, received %d, "
+                        "copied %d, capacity %ld\n",
+                        rx_size, read_size, rx_skb->len,
+-                       (long) (skb_end_pointer(new_skb) - new_skb->head));
++                       (long) skb_end_offset(new_skb));
+               goto retry;
+       }
+               /* In most cases, it happens due to the hardware scheduling a
+--- a/drivers/staging/octeon/ethernet-tx.c
++++ b/drivers/staging/octeon/ethernet-tx.c
+@@ -345,7 +345,7 @@ int cvm_oct_xmit(struct sk_buff *skb, st
+       }
+       if (unlikely
+           (skb->truesize !=
+-           sizeof(*skb) + skb_end_pointer(skb) - skb->head)) {
++           sizeof(*skb) + skb_end_offset(skb))) {
+               /*
+                  printk("TX buffer truesize has been changed\n");
+                */
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -640,11 +640,21 @@ static inline unsigned char *skb_end_poi
+ {
+       return skb->head + skb->end;
+ }
++
++static inline unsigned int skb_end_offset(const struct sk_buff *skb)
++{
++      return skb->end;
++}
+ #else
+ static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
+ {
+       return skb->end;
+ }
++
++static inline unsigned int skb_end_offset(const struct sk_buff *skb)
++{
++      return skb->end - skb->head;
++}
+ #endif
+ /* Internal */
+@@ -2574,7 +2584,7 @@ static inline bool skb_is_recycleable(co
+               return false;
+       skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD);
+-      if (skb_end_pointer(skb) - skb->head < skb_size)
++      if (skb_end_offset(skb) < skb_size)
+               return false;
+       if (skb_shared(skb) || skb_cloned(skb))
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -821,7 +821,7 @@ static void copy_skb_header(struct sk_bu
+ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
+ {
+       int headerlen = skb_headroom(skb);
+-      unsigned int size = (skb_end_pointer(skb) - skb->head) + skb->data_len;
++      unsigned int size = skb_end_offset(skb) + skb->data_len;
+       struct sk_buff *n = alloc_skb(size, gfp_mask);
+       if (!n)
+@@ -922,7 +922,7 @@ int pskb_expand_head(struct sk_buff *skb
+ {
+       int i;
+       u8 *data;
+-      int size = nhead + (skb_end_pointer(skb) - skb->head) + ntail;
++      int size = nhead + skb_end_offset(skb) + ntail;
+       long off;
+       bool fastpath;
+@@ -2721,14 +2721,13 @@ struct sk_buff *skb_segment(struct sk_bu
+                       if (unlikely(!nskb))
+                               goto err;
+-                      hsize = skb_end_pointer(nskb) - nskb->head;
++                      hsize = skb_end_offset(nskb);
+                       if (skb_cow_head(nskb, doffset + headroom)) {
+                               kfree_skb(nskb);
+                               goto err;
+                       }
+-                      nskb->truesize += skb_end_pointer(nskb) - nskb->head -
+-                                        hsize;
++                      nskb->truesize += skb_end_offset(nskb) - hsize;
+                       skb_release_head_state(nskb);
+                       __skb_push(nskb, doffset);
+               } else {
diff --git a/queue-3.4/tcp_cubic-fix-the-range-of-delayed_ack.patch b/queue-3.4/tcp_cubic-fix-the-range-of-delayed_ack.patch
new file mode 100644 (file)
index 0000000..1bae751
--- /dev/null
@@ -0,0 +1,44 @@
+From foo@baz Wed May 28 20:24:34 PDT 2014
+From: Liu Yu <allanyuliu@tencent.com>
+Date: Wed, 30 Apr 2014 17:34:09 +0800
+Subject: tcp_cubic: fix the range of delayed_ack
+
+From: Liu Yu <allanyuliu@tencent.com>
+
+[ Upstream commit 0cda345d1b2201dd15591b163e3c92bad5191745 ]
+
+commit b9f47a3aaeab (tcp_cubic: limit delayed_ack ratio to prevent
+divide error) try to prevent divide error, but there is still a little
+chance that delayed_ack can reach zero. In case the param cnt get
+negative value, then ratio+cnt would overflow and may happen to be zero.
+As a result, min(ratio, ACK_RATIO_LIMIT) will calculate to be zero.
+
+In some old kernels, such as 2.6.32, there is a bug that would
+pass negative param, which then ultimately leads to this divide error.
+
+commit 5b35e1e6e9c (tcp: fix tcp_trim_head() to adjust segment count
+with skb MSS) fixed the negative param issue. However,
+it's safe that we fix the range of delayed_ack as well,
+to make sure we do not hit a divide by zero.
+
+CC: Stephen Hemminger <shemminger@vyatta.com>
+Signed-off-by: Liu Yu <allanyuliu@tencent.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_cubic.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_cubic.c
++++ b/net/ipv4/tcp_cubic.c
+@@ -408,7 +408,7 @@ static void bictcp_acked(struct sock *sk
+               ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT;
+               ratio += cnt;
+-              ca->delayed_ack = min(ratio, ACK_RATIO_LIMIT);
++              ca->delayed_ack = clamp(ratio, 1U, ACK_RATIO_LIMIT);
+       }
+       /* Some calls are for duplicates without timetamps */
diff --git a/queue-3.4/tg3-update-rx_jumbo_pending-ring-param-only-when-jumbo-frames-are-enabled.patch b/queue-3.4/tg3-update-rx_jumbo_pending-ring-param-only-when-jumbo-frames-are-enabled.patch
new file mode 100644 (file)
index 0000000..ad3621e
--- /dev/null
@@ -0,0 +1,42 @@
+From foo@baz Wed May 28 20:24:34 PDT 2014
+From: Ivan Vecera <ivecera@redhat.com>
+Date: Thu, 17 Apr 2014 14:51:08 +0200
+Subject: tg3: update rx_jumbo_pending ring param only when jumbo frames are enabled
+
+From: Ivan Vecera <ivecera@redhat.com>
+
+The patch fixes a problem with dropped jumbo frames after usage of
+'ethtool -G ... rx'.
+
+Scenario:
+1. ip link set eth0 up
+2. ethtool -G eth0 rx N # <- This zeroes rx-jumbo
+3. ip link set mtu 9000 dev eth0
+
+The ethtool command set rx_jumbo_pending to zero so any received jumbo
+packets are dropped and you need to use 'ethtool -G eth0 rx-jumbo N'
+to workaround the issue.
+The patch changes the logic so rx_jumbo_pending value is changed only if
+jumbo frames are enabled (MTU > 1500).
+
+Signed-off-by: Ivan Vecera <ivecera@redhat.com>
+Acked-by: Michael Chan <mchan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/tg3.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/broadcom/tg3.c
++++ b/drivers/net/ethernet/broadcom/tg3.c
+@@ -10861,7 +10861,9 @@ static int tg3_set_ringparam(struct net_
+       if (tg3_flag(tp, MAX_RXPEND_64) &&
+           tp->rx_pending > 63)
+               tp->rx_pending = 63;
+-      tp->rx_jumbo_pending = ering->rx_jumbo_pending;
++
++      if (tg3_flag(tp, JUMBO_RING_ENABLE))
++              tp->rx_jumbo_pending = ering->rx_jumbo_pending;
+       for (i = 0; i < tp->irq_max; i++)
+               tp->napi[i].tx_pending = ering->tx_pending;