From: Greg Kroah-Hartman Date: Thu, 29 May 2014 03:43:43 +0000 (-0700) Subject: 3.4-stable patches X-Git-Tag: v3.10.41~3 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=01c49ea33fe7ac13c8fcbb5d968a93dd6634a04b;p=thirdparty%2Fkernel%2Fstable-queue.git 3.4-stable patches added patches: act_mirred-do-not-drop-packets-when-fails-to-mirror-it.patch bonding-remove-debug_fs-files-when-module-init-fails.patch filter-prevent-nla-extensions-to-peek-beyond-the-end-of-the-message.patch ipv4-fib_semantics-increment-fib_info_cnt-after-fib_info-allocation.patch ipv4-initialise-the-itag-variable-in-__mkroute_input.patch ipv6-limit-mtu-to-65575-bytes.patch l2tp-take-pmtu-from-tunnel-udp-socket.patch list-introduce-list_next_entry-and-list_prev_entry.patch net-core-don-t-account-for-udp-header-size-when-computing-seglen.patch net-gro-reset-skb-truesize-in-napi_reuse_skb.patch net-ipv4-current-group_info-should-be-put-after-using.patch net-ipv4-ip_forward-fix-inverted-local_df-test.patch net-sctp-test-if-association-is-dead-in-sctp_wake_up_waiters.patch net-sctp-wake-up-all-assocs-if-sndbuf-policy-is-per-socket.patch revert-macvlan-fix-checksums-error-when-we-are-in-bridge-mode.patch rtnetlink-only-supply-ifla_vf_ports-information-when-rtext_filter_vf-is-set.patch rtnetlink-warn-when-interface-s-information-won-t-fit-in-our-packet.patch skb-add-inline-helper-for-getting-the-skb-end-offset-from-head.patch tcp_cubic-fix-the-range-of-delayed_ack.patch tg3-update-rx_jumbo_pending-ring-param-only-when-jumbo-frames-are-enabled.patch --- diff --git a/queue-3.4/act_mirred-do-not-drop-packets-when-fails-to-mirror-it.patch b/queue-3.4/act_mirred-do-not-drop-packets-when-fails-to-mirror-it.patch new file mode 100644 index 00000000000..31d9c20300b --- /dev/null +++ b/queue-3.4/act_mirred-do-not-drop-packets-when-fails-to-mirror-it.patch @@ -0,0 +1,49 @@ +From foo@baz Wed May 28 20:24:34 PDT 2014 +From: Jason Wang +Date: Wed, 15 Aug 2012 20:44:27 +0000 +Subject: act_mirred: do 
not drop packets when fails to mirror it + +From: Jason Wang + +[ Upstream commit 16c0b164bd24d44db137693a36b428ba28970c62 ] + +We drop packet unconditionally when we fail to mirror it. This is not intended +in some cases. Consider for kvm guest, we may mirror the traffic of the bridge +to a tap device used by a VM. When kernel fails to mirror the packet in +conditions such as when qemu crashes or stop polling the tap, it's hard for the +management software to detect such condition and clean the mirroring +before. This would lead all packets to the bridge to be dropped and break the +network of other virtual machines. + +To solve the issue, the patch does not drop packets when kernel fails to mirror +it, and only drop the redirected packets. + +Signed-off-by: Jason Wang +Signed-off-by: Jamal Hadi Salim +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/act_mirred.c | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +--- a/net/sched/act_mirred.c ++++ b/net/sched/act_mirred.c +@@ -201,13 +201,12 @@ static int tcf_mirred(struct sk_buff *sk + out: + if (err) { + m->tcf_qstats.overlimits++; +- /* should we be asking for packet to be dropped? 
+- * may make sense for redirect case only +- */ +- retval = TC_ACT_SHOT; +- } else { ++ if (m->tcfm_eaction != TCA_EGRESS_MIRROR) ++ retval = TC_ACT_SHOT; ++ else ++ retval = m->tcf_action; ++ } else + retval = m->tcf_action; +- } + spin_unlock(&m->tcf_lock); + + return retval; diff --git a/queue-3.4/bonding-remove-debug_fs-files-when-module-init-fails.patch b/queue-3.4/bonding-remove-debug_fs-files-when-module-init-fails.patch new file mode 100644 index 00000000000..91bf31331d1 --- /dev/null +++ b/queue-3.4/bonding-remove-debug_fs-files-when-module-init-fails.patch @@ -0,0 +1,32 @@ +From foo@baz Wed May 28 20:24:34 PDT 2014 +From: Thomas Richter +Date: Wed, 9 Apr 2014 12:52:59 +0200 +Subject: bonding: Remove debug_fs files when module init fails + +From: Thomas Richter + +[ Upstream commit db29868653394937037d71dc3545768302dda643 ] + +Remove the bonding debug_fs entries when the +module initialization fails. The debug_fs +entries should be removed together with all other +already allocated resources. + +Signed-off-by: Thomas Richter +Signed-off-by: Jay Vosburgh +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -4930,6 +4930,7 @@ static int __init bonding_init(void) + out: + return res; + err: ++ bond_destroy_debugfs(); + rtnl_link_unregister(&bond_link_ops); + err_link: + unregister_pernet_subsys(&bond_net_ops); diff --git a/queue-3.4/filter-prevent-nla-extensions-to-peek-beyond-the-end-of-the-message.patch b/queue-3.4/filter-prevent-nla-extensions-to-peek-beyond-the-end-of-the-message.patch new file mode 100644 index 00000000000..16ea4978fb6 --- /dev/null +++ b/queue-3.4/filter-prevent-nla-extensions-to-peek-beyond-the-end-of-the-message.patch @@ -0,0 +1,81 @@ +From foo@baz Wed May 28 20:24:34 PDT 2014 +From: Mathias Krause +Date: Sun, 13 Apr 2014 18:23:33 +0200 +Subject: filter: prevent nla extensions to peek beyond the end of the message + +From: Mathias Krause + +[ Upstream commit 05ab8f2647e4221cbdb3856dd7d32bd5407316b3 ] + +The BPF_S_ANC_NLATTR and BPF_S_ANC_NLATTR_NEST extensions fail to check +for a minimal message length before testing the supplied offset to be +within the bounds of the message. This allows the subtraction of the nla +header to underflow and therefore -- as the data type is unsigned -- +allowing far to big offset and length values for the search of the +netlink attribute. + +The remainder calculation for the BPF_S_ANC_NLATTR_NEST extension is +also wrong. It has the minuend and subtrahend mixed up, therefore +calculates a huge length value, allowing to overrun the end of the +message while looking for the netlink attribute. + +The following three BPF snippets will trigger the bugs when attached to +a UNIX datagram socket and parsing a message with length 1, 2 or 3. 
+ + ,-[ PoC for missing size check in BPF_S_ANC_NLATTR ]-- + | ld #0x87654321 + | ldx #42 + | ld #nla + | ret a + `--- + + ,-[ PoC for the same bug in BPF_S_ANC_NLATTR_NEST ]-- + | ld #0x87654321 + | ldx #42 + | ld #nlan + | ret a + `--- + + ,-[ PoC for wrong remainder calculation in BPF_S_ANC_NLATTR_NEST ]-- + | ; (needs a fake netlink header at offset 0) + | ld #0 + | ldx #42 + | ld #nlan + | ret a + `--- + +Fix the first issue by ensuring the message length fulfills the minimal +size constrains of a nla header. Fix the second bug by getting the math +for the remainder calculation right. + +Fixes: 4738c1db15 ("[SKFILTER]: Add SKF_ADF_NLATTR instruction") +Fixes: d214c7537b ("filter: add SKF_AD_NLATTR_NEST to look for nested..") +Cc: Patrick McHardy +Cc: Pablo Neira Ayuso +Signed-off-by: Mathias Krause +Acked-by: Daniel Borkmann +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/filter.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -338,11 +338,15 @@ load_b: + + if (skb_is_nonlinear(skb)) + return 0; ++ if (skb->len < sizeof(struct nlattr)) ++ return 0; ++ if (skb->len < sizeof(struct nlattr)) ++ return 0; + if (A > skb->len - sizeof(struct nlattr)) + return 0; + + nla = (struct nlattr *)&skb->data[A]; +- if (nla->nla_len > A - skb->len) ++ if (nla->nla_len > skb->len - A) + return 0; + + nla = nla_find_nested(nla, X); diff --git a/queue-3.4/ipv4-fib_semantics-increment-fib_info_cnt-after-fib_info-allocation.patch b/queue-3.4/ipv4-fib_semantics-increment-fib_info_cnt-after-fib_info-allocation.patch new file mode 100644 index 00000000000..d34b7392a87 --- /dev/null +++ b/queue-3.4/ipv4-fib_semantics-increment-fib_info_cnt-after-fib_info-allocation.patch @@ -0,0 +1,38 @@ +From foo@baz Wed May 28 20:24:34 PDT 2014 +From: Sergey Popovich +Date: Tue, 6 May 2014 18:23:08 +0300 +Subject: ipv4: fib_semantics: increment fib_info_cnt after fib_info allocation + +From: 
Sergey Popovich + +[ Upstream commit aeefa1ecfc799b0ea2c4979617f14cecd5cccbfd ] + +Increment fib_info_cnt in fib_create_info() right after successfully +allocating fib_info structure, otherwise fib_metrics allocation failure +leads to fib_info_cnt incorrectly decremented in free_fib_info(), called +on error path from fib_create_info(). + +Signed-off-by: Sergey Popovich +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_semantics.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -751,13 +751,13 @@ struct fib_info *fib_create_info(struct + fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); + if (fi == NULL) + goto failure; ++ fib_info_cnt++; + if (cfg->fc_mx) { + fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + if (!fi->fib_metrics) + goto failure; + } else + fi->fib_metrics = (u32 *) dst_default_metrics; +- fib_info_cnt++; + + fi->fib_net = hold_net(net); + fi->fib_protocol = cfg->fc_protocol; diff --git a/queue-3.4/ipv4-initialise-the-itag-variable-in-__mkroute_input.patch b/queue-3.4/ipv4-initialise-the-itag-variable-in-__mkroute_input.patch new file mode 100644 index 00000000000..d9c01ebf07f --- /dev/null +++ b/queue-3.4/ipv4-initialise-the-itag-variable-in-__mkroute_input.patch @@ -0,0 +1,34 @@ +From foo@baz Wed May 28 20:24:34 PDT 2014 +From: Li RongQing +Date: Thu, 22 May 2014 16:36:55 +0800 +Subject: ipv4: initialise the itag variable in __mkroute_input + +From: Li RongQing + +[ Upstream commit fbdc0ad095c0a299e9abf5d8ac8f58374951149a ] + +the value of itag is a random value from stack, and may not be initiated by +fib_validate_source, which called fib_combine_itag if CONFIG_IP_ROUTE_CLASSID +is not set + +This will make the cached dst uncertainty + +Signed-off-by: Li RongQing +Acked-by: Alexei Starovoitov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -2129,7 +2129,7 @@ static int __mkroute_input(struct sk_buf + struct in_device *out_dev; + unsigned int flags = 0; + __be32 spec_dst; +- u32 itag; ++ u32 itag = 0; + + /* get a working reference to the output device */ + out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); diff --git a/queue-3.4/ipv6-limit-mtu-to-65575-bytes.patch b/queue-3.4/ipv6-limit-mtu-to-65575-bytes.patch new file mode 100644 index 00000000000..7fe64f6a1b1 --- /dev/null +++ b/queue-3.4/ipv6-limit-mtu-to-65575-bytes.patch @@ -0,0 +1,71 @@ +From foo@baz Wed May 28 20:24:34 PDT 2014 +From: Eric Dumazet +Date: Thu, 10 Apr 2014 21:23:36 -0700 +Subject: ipv6: Limit mtu to 65575 bytes + +From: Eric Dumazet + +[ Upstream commit 30f78d8ebf7f514801e71b88a10c948275168518 ] + +Francois reported that setting big mtu on loopback device could prevent +tcp sessions making progress. + +We do not support (yet ?) IPv6 Jumbograms and cook corrupted packets. + +We must limit the IPv6 MTU to (65535 + 40) bytes in theory. + +Tested: + +ifconfig lo mtu 70000 +netperf -H ::1 + +Before patch : Throughput : 0.05 Mbits + +After patch : Throughput : 35484 Mbits + +Reported-by: Francois WELLENREITER +Signed-off-by: Eric Dumazet +Acked-by: YOSHIFUJI Hideaki +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ip6_route.h | 5 +++++ + net/ipv6/route.c | 5 +++-- + 2 files changed, 8 insertions(+), 2 deletions(-) + +--- a/include/net/ip6_route.h ++++ b/include/net/ip6_route.h +@@ -34,6 +34,11 @@ struct route_info { + #define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010 + #define RT6_LOOKUP_F_SRCPREF_COA 0x00000020 + ++/* We do not (yet ?) 
support IPv6 jumbograms (RFC 2675) ++ * Unlike IPv4, hdr->seg_len doesn't include the IPv6 header ++ */ ++#define IP6_MAX_MTU (0xFFFF + sizeof(struct ipv6hdr)) ++ + /* + * rt6_srcprefs2flags() and rt6_flags2srcprefs() translate + * between IPV6_ADDR_PREFERENCES socket option values +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -1092,7 +1092,7 @@ static unsigned int ip6_mtu(const struct + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + if (mtu) +- return mtu; ++ goto out; + + mtu = IPV6_MIN_MTU; + +@@ -1102,7 +1102,8 @@ static unsigned int ip6_mtu(const struct + mtu = idev->cnf.mtu6; + rcu_read_unlock(); + +- return mtu; ++out: ++ return min_t(unsigned int, mtu, IP6_MAX_MTU); + } + + static struct dst_entry *icmp6_dst_gc_list; diff --git a/queue-3.4/l2tp-take-pmtu-from-tunnel-udp-socket.patch b/queue-3.4/l2tp-take-pmtu-from-tunnel-udp-socket.patch new file mode 100644 index 00000000000..b42a8172d52 --- /dev/null +++ b/queue-3.4/l2tp-take-pmtu-from-tunnel-udp-socket.patch @@ -0,0 +1,34 @@ +From foo@baz Wed May 28 20:24:34 PDT 2014 +From: Dmitry Petukhov +Date: Wed, 9 Apr 2014 02:23:20 +0600 +Subject: l2tp: take PMTU from tunnel UDP socket + +From: Dmitry Petukhov + +[ Upstream commit f34c4a35d87949fbb0e0f31eba3c054e9f8199ba ] + +When l2tp driver tries to get PMTU for the tunnel destination, it uses +the pointer to struct sock that represents PPPoX socket, while it +should use the pointer that represents UDP socket of the tunnel. + +Signed-off-by: Dmitry Petukhov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_ppp.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/l2tp/l2tp_ppp.c ++++ b/net/l2tp/l2tp_ppp.c +@@ -772,9 +772,9 @@ static int pppol2tp_connect(struct socke + session->deref = pppol2tp_session_sock_put; + + /* If PMTU discovery was enabled, use the MTU that was discovered */ +- dst = sk_dst_get(sk); ++ dst = sk_dst_get(tunnel->sock); + if (dst != NULL) { +- u32 pmtu = dst_mtu(__sk_dst_get(sk)); ++ u32 pmtu = dst_mtu(__sk_dst_get(tunnel->sock)); + if (pmtu != 0) + session->mtu = session->mru = pmtu - + PPPOL2TP_HEADER_OVERHEAD; diff --git a/queue-3.4/list-introduce-list_next_entry-and-list_prev_entry.patch b/queue-3.4/list-introduce-list_next_entry-and-list_prev_entry.patch new file mode 100644 index 00000000000..016b01fdcbe --- /dev/null +++ b/queue-3.4/list-introduce-list_next_entry-and-list_prev_entry.patch @@ -0,0 +1,76 @@ +From foo@baz Wed May 28 20:24:34 PDT 2014 +From: Oleg Nesterov +Date: Tue, 12 Nov 2013 15:10:01 -0800 +Subject: list: introduce list_next_entry() and list_prev_entry() + +From: Oleg Nesterov + +[ Upstream commit 008208c6b26f21c2648c250a09c55e737c02c5f8 ] + +Add two trivial helpers list_next_entry() and list_prev_entry(), they +can have a lot of users including list.h itself. In fact the 1st one is +already defined in events/core.c and bnx2x_sp.c, so the patch simply +moves the definition to list.h. 
+ +Signed-off-by: Oleg Nesterov +Cc: Eilon Greenstein +Cc: Greg Kroah-Hartman +Cc: Peter Zijlstra +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c | 3 --- + include/linux/list.h | 16 ++++++++++++++++ + kernel/events/core.c | 3 --- + 3 files changed, 16 insertions(+), 6 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c +@@ -1030,9 +1030,6 @@ static void bnx2x_set_one_vlan_mac_e1h(s + ETH_VLAN_FILTER_CLASSIFY, config); + } + +-#define list_next_entry(pos, member) \ +- list_entry((pos)->member.next, typeof(*(pos)), member) +- + /** + * bnx2x_vlan_mac_restore - reconfigure next MAC/VLAN/VLAN-MAC element + * +--- a/include/linux/list.h ++++ b/include/linux/list.h +@@ -362,6 +362,22 @@ static inline void list_splice_tail_init + list_entry((ptr)->next, type, member) + + /** ++ * list_next_entry - get the next element in list ++ * @pos: the type * to cursor ++ * @member: the name of the list_struct within the struct. ++ */ ++#define list_next_entry(pos, member) \ ++ list_entry((pos)->member.next, typeof(*(pos)), member) ++ ++/** ++ * list_prev_entry - get the prev element in list ++ * @pos: the type * to cursor ++ * @member: the name of the list_struct within the struct. ++ */ ++#define list_prev_entry(pos, member) \ ++ list_entry((pos)->member.prev, typeof(*(pos)), member) ++ ++/** + * list_for_each - iterate over a list + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. 
+--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -1973,9 +1973,6 @@ static void __perf_event_sync_stat(struc + perf_event_update_userpage(next_event); + } + +-#define list_next_entry(pos, member) \ +- list_entry(pos->member.next, typeof(*pos), member) +- + static void perf_event_sync_stat(struct perf_event_context *ctx, + struct perf_event_context *next_ctx) + { diff --git a/queue-3.4/net-core-don-t-account-for-udp-header-size-when-computing-seglen.patch b/queue-3.4/net-core-don-t-account-for-udp-header-size-when-computing-seglen.patch new file mode 100644 index 00000000000..15504836c07 --- /dev/null +++ b/queue-3.4/net-core-don-t-account-for-udp-header-size-when-computing-seglen.patch @@ -0,0 +1,50 @@ +From foo@baz Wed May 28 20:24:34 PDT 2014 +From: Florian Westphal +Date: Wed, 9 Apr 2014 10:28:50 +0200 +Subject: net: core: don't account for udp header size when computing seglen + +From: Florian Westphal + +[ Upstream commit 6d39d589bb76ee8a1c6cde6822006ae0053decff ] + +In case of tcp, gso_size contains the tcpmss. + +For UFO (udp fragmentation offloading) skbs, gso_size is the fragment +payload size, i.e. we must not account for udp header size. + +Otherwise, when using virtio drivers, a to-be-forwarded UFO GSO packet +will be needlessly fragmented in the forward path, because we think its +individual segments are too large for the outgoing link. + +Fixes: fe6cc55f3a9a053 ("net: ip, ipv6: handle gso skbs in forwarding path") +Cc: Eric Dumazet +Reported-by: Tobias Brunner +Signed-off-by: Florian Westphal +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/skbuff.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -3297,12 +3297,14 @@ EXPORT_SYMBOL(__skb_warn_lro_forwarding) + unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) + { + const struct skb_shared_info *shinfo = skb_shinfo(skb); +- unsigned int hdr_len; + + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) +- hdr_len = tcp_hdrlen(skb); +- else +- hdr_len = sizeof(struct udphdr); +- return hdr_len + shinfo->gso_size; ++ return tcp_hdrlen(skb) + shinfo->gso_size; ++ ++ /* UFO sets gso_size to the size of the fragmentation ++ * payload, i.e. the size of the L4 (UDP) header is already ++ * accounted for. ++ */ ++ return shinfo->gso_size; + } + EXPORT_SYMBOL_GPL(skb_gso_transport_seglen); diff --git a/queue-3.4/net-gro-reset-skb-truesize-in-napi_reuse_skb.patch b/queue-3.4/net-gro-reset-skb-truesize-in-napi_reuse_skb.patch new file mode 100644 index 00000000000..902083bb319 --- /dev/null +++ b/queue-3.4/net-gro-reset-skb-truesize-in-napi_reuse_skb.patch @@ -0,0 +1,39 @@ +From foo@baz Wed May 28 20:24:34 PDT 2014 +From: Eric Dumazet +Date: Thu, 3 Apr 2014 09:28:10 -0700 +Subject: net-gro: reset skb->truesize in napi_reuse_skb() + +From: Eric Dumazet + +[ Upstream commit e33d0ba8047b049c9262fdb1fcafb93cb52ceceb ] + +Recycling skb always had been very tough... + +This time it appears GRO layer can accumulate skb->truesize +adjustments made by drivers when they attach a fragment to skb. + +skb_gro_receive() can only subtract from skb->truesize the used part +of a fragment. + +I spotted this problem seeing TcpExtPruneCalled and +TcpExtTCPRcvCollapsed that were unexpected with a recent kernel, where +TCP receive window should be sized properly to accept traffic coming +from a driver not overshooting skb->truesize. + +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -3574,6 +3574,7 @@ static void napi_reuse_skb(struct napi_s + skb->vlan_tci = 0; + skb->dev = napi->dev; + skb->skb_iif = 0; ++ skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); + + napi->skb = skb; + } diff --git a/queue-3.4/net-ipv4-current-group_info-should-be-put-after-using.patch b/queue-3.4/net-ipv4-current-group_info-should-be-put-after-using.patch new file mode 100644 index 00000000000..5eb4e0d6533 --- /dev/null +++ b/queue-3.4/net-ipv4-current-group_info-should-be-put-after-using.patch @@ -0,0 +1,64 @@ +From foo@baz Wed May 28 20:24:34 PDT 2014 +From: "Wang, Xiaoming" +Date: Mon, 14 Apr 2014 12:30:45 -0400 +Subject: net: ipv4: current group_info should be put after using. + +From: "Wang, Xiaoming" + +[ Upstream commit b04c46190219a4f845e46a459e3102137b7f6cac ] + +Plug a group_info refcount leak in ping_init. +group_info is only needed during initialization and +the code failed to release the reference on exit. +While here move grabbing the reference to a place +where it is actually needed. + +Signed-off-by: Chuansheng Liu +Signed-off-by: Zhang Dongxing +Signed-off-by: xiaoming wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ping.c | 15 +++++++++++---- + 1 file changed, 11 insertions(+), 4 deletions(-) + +--- a/net/ipv4/ping.c ++++ b/net/ipv4/ping.c +@@ -203,26 +203,33 @@ static int ping_init_sock(struct sock *s + struct net *net = sock_net(sk); + gid_t group = current_egid(); + gid_t range[2]; +- struct group_info *group_info = get_current_groups(); +- int i, j, count = group_info->ngroups; ++ struct group_info *group_info; ++ int i, j, count; ++ int ret = 0; + + inet_get_ping_group_range_net(net, range, range+1); + if (range[0] <= group && group <= range[1]) + return 0; + ++ group_info = get_current_groups(); ++ count = group_info->ngroups; + for (i = 0; i < group_info->nblocks; i++) { + int cp_count = min_t(int, NGROUPS_PER_BLOCK, count); + + for (j = 0; j < cp_count; j++) { + group = group_info->blocks[i][j]; + if (range[0] <= group && group <= range[1]) +- return 0; ++ goto out_release_group; + } + + count -= cp_count; + } + +- return -EACCES; ++ ret = -EACCES; ++ ++out_release_group: ++ put_group_info(group_info); ++ return ret; + } + + static void ping_close(struct sock *sk, long timeout) diff --git a/queue-3.4/net-ipv4-ip_forward-fix-inverted-local_df-test.patch b/queue-3.4/net-ipv4-ip_forward-fix-inverted-local_df-test.patch new file mode 100644 index 00000000000..e1b471740e4 --- /dev/null +++ b/queue-3.4/net-ipv4-ip_forward-fix-inverted-local_df-test.patch @@ -0,0 +1,47 @@ +From foo@baz Wed May 28 20:24:34 PDT 2014 +From: Florian Westphal +Date: Sun, 4 May 2014 23:24:31 +0200 +Subject: net: ipv4: ip_forward: fix inverted local_df test + +From: Florian Westphal + +[ Upstream commit ca6c5d4ad216d5942ae544bbf02503041bd802aa ] + +local_df means 'ignore DF bit if set', so if its set we're +allowed to perform ip fragmentation. + +This wasn't noticed earlier because the output path also drops such skbs +(and emits needed icmp error) and because netfilter ip defrag did not +set local_df until couple of days ago. 
+ +Only difference is that DF-packets-larger-than MTU now discarded +earlier (f.e. we avoid pointless netfilter postrouting trip). + +While at it, drop the repeated test ip_exceeds_mtu, checking it once +is enough... + +Fixes: fe6cc55f3a9 ("net: ip, ipv6: handle gso skbs in forwarding path") +Signed-off-by: Florian Westphal +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_forward.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ipv4/ip_forward.c ++++ b/net/ipv4/ip_forward.c +@@ -42,12 +42,12 @@ + static bool ip_may_fragment(const struct sk_buff *skb) + { + return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) || +- !skb->local_df; ++ skb->local_df; + } + + static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) + { +- if (skb->len <= mtu || skb->local_df) ++ if (skb->len <= mtu) + return false; + + if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) diff --git a/queue-3.4/net-sctp-test-if-association-is-dead-in-sctp_wake_up_waiters.patch b/queue-3.4/net-sctp-test-if-association-is-dead-in-sctp_wake_up_waiters.patch new file mode 100644 index 00000000000..c8a7be61f96 --- /dev/null +++ b/queue-3.4/net-sctp-test-if-association-is-dead-in-sctp_wake_up_waiters.patch @@ -0,0 +1,81 @@ +From foo@baz Wed May 28 20:24:34 PDT 2014 +From: Daniel Borkmann +Date: Wed, 9 Apr 2014 16:10:20 +0200 +Subject: net: sctp: test if association is dead in sctp_wake_up_waiters + +From: Daniel Borkmann + +[ Upstream commit 1e1cdf8ac78793e0875465e98a648df64694a8d0 ] + +In function sctp_wake_up_waiters(), we need to involve a test +if the association is declared dead. If so, we don't have any +reference to a possible sibling association anymore and need +to invoke sctp_write_space() instead, and normally walk the +socket's associations and notify them of new wmem space. 
The +reason for special casing is that otherwise, we could run +into the following issue when a sctp_primitive_SEND() call +from sctp_sendmsg() fails, and tries to flush an association's +outq, i.e. in the following way: + +sctp_association_free() +`-> list_del(&asoc->asocs) <-- poisons list pointer + asoc->base.dead = true + sctp_outq_free(&asoc->outqueue) + `-> __sctp_outq_teardown() + `-> sctp_chunk_free() + `-> consume_skb() + `-> sctp_wfree() + `-> sctp_wake_up_waiters() <-- dereferences poisoned pointers + if asoc->ep->sndbuf_policy=0 + +Therefore, only walk the list in an 'optimized' way if we find +that the current association is still active. We could also use +list_del_init() in addition when we call sctp_association_free(), +but as Vlad suggests, we want to trap such bugs and thus leave +it poisoned as is. + +Why is it safe to resolve the issue by testing for asoc->base.dead? +Parallel calls to sctp_sendmsg() are protected under socket lock, +that is lock_sock()/release_sock(). Only within that path under +lock held, we're setting skb/chunk owner via sctp_set_owner_w(). +Eventually, chunks are freed directly by an association still +under that lock. So when traversing association list on destruction +time from sctp_wake_up_waiters() via sctp_wfree(), a different +CPU can't be running sctp_wfree() while another one calls +sctp_association_free() as both happens under the same lock. +Therefore, this can also not race with setting/testing against +asoc->base.dead as we are guaranteed for this to happen in order, +under lock. Further, Vlad says: the times we check asoc->base.dead +is when we've cached an association pointer for later processing. +In between cache and processing, the association may have been +freed and is simply still around due to reference counts. We check +asoc->base.dead under a lock, so it should always be safe to check +and not race against sctp_association_free(). Stress-testing seems +fine now, too. 
+ +Fixes: cd253f9f357d ("net: sctp: wake up all assocs if sndbuf policy is per socket") +Signed-off-by: Daniel Borkmann +Cc: Vlad Yasevich +Acked-by: Neil Horman +Acked-by: Vlad Yasevich +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -6380,6 +6380,12 @@ static void sctp_wake_up_waiters(struct + if (asoc->ep->sndbuf_policy) + return __sctp_write_space(asoc); + ++ /* If association goes down and is just flushing its ++ * outq, then just normally notify others. ++ */ ++ if (asoc->base.dead) ++ return sctp_write_space(sk); ++ + /* Accounting for the sndbuf space is per socket, so we + * need to wake up others, try to be fair and in case of + * other associations, let them have a go first instead diff --git a/queue-3.4/net-sctp-wake-up-all-assocs-if-sndbuf-policy-is-per-socket.patch b/queue-3.4/net-sctp-wake-up-all-assocs-if-sndbuf-policy-is-per-socket.patch new file mode 100644 index 00000000000..0eed9e6bbf0 --- /dev/null +++ b/queue-3.4/net-sctp-wake-up-all-assocs-if-sndbuf-policy-is-per-socket.patch @@ -0,0 +1,116 @@ +From foo@baz Wed May 28 20:24:34 PDT 2014 +From: Daniel Borkmann +Date: Tue, 8 Apr 2014 17:26:13 +0200 +Subject: net: sctp: wake up all assocs if sndbuf policy is per socket + +From: Daniel Borkmann + +[ Upstream commit 52c35befb69b005c3fc5afdaae3a5717ad013411 ] + +SCTP charges chunks for wmem accounting via skb->truesize in +sctp_set_owner_w(), and sctp_wfree() respectively as the +reverse operation. If a sender runs out of wmem, it needs to +wait via sctp_wait_for_sndbuf(), and gets woken up by a call +to __sctp_write_space() mostly via sctp_wfree(). + +__sctp_write_space() is being called per association. 
Although +we assign sk->sk_write_space() to sctp_write_space(), which +is then being done per socket, it is only used if send space +is increased per socket option (SO_SNDBUF), as SOCK_USE_WRITE_QUEUE +is set and therefore not invoked in sock_wfree(). + +Commit 4c3a5bdae293 ("sctp: Don't charge for data in sndbuf +again when transmitting packet") fixed an issue where in case +sctp_packet_transmit() manages to queue up more than sndbuf +bytes, sctp_wait_for_sndbuf() will never be woken up again +unless it is interrupted by a signal. However, a still +remaining issue is that if net.sctp.sndbuf_policy=0, that is +accounting per socket, and one-to-many sockets are in use, +the reclaimed write space from sctp_wfree() is 'unfairly' +handed back on the server to the association that is the lucky +one to be woken up again via __sctp_write_space(), while +the remaining associations are never be woken up again +(unless by a signal). + +The effect disappears with net.sctp.sndbuf_policy=1, that +is wmem accounting per association, as it guarantees a fair +share of wmem among associations. + +Therefore, if we have reclaimed memory in case of per socket +accounting, wake all related associations to a socket in a +fair manner, that is, traverse the socket association list +starting from the current neighbour of the association and +issue a __sctp_write_space() to everyone until we end up +waking ourselves. This guarantees that no association is +preferred over another and even if more associations are +taken into the one-to-many session, all receivers will get +messages from the server and are not stalled forever on +high load. This setting still leaves the advantage of per +socket accounting in touch as an association can still use +up global limits if unused by others. 
+ +Fixes: 4eb701dfc618 ("[SCTP] Fix SCTP sendbuffer accouting.") +Signed-off-by: Daniel Borkmann +Cc: Thomas Graf +Cc: Neil Horman +Cc: Vlad Yasevich +Acked-by: Vlad Yasevich +Acked-by: Neil Horman +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 36 +++++++++++++++++++++++++++++++++++- + 1 file changed, 35 insertions(+), 1 deletion(-) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -6369,6 +6369,40 @@ static void __sctp_write_space(struct sc + } + } + ++static void sctp_wake_up_waiters(struct sock *sk, ++ struct sctp_association *asoc) ++{ ++ struct sctp_association *tmp = asoc; ++ ++ /* We do accounting for the sndbuf space per association, ++ * so we only need to wake our own association. ++ */ ++ if (asoc->ep->sndbuf_policy) ++ return __sctp_write_space(asoc); ++ ++ /* Accounting for the sndbuf space is per socket, so we ++ * need to wake up others, try to be fair and in case of ++ * other associations, let them have a go first instead ++ * of just doing a sctp_write_space() call. ++ * ++ * Note that we reach sctp_wake_up_waiters() only when ++ * associations free up queued chunks, thus we are under ++ * lock and the list of associations on a socket is ++ * guaranteed not to change. ++ */ ++ for (tmp = list_next_entry(tmp, asocs); 1; ++ tmp = list_next_entry(tmp, asocs)) { ++ /* Manually skip the head element. */ ++ if (&tmp->asocs == &((sctp_sk(sk))->ep->asocs)) ++ continue; ++ /* Wake up association. */ ++ __sctp_write_space(tmp); ++ /* We've reached the end. */ ++ if (tmp == asoc) ++ break; ++ } ++} ++ + /* Do accounting for the sndbuf space. + * Decrement the used sndbuf space of the corresponding association by the + * data size which was just transmitted(freed). 
+@@ -6396,7 +6430,7 @@ static void sctp_wfree(struct sk_buff *s + sk_mem_uncharge(sk, skb->truesize); + + sock_wfree(skb); +- __sctp_write_space(asoc); ++ sctp_wake_up_waiters(sk, asoc); + + sctp_association_put(asoc); + } diff --git a/queue-3.4/revert-macvlan-fix-checksums-error-when-we-are-in-bridge-mode.patch b/queue-3.4/revert-macvlan-fix-checksums-error-when-we-are-in-bridge-mode.patch new file mode 100644 index 00000000000..b5505992078 --- /dev/null +++ b/queue-3.4/revert-macvlan-fix-checksums-error-when-we-are-in-bridge-mode.patch @@ -0,0 +1,59 @@ +From foo@baz Wed May 28 20:24:34 PDT 2014 +From: Vlad Yasevich +Date: Tue, 29 Apr 2014 10:09:51 -0400 +Subject: Revert "macvlan : fix checksums error when we are in bridge mode" + +From: Vlad Yasevich + +[ Upstream commit f114890cdf84d753f6b41cd0cc44ba51d16313da ] + +This reverts commit 12a2856b604476c27d85a5f9a57ae1661fc46019. +The commit above doesn't appear to be necessary any more as the +checksums appear to be correctly computed/validated. + +Additionally the above commit breaks kvm configurations where +one VM is using a device that supports checksum offload (virtio) and +the other VM does not. +In this case, packets leaving virtio device will have CHECKSUM_PARTIAL +set. The packet is forwarded to a macvtap that has offload features +turned off. Since we use CHECKSUM_UNNECESSARY, the host does not +update the checksum and thus a bad checksum is passed up to +the guest. + +CC: Daniel Lezcano +CC: Patrick McHardy +CC: Andrian Nord +CC: Eric Dumazet +CC: Michael S. Tsirkin +CC: Jason Wang +Signed-off-by: Vlad Yasevich +Acked-by: Michael S. Tsirkin +Acked-by: Jason Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/macvlan.c | 3 --- + 1 file changed, 3 deletions(-) + +--- a/drivers/net/macvlan.c ++++ b/drivers/net/macvlan.c +@@ -237,11 +237,9 @@ static int macvlan_queue_xmit(struct sk_ + const struct macvlan_dev *vlan = netdev_priv(dev); + const struct macvlan_port *port = vlan->port; + const struct macvlan_dev *dest; +- __u8 ip_summed = skb->ip_summed; + + if (vlan->mode == MACVLAN_MODE_BRIDGE) { + const struct ethhdr *eth = (void *)skb->data; +- skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* send to other bridge ports directly */ + if (is_multicast_ether_addr(eth->h_dest)) { +@@ -259,7 +257,6 @@ static int macvlan_queue_xmit(struct sk_ + } + + xmit_world: +- skb->ip_summed = ip_summed; + skb->dev = vlan->lowerdev; + return dev_queue_xmit(skb); + } diff --git a/queue-3.4/rtnetlink-only-supply-ifla_vf_ports-information-when-rtext_filter_vf-is-set.patch b/queue-3.4/rtnetlink-only-supply-ifla_vf_ports-information-when-rtext_filter_vf-is-set.patch new file mode 100644 index 00000000000..2c367264f9c --- /dev/null +++ b/queue-3.4/rtnetlink-only-supply-ifla_vf_ports-information-when-rtext_filter_vf-is-set.patch @@ -0,0 +1,98 @@ +From foo@baz Wed May 28 20:24:34 PDT 2014 +From: David Gibson +Date: Thu, 24 Apr 2014 10:22:36 +1000 +Subject: rtnetlink: Only supply IFLA_VF_PORTS information when RTEXT_FILTER_VF is set + +From: David Gibson + +[ Upstream commit c53864fd60227de025cb79e05493b13f69843971 ] + +Since 115c9b81928360d769a76c632bae62d15206a94a (rtnetlink: Fix problem with +buffer allocation), RTM_NEWLINK messages only contain the IFLA_VFINFO_LIST +attribute if they were solicited by a GETLINK message containing an +IFLA_EXT_MASK attribute with the RTEXT_FILTER_VF flag. + +That was done because some user programs broke when they received more data +than expected - because IFLA_VFINFO_LIST contains information for each VF +it can become large if there are many VFs. 
+ +However, the IFLA_VF_PORTS attribute, supplied for devices which implement +ndo_get_vf_port (currently the 'enic' driver only), has the same problem. +It supplies per-VF information and can therefore become large, but it is +not currently conditional on the IFLA_EXT_MASK value. + +Worse, it interacts badly with the existing EXT_MASK handling. When +IFLA_EXT_MASK is not supplied, the buffer for netlink replies is fixed at +NLMSG_GOODSIZE. If the information for IFLA_VF_PORTS exceeds this, then +rtnl_fill_ifinfo() returns -EMSGSIZE on the first message in a packet. +netlink_dump() will misinterpret this as having finished the listing and +omit data for this interface and all subsequent ones. That can cause +getifaddrs(3) to enter an infinite loop. + +This patch addresses the problem by only supplying IFLA_VF_PORTS when +IFLA_EXT_MASK is supplied with the RTEXT_FILTER_VF flag set. + +Signed-off-by: David Gibson +Reviewed-by: Jiri Pirko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 16 ++++++++++------ + 1 file changed, 10 insertions(+), 6 deletions(-) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -746,7 +746,8 @@ static inline int rtnl_vfinfo_size(const + return 0; + } + +-static size_t rtnl_port_size(const struct net_device *dev) ++static size_t rtnl_port_size(const struct net_device *dev, ++ u32 ext_filter_mask) + { + size_t port_size = nla_total_size(4) /* PORT_VF */ + + nla_total_size(PORT_PROFILE_MAX) /* PORT_PROFILE */ +@@ -762,7 +763,8 @@ static size_t rtnl_port_size(const struc + size_t port_self_size = nla_total_size(sizeof(struct nlattr)) + + port_size; + +- if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent) ++ if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent || ++ !(ext_filter_mask & RTEXT_FILTER_VF)) + return 0; + if (dev_num_vf(dev->dev.parent)) + return port_self_size + vf_ports_size + +@@ -793,7 +795,7 @@ static noinline size_t if_nlmsg_size(con + + 
nla_total_size(ext_filter_mask + & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */ + + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ +- + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ ++ + rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ + + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ + + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */ + } +@@ -853,11 +855,13 @@ static int rtnl_port_self_fill(struct sk + return 0; + } + +-static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev) ++static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev, ++ u32 ext_filter_mask) + { + int err; + +- if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent) ++ if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent || ++ !(ext_filter_mask & RTEXT_FILTER_VF)) + return 0; + + err = rtnl_port_self_fill(skb, dev); +@@ -1004,7 +1008,7 @@ static int rtnl_fill_ifinfo(struct sk_bu + nla_nest_end(skb, vfinfo); + } + +- if (rtnl_port_fill(skb, dev)) ++ if (rtnl_port_fill(skb, dev, ext_filter_mask)) + goto nla_put_failure; + + if (dev->rtnl_link_ops) { diff --git a/queue-3.4/rtnetlink-warn-when-interface-s-information-won-t-fit-in-our-packet.patch b/queue-3.4/rtnetlink-warn-when-interface-s-information-won-t-fit-in-our-packet.patch new file mode 100644 index 00000000000..aea69dd1f71 --- /dev/null +++ b/queue-3.4/rtnetlink-warn-when-interface-s-information-won-t-fit-in-our-packet.patch @@ -0,0 +1,63 @@ +From foo@baz Wed May 28 20:24:34 PDT 2014 +From: David Gibson +Date: Thu, 24 Apr 2014 10:22:35 +1000 +Subject: rtnetlink: Warn when interface's information won't fit in our packet + +From: David Gibson + +[ Upstream commit 973462bbde79bb827824c73b59027a0aed5c9ca6 ] + +Without IFLA_EXT_MASK specified, the information reported for a single +interface in response to RTM_GETLINK is expected to fit within a netlink +packet of NLMSG_GOODSIZE. 
+ +If it doesn't, however, things will go badly wrong. When listing all +interfaces, netlink_dump() will incorrectly treat -EMSGSIZE on the first +message in a packet as the end of the listing and omit information for +that interface and all subsequent ones. This can cause getifaddrs(3) to +enter an infinite loop. + +This patch won't fix the problem, but it will WARN_ON() making it easier to +track down what's going wrong. + +Signed-off-by: David Gibson +Reviewed-by: Jiri Pirko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 17 ++++++++++++----- + 1 file changed, 12 insertions(+), 5 deletions(-) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -1059,6 +1059,7 @@ static int rtnl_dump_ifinfo(struct sk_bu + struct hlist_node *node; + struct nlattr *tb[IFLA_MAX+1]; + u32 ext_filter_mask = 0; ++ int err; + + s_h = cb->args[0]; + s_idx = cb->args[1]; +@@ -1079,11 +1080,17 @@ static int rtnl_dump_ifinfo(struct sk_bu + hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + if (idx < s_idx) + goto cont; +- if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, +- NETLINK_CB(cb->skb).pid, +- cb->nlh->nlmsg_seq, 0, +- NLM_F_MULTI, +- ext_filter_mask) <= 0) ++ err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, ++ NETLINK_CB(cb->skb).pid, ++ cb->nlh->nlmsg_seq, 0, ++ NLM_F_MULTI, ++ ext_filter_mask); ++ /* If we ran out of room on the first message, ++ * we're in trouble ++ */ ++ WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); ++ ++ if (err <= 0) + goto out; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); diff --git a/queue-3.4/series b/queue-3.4/series index 728eae2244b..35efbbf6c24 100644 --- a/queue-3.4/series +++ b/queue-3.4/series @@ -11,3 +11,23 @@ usb-option-add-alcatel-l800ma.patch usb-option-add-and-update-a-number-of-cmotech-devices.patch drm-vmwgfx-correct-fb_fix_screeninfo.line_length.patch drm-radeon-call-drm_edid_to_eld-when-we-update-the-edid.patch +list-introduce-list_next_entry-and-list_prev_entry.patch 
+net-sctp-wake-up-all-assocs-if-sndbuf-policy-is-per-socket.patch +net-sctp-test-if-association-is-dead-in-sctp_wake_up_waiters.patch +l2tp-take-pmtu-from-tunnel-udp-socket.patch +net-core-don-t-account-for-udp-header-size-when-computing-seglen.patch +bonding-remove-debug_fs-files-when-module-init-fails.patch +ipv6-limit-mtu-to-65575-bytes.patch +net-ipv4-current-group_info-should-be-put-after-using.patch +filter-prevent-nla-extensions-to-peek-beyond-the-end-of-the-message.patch +tg3-update-rx_jumbo_pending-ring-param-only-when-jumbo-frames-are-enabled.patch +rtnetlink-warn-when-interface-s-information-won-t-fit-in-our-packet.patch +rtnetlink-only-supply-ifla_vf_ports-information-when-rtext_filter_vf-is-set.patch +revert-macvlan-fix-checksums-error-when-we-are-in-bridge-mode.patch +tcp_cubic-fix-the-range-of-delayed_ack.patch +net-ipv4-ip_forward-fix-inverted-local_df-test.patch +ipv4-fib_semantics-increment-fib_info_cnt-after-fib_info-allocation.patch +act_mirred-do-not-drop-packets-when-fails-to-mirror-it.patch +ipv4-initialise-the-itag-variable-in-__mkroute_input.patch +skb-add-inline-helper-for-getting-the-skb-end-offset-from-head.patch +net-gro-reset-skb-truesize-in-napi_reuse_skb.patch diff --git a/queue-3.4/skb-add-inline-helper-for-getting-the-skb-end-offset-from-head.patch b/queue-3.4/skb-add-inline-helper-for-getting-the-skb-end-offset-from-head.patch new file mode 100644 index 00000000000..44772430d60 --- /dev/null +++ b/queue-3.4/skb-add-inline-helper-for-getting-the-skb-end-offset-from-head.patch @@ -0,0 +1,143 @@ +From foo@baz Wed May 28 20:24:34 PDT 2014 +From: Alexander Duyck +Date: Fri, 4 May 2012 14:26:56 +0000 +Subject: skb: Add inline helper for getting the skb end offset from head + +From: Alexander Duyck + +[ Upstream commit ec47ea82477404631d49b8e568c71826c9b663ac ] + +With the recent changes for how we compute the skb truesize it occurs to me +we are probably going to have a lot of calls to skb_end_pointer - +skb->head. 
Instead of running all over the place doing that it would make +more sense to just make it a separate inline skb_end_offset(skb) that way +we can return the correct value without having gcc having to do all the +optimization to cancel out skb->head - skb->head. + +Signed-off-by: Alexander Duyck +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/atm/ambassador.c | 2 +- + drivers/atm/idt77252.c | 2 +- + drivers/net/wimax/i2400m/usb-rx.c | 2 +- + drivers/staging/octeon/ethernet-tx.c | 2 +- + include/linux/skbuff.h | 12 +++++++++++- + net/core/skbuff.c | 9 ++++----- + 6 files changed, 19 insertions(+), 10 deletions(-) + +--- a/drivers/atm/ambassador.c ++++ b/drivers/atm/ambassador.c +@@ -802,7 +802,7 @@ static void fill_rx_pool (amb_dev * dev, + } + // cast needed as there is no %? for pointer differences + PRINTD (DBG_SKB, "allocated skb at %p, head %p, area %li", +- skb, skb->head, (long) (skb_end_pointer(skb) - skb->head)); ++ skb, skb->head, (long) skb_end_offset(skb)); + rx.handle = virt_to_bus (skb); + rx.host_address = cpu_to_be32 (virt_to_bus (skb->data)); + if (rx_give (dev, &rx, pool)) +--- a/drivers/atm/idt77252.c ++++ b/drivers/atm/idt77252.c +@@ -1258,7 +1258,7 @@ idt77252_rx_raw(struct idt77252_dev *car + tail = readl(SAR_REG_RAWCT); + + pci_dma_sync_single_for_cpu(card->pcidev, IDT77252_PRV_PADDR(queue), +- skb_end_pointer(queue) - queue->head - 16, ++ skb_end_offset(queue) - 16, + PCI_DMA_FROMDEVICE); + + while (head != tail) { +--- a/drivers/net/wimax/i2400m/usb-rx.c ++++ b/drivers/net/wimax/i2400m/usb-rx.c +@@ -277,7 +277,7 @@ retry: + d_printf(1, dev, "RX: size changed to %d, received %d, " + "copied %d, capacity %ld\n", + rx_size, read_size, rx_skb->len, +- (long) (skb_end_pointer(new_skb) - new_skb->head)); ++ (long) skb_end_offset(new_skb)); + goto retry; + } + /* In most cases, it happens due to the hardware scheduling a +--- a/drivers/staging/octeon/ethernet-tx.c ++++ 
b/drivers/staging/octeon/ethernet-tx.c +@@ -345,7 +345,7 @@ int cvm_oct_xmit(struct sk_buff *skb, st + } + if (unlikely + (skb->truesize != +- sizeof(*skb) + skb_end_pointer(skb) - skb->head)) { ++ sizeof(*skb) + skb_end_offset(skb))) { + /* + printk("TX buffer truesize has been changed\n"); + */ +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -640,11 +640,21 @@ static inline unsigned char *skb_end_poi + { + return skb->head + skb->end; + } ++ ++static inline unsigned int skb_end_offset(const struct sk_buff *skb) ++{ ++ return skb->end; ++} + #else + static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) + { + return skb->end; + } ++ ++static inline unsigned int skb_end_offset(const struct sk_buff *skb) ++{ ++ return skb->end - skb->head; ++} + #endif + + /* Internal */ +@@ -2574,7 +2584,7 @@ static inline bool skb_is_recycleable(co + return false; + + skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD); +- if (skb_end_pointer(skb) - skb->head < skb_size) ++ if (skb_end_offset(skb) < skb_size) + return false; + + if (skb_shared(skb) || skb_cloned(skb)) +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -821,7 +821,7 @@ static void copy_skb_header(struct sk_bu + struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) + { + int headerlen = skb_headroom(skb); +- unsigned int size = (skb_end_pointer(skb) - skb->head) + skb->data_len; ++ unsigned int size = skb_end_offset(skb) + skb->data_len; + struct sk_buff *n = alloc_skb(size, gfp_mask); + + if (!n) +@@ -922,7 +922,7 @@ int pskb_expand_head(struct sk_buff *skb + { + int i; + u8 *data; +- int size = nhead + (skb_end_pointer(skb) - skb->head) + ntail; ++ int size = nhead + skb_end_offset(skb) + ntail; + long off; + bool fastpath; + +@@ -2721,14 +2721,13 @@ struct sk_buff *skb_segment(struct sk_bu + if (unlikely(!nskb)) + goto err; + +- hsize = skb_end_pointer(nskb) - nskb->head; ++ hsize = skb_end_offset(nskb); + if (skb_cow_head(nskb, doffset + headroom)) { + 
kfree_skb(nskb); + goto err; + } + +- nskb->truesize += skb_end_pointer(nskb) - nskb->head - +- hsize; ++ nskb->truesize += skb_end_offset(nskb) - hsize; + skb_release_head_state(nskb); + __skb_push(nskb, doffset); + } else { diff --git a/queue-3.4/tcp_cubic-fix-the-range-of-delayed_ack.patch b/queue-3.4/tcp_cubic-fix-the-range-of-delayed_ack.patch new file mode 100644 index 00000000000..1bae7513740 --- /dev/null +++ b/queue-3.4/tcp_cubic-fix-the-range-of-delayed_ack.patch @@ -0,0 +1,44 @@ +From foo@baz Wed May 28 20:24:34 PDT 2014 +From: Liu Yu +Date: Wed, 30 Apr 2014 17:34:09 +0800 +Subject: tcp_cubic: fix the range of delayed_ack + +From: Liu Yu + +[ Upstream commit 0cda345d1b2201dd15591b163e3c92bad5191745 ] + +commit b9f47a3aaeab (tcp_cubic: limit delayed_ack ratio to prevent +divide error) tries to prevent a divide error, but there is still a little +chance that delayed_ack can reach zero. In case the param cnt gets a +negative value, then ratio+cnt would overflow and may happen to be zero. +As a result, min(ratio, ACK_RATIO_LIMIT) will calculate to be zero. + +In some old kernels, such as 2.6.32, there is a bug that would +pass a negative param, which then ultimately leads to this divide error. + +commit 5b35e1e6e9c (tcp: fix tcp_trim_head() to adjust segment count +with skb MSS) fixed the negative param issue. However, +it's safe that we fix the range of delayed_ack as well, +to make sure we do not hit a divide by zero. + +CC: Stephen Hemminger +Signed-off-by: Liu Yu +Signed-off-by: Eric Dumazet +Acked-by: Neal Cardwell +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_cubic.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/tcp_cubic.c ++++ b/net/ipv4/tcp_cubic.c +@@ -408,7 +408,7 @@ static void bictcp_acked(struct sock *sk + ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT; + ratio += cnt; + +- ca->delayed_ack = min(ratio, ACK_RATIO_LIMIT); ++ ca->delayed_ack = clamp(ratio, 1U, ACK_RATIO_LIMIT); + } + + /* Some calls are for duplicates without timetamps */ diff --git a/queue-3.4/tg3-update-rx_jumbo_pending-ring-param-only-when-jumbo-frames-are-enabled.patch b/queue-3.4/tg3-update-rx_jumbo_pending-ring-param-only-when-jumbo-frames-are-enabled.patch new file mode 100644 index 00000000000..ad3621e22a0 --- /dev/null +++ b/queue-3.4/tg3-update-rx_jumbo_pending-ring-param-only-when-jumbo-frames-are-enabled.patch @@ -0,0 +1,42 @@ +From foo@baz Wed May 28 20:24:34 PDT 2014 +From: Ivan Vecera +Date: Thu, 17 Apr 2014 14:51:08 +0200 +Subject: tg3: update rx_jumbo_pending ring param only when jumbo frames are enabled + +From: Ivan Vecera + +The patch fixes a problem with dropped jumbo frames after usage of +'ethtool -G ... rx'. + +Scenario: +1. ip link set eth0 up +2. ethtool -G eth0 rx N # <- This zeroes rx-jumbo +3. ip link set mtu 9000 dev eth0 + +The ethtool command sets rx_jumbo_pending to zero so any received jumbo +packets are dropped and you need to use 'ethtool -G eth0 rx-jumbo N' +to work around the issue. +The patch changes the logic so rx_jumbo_pending value is changed only if +jumbo frames are enabled (MTU > 1500). + +Signed-off-by: Ivan Vecera +Acked-by: Michael Chan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/tg3.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/broadcom/tg3.c ++++ b/drivers/net/ethernet/broadcom/tg3.c +@@ -10861,7 +10861,9 @@ static int tg3_set_ringparam(struct net_ + if (tg3_flag(tp, MAX_RXPEND_64) && + tp->rx_pending > 63) + tp->rx_pending = 63; +- tp->rx_jumbo_pending = ering->rx_jumbo_pending; ++ ++ if (tg3_flag(tp, JUMBO_RING_ENABLE)) ++ tp->rx_jumbo_pending = ering->rx_jumbo_pending; + + for (i = 0; i < tp->irq_max; i++) + tp->napi[i].tx_pending = ering->tx_pending;