From: Greg Kroah-Hartman Date: Wed, 22 Feb 2012 23:52:41 +0000 (-0800) Subject: 3.2-stable patches X-Git-Tag: v3.2.8~14 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=11efcb0e231ddd8f2bcb85691aa4522f8451b58a;p=thirdparty%2Fkernel%2Fstable-queue.git 3.2-stable patches added patches: 3c59x-shorten-timer-period-for-slave-devices.patch gro-more-generic-l2-header-check.patch ipoib-stop-lying-about-hard_header_len-and-use-skb-cb-to-stash-ll-addresses.patch ipv4-fix-wrong-order-of-ip_rt_get_source-and-update-iph-daddr.patch ipv4-reset-flowi-parameters-on-route-connect.patch net-don-t-proxy-arp-respond-if-iif-rt-dst.dev-if-private-vlan-is-disabled.patch net-make-qdisc_skb_cb-upper-size-bound-explicit.patch netpoll-netpoll_poll_dev-should-access-dev-flags.patch net_sched-bug-in-netem-reordering.patch tcp-allow-tcp_sacktag_one-to-tag-ranges-not-aligned-with-skbs.patch tcp-fix-range-tcp_shifted_skb-passes-to-tcp_sacktag_one.patch tcp-fix-tcp_shifted_skb-adjustment-of-lost_cnt_hint-for-fack.patch tcp_v4_send_reset-binding-oif-to-iif-in-no-sock-case.patch veth-enforce-minimum-size-of-veth_info_peer.patch via-velocity-s3-resume-fix.patch --- diff --git a/queue-3.2/3c59x-shorten-timer-period-for-slave-devices.patch b/queue-3.2/3c59x-shorten-timer-period-for-slave-devices.patch new file mode 100644 index 00000000000..390db4e788c --- /dev/null +++ b/queue-3.2/3c59x-shorten-timer-period-for-slave-devices.patch @@ -0,0 +1,40 @@ +From c27fb212bb0f4dc06721eed76316c465564fa882 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Tue, 14 Feb 2012 10:27:09 +0000 +Subject: 3c59x: shorten timer period for slave devices + + +From: Eric Dumazet + +[ Upstream commit 3013dc0cceb9baaf25d5624034eeaa259bf99004 ] + +Jean Delvare reported bonding on top of 3c59x adapters was not detecting +network cable removal fast enough. + +3c59x indeed uses a 60 seconds timer to check link status if carrier is +on, and 5 seconds if carrier is off. 
+ +This patch reduces timer period to 5 seconds if device is a bonding +slave. + +Reported-by: Jean Delvare +Acked-by: Jean Delvare +Acked-by: Steffen Klassert +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/3com/3c59x.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/3com/3c59x.c ++++ b/drivers/net/ethernet/3com/3c59x.c +@@ -1842,7 +1842,7 @@ vortex_timer(unsigned long data) + ok = 1; + } + +- if (!netif_carrier_ok(dev)) ++ if (dev->flags & IFF_SLAVE || !netif_carrier_ok(dev)) + next_tick = 5*HZ; + + if (vp->medialock) diff --git a/queue-3.2/gro-more-generic-l2-header-check.patch b/queue-3.2/gro-more-generic-l2-header-check.patch new file mode 100644 index 00000000000..8ec8a9d456e --- /dev/null +++ b/queue-3.2/gro-more-generic-l2-header-check.patch @@ -0,0 +1,59 @@ +From 61da319dee13f93077be3c40d226805a7c7b57b5 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Wed, 8 Feb 2012 08:51:50 +0000 +Subject: gro: more generic L2 header check + + +From: Eric Dumazet + +[ Upstream commit 5ca3b72c5da47d95b83857b768def6172fbc080a ] + +Shlomo Pongratz reported GRO L2 header check was suited for Ethernet +only, and failed on IB/ipoib traffic. + +He provided a patch faking a zeroed header to let GRO aggregates frames. + +Roland Dreier, Herbert Xu, and others suggested we change GRO L2 header +check to be more generic, ie not assuming L2 header is 14 bytes, but +taking into account hard_header_len. + +__napi_gro_receive() has special handling for the common case (Ethernet) +to avoid a memcmp() call and use an inline optimized function instead. + +Signed-off-by: Eric Dumazet +Reported-by: Shlomo Pongratz +Cc: Roland Dreier +Cc: Or Gerlitz +Cc: Herbert Xu +Tested-by: Sean Hefty +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -3565,14 +3565,20 @@ static inline gro_result_t + __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) + { + struct sk_buff *p; ++ unsigned int maclen = skb->dev->hard_header_len; + + for (p = napi->gro_list; p; p = p->next) { + unsigned long diffs; + + diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; + diffs |= p->vlan_tci ^ skb->vlan_tci; +- diffs |= compare_ether_header(skb_mac_header(p), +- skb_gro_mac_header(skb)); ++ if (maclen == ETH_HLEN) ++ diffs |= compare_ether_header(skb_mac_header(p), ++ skb_gro_mac_header(skb)); ++ else if (!diffs) ++ diffs = memcmp(skb_mac_header(p), ++ skb_gro_mac_header(skb), ++ maclen); + NAPI_GRO_CB(p)->same_flow = !diffs; + NAPI_GRO_CB(p)->flush = 0; + } diff --git a/queue-3.2/ipoib-stop-lying-about-hard_header_len-and-use-skb-cb-to-stash-ll-addresses.patch b/queue-3.2/ipoib-stop-lying-about-hard_header_len-and-use-skb-cb-to-stash-ll-addresses.patch new file mode 100644 index 00000000000..39e737ff36f --- /dev/null +++ b/queue-3.2/ipoib-stop-lying-about-hard_header_len-and-use-skb-cb-to-stash-ll-addresses.patch @@ -0,0 +1,206 @@ +From 57bdde484db3348f48ef6638611180eea2a0c6f5 Mon Sep 17 00:00:00 2001 +From: Roland Dreier +Date: Tue, 7 Feb 2012 14:51:21 +0000 +Subject: IPoIB: Stop lying about hard_header_len and use skb->cb to stash LL addresses + + +From: Roland Dreier + +[ Upstream commit 936d7de3d736e0737542641269436f4b5968e9ef ] + +Commit a0417fa3a18a ("net: Make qdisc_skb_cb upper size bound +explicit.") made it possible for a netdev driver to use skb->cb +between its header_ops.create method and its .ndo_start_xmit +method. Use this in ipoib_hard_header() to stash away the LL address +(GID + QPN), instead of the "ipoib_pseudoheader" hack. 
This allows +IPoIB to stop lying about its hard_header_len, which will let us fix +the L2 check for GRO. + +Signed-off-by: Roland Dreier +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/ulp/ipoib/ipoib.h | 6 +- + drivers/infiniband/ulp/ipoib/ipoib_main.c | 55 ++++++++----------------- + drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 9 ---- + 3 files changed, 24 insertions(+), 46 deletions(-) + +--- a/drivers/infiniband/ulp/ipoib/ipoib.h ++++ b/drivers/infiniband/ulp/ipoib/ipoib.h +@@ -44,6 +44,7 @@ + #include + + #include ++#include + + #include + +@@ -117,8 +118,9 @@ struct ipoib_header { + u16 reserved; + }; + +-struct ipoib_pseudoheader { +- u8 hwaddr[INFINIBAND_ALEN]; ++struct ipoib_cb { ++ struct qdisc_skb_cb qdisc_cb; ++ u8 hwaddr[INFINIBAND_ALEN]; + }; + + /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */ +--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c ++++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c +@@ -658,7 +658,7 @@ static void ipoib_path_lookup(struct sk_ + } + + static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, +- struct ipoib_pseudoheader *phdr) ++ struct ipoib_cb *cb) + { + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_path *path; +@@ -666,17 +666,15 @@ static void unicast_arp_send(struct sk_b + + spin_lock_irqsave(&priv->lock, flags); + +- path = __path_find(dev, phdr->hwaddr + 4); ++ path = __path_find(dev, cb->hwaddr + 4); + if (!path || !path->valid) { + int new_path = 0; + + if (!path) { +- path = path_rec_create(dev, phdr->hwaddr + 4); ++ path = path_rec_create(dev, cb->hwaddr + 4); + new_path = 1; + } + if (path) { +- /* put pseudoheader back on for next time */ +- skb_push(skb, sizeof *phdr); + __skb_queue_tail(&path->queue, skb); + + if (!path->query && path_rec_start(dev, path)) { +@@ -700,12 +698,10 @@ static void unicast_arp_send(struct sk_b + be16_to_cpu(path->pathrec.dlid)); + + spin_unlock_irqrestore(&priv->lock, flags); +- 
ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr)); ++ ipoib_send(dev, skb, path->ah, IPOIB_QPN(cb->hwaddr)); + return; + } else if ((path->query || !path_rec_start(dev, path)) && + skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) { +- /* put pseudoheader back on for next time */ +- skb_push(skb, sizeof *phdr); + __skb_queue_tail(&path->queue, skb); + } else { + ++dev->stats.tx_dropped; +@@ -774,16 +770,14 @@ static int ipoib_start_xmit(struct sk_bu + dev_kfree_skb_any(skb); + } + } else { +- struct ipoib_pseudoheader *phdr = +- (struct ipoib_pseudoheader *) skb->data; +- skb_pull(skb, sizeof *phdr); ++ struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb; + +- if (phdr->hwaddr[4] == 0xff) { ++ if (cb->hwaddr[4] == 0xff) { + /* Add in the P_Key for multicast*/ +- phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff; +- phdr->hwaddr[9] = priv->pkey & 0xff; ++ cb->hwaddr[8] = (priv->pkey >> 8) & 0xff; ++ cb->hwaddr[9] = priv->pkey & 0xff; + +- ipoib_mcast_send(dev, phdr->hwaddr + 4, skb); ++ ipoib_mcast_send(dev, cb->hwaddr + 4, skb); + } else { + /* unicast GID -- should be ARP or RARP reply */ + +@@ -792,14 +786,14 @@ static int ipoib_start_xmit(struct sk_bu + ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x %pI6\n", + skb_dst(skb) ? 
"neigh" : "dst", + be16_to_cpup((__be16 *) skb->data), +- IPOIB_QPN(phdr->hwaddr), +- phdr->hwaddr + 4); ++ IPOIB_QPN(cb->hwaddr), ++ cb->hwaddr + 4); + dev_kfree_skb_any(skb); + ++dev->stats.tx_dropped; + goto unlock; + } + +- unicast_arp_send(skb, dev, phdr); ++ unicast_arp_send(skb, dev, cb); + } + } + unlock: +@@ -825,8 +819,6 @@ static int ipoib_hard_header(struct sk_b + const void *daddr, const void *saddr, unsigned len) + { + struct ipoib_header *header; +- struct dst_entry *dst; +- struct neighbour *n; + + header = (struct ipoib_header *) skb_push(skb, sizeof *header); + +@@ -834,18 +826,13 @@ static int ipoib_hard_header(struct sk_b + header->reserved = 0; + + /* +- * If we don't have a neighbour structure, stuff the +- * destination address onto the front of the skb so we can +- * figure out where to send the packet later. ++ * If we don't have a dst_entry structure, stuff the ++ * destination address into skb->cb so we can figure out where ++ * to send the packet later. + */ +- dst = skb_dst(skb); +- n = NULL; +- if (dst) +- n = dst_get_neighbour_raw(dst); +- if ((!dst || !n) && daddr) { +- struct ipoib_pseudoheader *phdr = +- (struct ipoib_pseudoheader *) skb_push(skb, sizeof *phdr); +- memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN); ++ if (!skb_dst(skb)) { ++ struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb; ++ memcpy(cb->hwaddr, daddr, INFINIBAND_ALEN); + } + + return 0; +@@ -1021,11 +1008,7 @@ static void ipoib_setup(struct net_devic + + dev->flags |= IFF_BROADCAST | IFF_MULTICAST; + +- /* +- * We add in INFINIBAND_ALEN to allow for the destination +- * address "pseudoheader" for skbs without neighbour struct. 
+- */ +- dev->hard_header_len = IPOIB_ENCAP_LEN + INFINIBAND_ALEN; ++ dev->hard_header_len = IPOIB_ENCAP_LEN; + dev->addr_len = INFINIBAND_ALEN; + dev->type = ARPHRD_INFINIBAND; + dev->tx_queue_len = ipoib_sendq_size * 2; +--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c ++++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +@@ -262,21 +262,14 @@ static int ipoib_mcast_join_finish(struc + netif_tx_lock_bh(dev); + while (!skb_queue_empty(&mcast->pkt_queue)) { + struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); +- struct dst_entry *dst = skb_dst(skb); +- struct neighbour *n = NULL; + + netif_tx_unlock_bh(dev); + + skb->dev = dev; +- if (dst) +- n = dst_get_neighbour_raw(dst); +- if (!dst || !n) { +- /* put pseudoheader back on for next time */ +- skb_push(skb, sizeof (struct ipoib_pseudoheader)); +- } + + if (dev_queue_xmit(skb)) + ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n"); ++ + netif_tx_lock_bh(dev); + } + netif_tx_unlock_bh(dev); diff --git a/queue-3.2/ipv4-fix-wrong-order-of-ip_rt_get_source-and-update-iph-daddr.patch b/queue-3.2/ipv4-fix-wrong-order-of-ip_rt_get_source-and-update-iph-daddr.patch new file mode 100644 index 00000000000..c44561ae450 --- /dev/null +++ b/queue-3.2/ipv4-fix-wrong-order-of-ip_rt_get_source-and-update-iph-daddr.patch @@ -0,0 +1,35 @@ +From 29bdb2ab5dc1d1e440f040c8afb42803f24d92e5 Mon Sep 17 00:00:00 2001 +From: Li Wei +Date: Thu, 9 Feb 2012 21:15:25 +0000 +Subject: ipv4: Fix wrong order of ip_rt_get_source() and update iph->daddr. + + +From: Li Wei + +[ Upstream commit 5dc7883f2a7c25f8df40d7479687153558cd531b ] + +This patch fix a bug which introduced by commit ac8a4810 (ipv4: Save +nexthop address of LSRR/SSRR option to IPCB.).In that patch, we saved +the nexthop of SRR in ip_option->nexthop and update iph->daddr until +we get to ip_forward_options(), but we need to update it before +ip_rt_get_source(), otherwise we may get a wrong src. + +Signed-off-by: Li Wei +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_options.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/ip_options.c ++++ b/net/ipv4/ip_options.c +@@ -573,8 +573,8 @@ void ip_forward_options(struct sk_buff * + } + if (srrptr + 3 <= srrspace) { + opt->is_changed = 1; +- ip_rt_get_source(&optptr[srrptr-1], skb, rt); + ip_hdr(skb)->daddr = opt->nexthop; ++ ip_rt_get_source(&optptr[srrptr-1], skb, rt); + optptr[2] = srrptr+4; + } else if (net_ratelimit()) + printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); diff --git a/queue-3.2/ipv4-reset-flowi-parameters-on-route-connect.patch b/queue-3.2/ipv4-reset-flowi-parameters-on-route-connect.patch new file mode 100644 index 00000000000..dbbf9531cfd --- /dev/null +++ b/queue-3.2/ipv4-reset-flowi-parameters-on-route-connect.patch @@ -0,0 +1,86 @@ +From 5589b034dd41fd36a90be4059a0a5967f3c6fdb1 Mon Sep 17 00:00:00 2001 +From: Julian Anastasov +Date: Sat, 4 Feb 2012 13:04:46 +0000 +Subject: ipv4: reset flowi parameters on route connect + + +From: Julian Anastasov + +[ Upstream commit e6b45241c57a83197e5de9166b3b0d32ac562609 ] + +Eric Dumazet found that commit 813b3b5db83 +(ipv4: Use caller's on-stack flowi as-is in output +route lookups.) that comes in 3.0 added a regression. +The problem appears to be that resulting flowi4_oif is +used incorrectly as input parameter to some routing lookups. +The result is that when connecting to local port without +listener if the IP address that is used is not on a loopback +interface we incorrectly assign RTN_UNICAST to the output +route because no route is matched by oif=lo. The RST packet +can not be sent immediately by tcp_v4_send_reset because +it expects RTN_LOCAL. + + So, change ip_route_connect and ip_route_newports to +update the flowi4 fields that are input parameters because +we do not want unnecessary binding to oif. 
+ + To make it clear what are the input parameters that +can be modified during lookup and to show which fields of +floiw4 are reused add a new function to update the flowi4 +structure: flowi4_update_output. + +Thanks to Yurij M. Plotnikov for providing a bug report including a +program to reproduce the problem. + +Thanks to Eric Dumazet for tracking the problem down to +tcp_v4_send_reset and providing initial fix. + +Reported-by: Yurij M. Plotnikov +Signed-off-by: Julian Anastasov +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/flow.h | 10 ++++++++++ + include/net/route.h | 4 ++++ + 2 files changed, 14 insertions(+) + +--- a/include/net/flow.h ++++ b/include/net/flow.h +@@ -90,6 +90,16 @@ static inline void flowi4_init_output(st + fl4->fl4_dport = dport; + fl4->fl4_sport = sport; + } ++ ++/* Reset some input parameters after previous lookup */ ++static inline void flowi4_update_output(struct flowi4 *fl4, int oif, __u8 tos, ++ __be32 daddr, __be32 saddr) ++{ ++ fl4->flowi4_oif = oif; ++ fl4->flowi4_tos = tos; ++ fl4->daddr = daddr; ++ fl4->saddr = saddr; ++} + + + struct flowi6 { +--- a/include/net/route.h ++++ b/include/net/route.h +@@ -270,6 +270,7 @@ static inline struct rtable *ip_route_co + if (IS_ERR(rt)) + return rt; + ip_rt_put(rt); ++ flowi4_update_output(fl4, oif, tos, fl4->daddr, fl4->saddr); + } + security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); + return ip_route_output_flow(net, fl4, sk); +@@ -284,6 +285,9 @@ static inline struct rtable *ip_route_ne + fl4->fl4_dport = dport; + fl4->fl4_sport = sport; + ip_rt_put(rt); ++ flowi4_update_output(fl4, sk->sk_bound_dev_if, ++ RT_CONN_FLAGS(sk), fl4->daddr, ++ fl4->saddr); + security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); + return ip_route_output_flow(sock_net(sk), fl4, sk); + } diff --git a/queue-3.2/net-don-t-proxy-arp-respond-if-iif-rt-dst.dev-if-private-vlan-is-disabled.patch 
b/queue-3.2/net-don-t-proxy-arp-respond-if-iif-rt-dst.dev-if-private-vlan-is-disabled.patch new file mode 100644 index 00000000000..9a8af04836d --- /dev/null +++ b/queue-3.2/net-don-t-proxy-arp-respond-if-iif-rt-dst.dev-if-private-vlan-is-disabled.patch @@ -0,0 +1,40 @@ +From daa0b48ed4cee0e042319842f14148825f373355 Mon Sep 17 00:00:00 2001 +From: Thomas Graf +Date: Fri, 10 Feb 2012 04:07:11 +0000 +Subject: net: Don't proxy arp respond if iif == rt->dst.dev if private VLAN is disabled + + +From: Thomas Graf + +[ Upstream commit 70620c46ac2b45c24b0f22002fdf5ddd1f7daf81 ] + +Commit 653241 (net: RFC3069, private VLAN proxy arp support) changed +the behavior of arp proxy to send arp replies back out on the interface +the request came in even if the private VLAN feature is disabled. + +Previously we checked rt->dst.dev != skb->dev for in scenarios, when +proxy arp is enabled on for the netdevice and also when individual proxy +neighbour entries have been added. + +This patch adds the check back for the pneigh_lookup() scenario. + +Signed-off-by: Thomas Graf +Acked-by: Jesper Dangaard Brouer +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/arp.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/ipv4/arp.c ++++ b/net/ipv4/arp.c +@@ -867,7 +867,8 @@ static int arp_process(struct sk_buff *s + if (addr_type == RTN_UNICAST && + (arp_fwd_proxy(in_dev, dev, rt) || + arp_fwd_pvlan(in_dev, dev, rt, sip, tip) || +- pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) { ++ (rt->dst.dev != dev && ++ pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))) { + n = neigh_event_ns(&arp_tbl, sha, &sip, dev); + if (n) + neigh_release(n); diff --git a/queue-3.2/net-make-qdisc_skb_cb-upper-size-bound-explicit.patch b/queue-3.2/net-make-qdisc_skb_cb-upper-size-bound-explicit.patch new file mode 100644 index 00000000000..f754c6c4132 --- /dev/null +++ b/queue-3.2/net-make-qdisc_skb_cb-upper-size-bound-explicit.patch @@ -0,0 +1,82 @@ +From 1b78ca90b2b0800460eda8a248763bc8e5292101 Mon Sep 17 00:00:00 2001 +From: "David S. Miller" +Date: Mon, 6 Feb 2012 15:14:37 -0500 +Subject: net: Make qdisc_skb_cb upper size bound explicit. + + +From: "David S. Miller" + +[ Upstream commit 16bda13d90c8d5da243e2cfa1677e62ecce26860 ] + +Just like skb->cb[], so that qdisc_skb_cb can be encapsulated inside +of other data structures. + +This is intended to be used by IPoIB so that it can remember +addressing information stored at hard_header_ops->create() time that +it can fetch when the packet gets to the transmit routine. + +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/sch_generic.h | 9 ++++++++- + net/sched/sch_choke.c | 3 +-- + net/sched/sch_netem.c | 3 +-- + net/sched/sch_sfb.c | 3 +-- + 4 files changed, 11 insertions(+), 7 deletions(-) + +--- a/include/net/sch_generic.h ++++ b/include/net/sch_generic.h +@@ -220,9 +220,16 @@ struct tcf_proto { + + struct qdisc_skb_cb { + unsigned int pkt_len; +- long data[]; ++ unsigned char data[24]; + }; + ++static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) ++{ ++ struct qdisc_skb_cb *qcb; ++ BUILD_BUG_ON(sizeof(skb->cb) < sizeof(unsigned int) + sz); ++ BUILD_BUG_ON(sizeof(qcb->data) < sz); ++} ++ + static inline int qdisc_qlen(const struct Qdisc *q) + { + return q->q.qlen; +--- a/net/sched/sch_choke.c ++++ b/net/sched/sch_choke.c +@@ -225,8 +225,7 @@ struct choke_skb_cb { + + static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb) + { +- BUILD_BUG_ON(sizeof(skb->cb) < +- sizeof(struct qdisc_skb_cb) + sizeof(struct choke_skb_cb)); ++ qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb)); + return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data; + } + +--- a/net/sched/sch_netem.c ++++ b/net/sched/sch_netem.c +@@ -118,8 +118,7 @@ struct netem_skb_cb { + + static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) + { +- BUILD_BUG_ON(sizeof(skb->cb) < +- sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb)); ++ qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb)); + return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data; + } + +--- a/net/sched/sch_sfb.c ++++ b/net/sched/sch_sfb.c +@@ -93,8 +93,7 @@ struct sfb_skb_cb { + + static inline struct sfb_skb_cb *sfb_skb_cb(const struct sk_buff *skb) + { +- BUILD_BUG_ON(sizeof(skb->cb) < +- sizeof(struct qdisc_skb_cb) + sizeof(struct sfb_skb_cb)); ++ qdisc_cb_private_validate(skb, sizeof(struct sfb_skb_cb)); + return (struct sfb_skb_cb *)qdisc_skb_cb(skb)->data; + } + diff --git 
a/queue-3.2/net_sched-bug-in-netem-reordering.patch b/queue-3.2/net_sched-bug-in-netem-reordering.patch new file mode 100644 index 00000000000..6d860bf172d --- /dev/null +++ b/queue-3.2/net_sched-bug-in-netem-reordering.patch @@ -0,0 +1,40 @@ +From 0094d29e8b1d0617617fa48fefe2d049d82dda13 Mon Sep 17 00:00:00 2001 +From: Hagen Paul Pfeifer +Date: Wed, 4 Jan 2012 17:35:26 +0000 +Subject: net_sched: Bug in netem reordering + + +From: Hagen Paul Pfeifer + +[ Upstream commit eb10192447370f19a215a8c2749332afa1199d46 ] + +Not now, but it looks you are correct. q->qdisc is NULL until another +additional qdisc is attached (beside tfifo). See 50612537e9ab2969312. +The following patch should work. + +From: Hagen Paul Pfeifer + +netem: catch NULL pointer by updating the real qdisc statistic + +Reported-by: Vijay Subramanian +Signed-off-by: Hagen Paul Pfeifer +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_netem.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/sched/sch_netem.c ++++ b/net/sched/sch_netem.c +@@ -383,8 +383,8 @@ static int netem_enqueue(struct sk_buff + q->counter = 0; + + __skb_queue_head(&q->qdisc->q, skb); +- q->qdisc->qstats.backlog += qdisc_pkt_len(skb); +- q->qdisc->qstats.requeues++; ++ sch->qstats.backlog += qdisc_pkt_len(skb); ++ sch->qstats.requeues++; + ret = NET_XMIT_SUCCESS; + } + diff --git a/queue-3.2/netpoll-netpoll_poll_dev-should-access-dev-flags.patch b/queue-3.2/netpoll-netpoll_poll_dev-should-access-dev-flags.patch new file mode 100644 index 00000000000..aef018a4e44 --- /dev/null +++ b/queue-3.2/netpoll-netpoll_poll_dev-should-access-dev-flags.patch @@ -0,0 +1,33 @@ +From 16d7c5c85644e3646505c7d5b1f109b577ff89fc Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Tue, 14 Feb 2012 10:11:59 +0000 +Subject: netpoll: netpoll_poll_dev() should access dev->flags + + +From: Eric Dumazet + +[ Upstream commit 58e05f357a039a94aa36475f8c110256f693a239 ] + +commit 
5a698af53f (bond: service netpoll arp queue on master device) +tested IFF_SLAVE flag against dev->priv_flags instead of dev->flags + +Signed-off-by: Eric Dumazet +Cc: WANG Cong +Acked-by: Neil Horman +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/netpoll.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/core/netpoll.c ++++ b/net/core/netpoll.c +@@ -194,7 +194,7 @@ static void netpoll_poll_dev(struct net_ + + poll_napi(dev); + +- if (dev->priv_flags & IFF_SLAVE) { ++ if (dev->flags & IFF_SLAVE) { + if (dev->npinfo) { + struct net_device *bond_dev = dev->master; + struct sk_buff *skb; diff --git a/queue-3.2/series b/queue-3.2/series index c7b1894f6ab..dcd442cec05 100644 --- a/queue-3.2/series +++ b/queue-3.2/series @@ -16,3 +16,18 @@ nfsv4-ensure-we-throw-out-bad-delegation-stateids-on-nfs4err_bad_stateid.patch nfsv4-fix-server_scope-memory-leak.patch arm-7321-1-cache-v7-disable-preemption-when-reading-ccsidr.patch arm-7325-1-fix-v7-boot-with-lockdep-enabled.patch +3c59x-shorten-timer-period-for-slave-devices.patch +net-don-t-proxy-arp-respond-if-iif-rt-dst.dev-if-private-vlan-is-disabled.patch +netpoll-netpoll_poll_dev-should-access-dev-flags.patch +net_sched-bug-in-netem-reordering.patch +veth-enforce-minimum-size-of-veth_info_peer.patch +via-velocity-s3-resume-fix.patch +ipv4-reset-flowi-parameters-on-route-connect.patch +tcp_v4_send_reset-binding-oif-to-iif-in-no-sock-case.patch +ipv4-fix-wrong-order-of-ip_rt_get_source-and-update-iph-daddr.patch +net-make-qdisc_skb_cb-upper-size-bound-explicit.patch +ipoib-stop-lying-about-hard_header_len-and-use-skb-cb-to-stash-ll-addresses.patch +gro-more-generic-l2-header-check.patch +tcp-allow-tcp_sacktag_one-to-tag-ranges-not-aligned-with-skbs.patch +tcp-fix-range-tcp_shifted_skb-passes-to-tcp_sacktag_one.patch +tcp-fix-tcp_shifted_skb-adjustment-of-lost_cnt_hint-for-fack.patch diff --git a/queue-3.2/tcp-allow-tcp_sacktag_one-to-tag-ranges-not-aligned-with-skbs.patch 
b/queue-3.2/tcp-allow-tcp_sacktag_one-to-tag-ranges-not-aligned-with-skbs.patch new file mode 100644 index 00000000000..b4e13b2c476 --- /dev/null +++ b/queue-3.2/tcp-allow-tcp_sacktag_one-to-tag-ranges-not-aligned-with-skbs.patch @@ -0,0 +1,116 @@ +From dc36ad741926ed1fa24721ac0384c96108cf25a8 Mon Sep 17 00:00:00 2001 +From: Neal Cardwell +Date: Sun, 12 Feb 2012 18:37:09 +0000 +Subject: tcp: allow tcp_sacktag_one() to tag ranges not aligned with skbs + + +From: Neal Cardwell + +[ Upstream commit cc9a672ee522d4805495b98680f4a3db5d0a0af9 ] + +This commit allows callers of tcp_sacktag_one() to pass in sequence +ranges that do not align with skb boundaries, as tcp_shifted_skb() +needs to do in an upcoming fix in this patch series. + +In fact, now tcp_sacktag_one() does not need to depend on an input skb +at all, which makes its semantics and dependencies more clear. + +Signed-off-by: Neal Cardwell +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 36 ++++++++++++++++++++++-------------- + 1 file changed, 22 insertions(+), 14 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -1310,25 +1310,26 @@ static int tcp_match_skb_to_sack(struct + return in_sack; + } + +-static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk, +- struct tcp_sacktag_state *state, ++/* Mark the given newly-SACKed range as such, adjusting counters and hints. */ ++static u8 tcp_sacktag_one(struct sock *sk, ++ struct tcp_sacktag_state *state, u8 sacked, ++ u32 start_seq, u32 end_seq, + int dup_sack, int pcount) + { + struct tcp_sock *tp = tcp_sk(sk); +- u8 sacked = TCP_SKB_CB(skb)->sacked; + int fack_count = state->fack_count; + + /* Account D-SACK for retransmitted packet. 
*/ + if (dup_sack && (sacked & TCPCB_RETRANS)) { + if (tp->undo_marker && tp->undo_retrans && +- after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) ++ after(end_seq, tp->undo_marker)) + tp->undo_retrans--; + if (sacked & TCPCB_SACKED_ACKED) + state->reord = min(fack_count, state->reord); + } + + /* Nothing to do; acked frame is about to be dropped (was ACKed). */ +- if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) ++ if (!after(end_seq, tp->snd_una)) + return sacked; + + if (!(sacked & TCPCB_SACKED_ACKED)) { +@@ -1347,13 +1348,13 @@ static u8 tcp_sacktag_one(const struct s + /* New sack for not retransmitted frame, + * which was in hole. It is reordering. + */ +- if (before(TCP_SKB_CB(skb)->seq, ++ if (before(start_seq, + tcp_highest_sack_seq(tp))) + state->reord = min(fack_count, + state->reord); + + /* SACK enhanced F-RTO (RFC4138; Appendix B) */ +- if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) ++ if (!after(end_seq, tp->frto_highmark)) + state->flag |= FLAG_ONLY_ORIG_SACKED; + } + +@@ -1371,8 +1372,7 @@ static u8 tcp_sacktag_one(const struct s + + /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ + if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) && +- before(TCP_SKB_CB(skb)->seq, +- TCP_SKB_CB(tp->lost_skb_hint)->seq)) ++ before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq)) + tp->lost_cnt_hint += pcount; + + if (fack_count > tp->fackets_out) +@@ -1428,7 +1428,11 @@ static int tcp_shifted_skb(struct sock * + } + + /* We discard results */ +- tcp_sacktag_one(skb, sk, state, dup_sack, pcount); ++ tcp_sacktag_one(sk, state, ++ TCP_SKB_CB(skb)->sacked, ++ TCP_SKB_CB(skb)->seq, ++ TCP_SKB_CB(skb)->end_seq, ++ dup_sack, pcount); + + /* Difference in this won't matter, both ACKed by the same cumul. 
ACK */ + TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); +@@ -1667,10 +1671,14 @@ static struct sk_buff *tcp_sacktag_walk( + break; + + if (in_sack) { +- TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk, +- state, +- dup_sack, +- tcp_skb_pcount(skb)); ++ TCP_SKB_CB(skb)->sacked = ++ tcp_sacktag_one(sk, ++ state, ++ TCP_SKB_CB(skb)->sacked, ++ TCP_SKB_CB(skb)->seq, ++ TCP_SKB_CB(skb)->end_seq, ++ dup_sack, ++ tcp_skb_pcount(skb)); + + if (!before(TCP_SKB_CB(skb)->seq, + tcp_highest_sack_seq(tp))) diff --git a/queue-3.2/tcp-fix-range-tcp_shifted_skb-passes-to-tcp_sacktag_one.patch b/queue-3.2/tcp-fix-range-tcp_shifted_skb-passes-to-tcp_sacktag_one.patch new file mode 100644 index 00000000000..eb29809a66c --- /dev/null +++ b/queue-3.2/tcp-fix-range-tcp_shifted_skb-passes-to-tcp_sacktag_one.patch @@ -0,0 +1,74 @@ +From fdfc213119be8140f52fa3a2a8d44e0669d394c0 Mon Sep 17 00:00:00 2001 +From: Neal Cardwell +Date: Sun, 12 Feb 2012 18:37:10 +0000 +Subject: tcp: fix range tcp_shifted_skb() passes to tcp_sacktag_one() + + +From: Neal Cardwell + +[ Upstream commit daef52bab1fd26e24e8e9578f8fb33ba1d0cb412 ] + +Fix the newly-SACKed range to be the range of newly-shifted bytes. + +Previously - since 832d11c5cd076abc0aa1eaf7be96c81d1a59ce41 - +tcp_shifted_skb() incorrectly called tcp_sacktag_one() with the start +and end sequence numbers of the skb it passes in set to the range just +beyond the range that is newly-SACKed. + +This commit also removes a special-case adjustment to lost_cnt_hint in +tcp_shifted_skb() since the pre-existing adjustment of lost_cnt_hint +in tcp_sacktag_one() now properly handles this things now that the +correct start sequence number is passed in. + +Signed-off-by: Neal Cardwell +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 19 ++++++++++--------- + 1 file changed, 10 insertions(+), 9 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -1391,6 +1391,9 @@ static u8 tcp_sacktag_one(struct sock *s + return sacked; + } + ++/* Shift newly-SACKed bytes from this skb to the immediately previous ++ * already-SACKed sk_buff. Mark the newly-SACKed bytes as such. ++ */ + static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, + struct tcp_sacktag_state *state, + unsigned int pcount, int shifted, int mss, +@@ -1398,12 +1401,11 @@ static int tcp_shifted_skb(struct sock * + { + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *prev = tcp_write_queue_prev(sk, skb); ++ u32 start_seq = TCP_SKB_CB(skb)->seq; /* start of newly-SACKed */ ++ u32 end_seq = start_seq + shifted; /* end of newly-SACKed */ + + BUG_ON(!pcount); + +- if (skb == tp->lost_skb_hint) +- tp->lost_cnt_hint += pcount; +- + TCP_SKB_CB(prev)->end_seq += shifted; + TCP_SKB_CB(skb)->seq += shifted; + +@@ -1427,12 +1429,11 @@ static int tcp_shifted_skb(struct sock * + skb_shinfo(skb)->gso_type = 0; + } + +- /* We discard results */ +- tcp_sacktag_one(sk, state, +- TCP_SKB_CB(skb)->sacked, +- TCP_SKB_CB(skb)->seq, +- TCP_SKB_CB(skb)->end_seq, +- dup_sack, pcount); ++ /* Adjust counters and hints for the newly sacked sequence range but ++ * discard the return value since prev is already marked. ++ */ ++ tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, ++ start_seq, end_seq, dup_sack, pcount); + + /* Difference in this won't matter, both ACKed by the same cumul. 
ACK */ + TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); diff --git a/queue-3.2/tcp-fix-tcp_shifted_skb-adjustment-of-lost_cnt_hint-for-fack.patch b/queue-3.2/tcp-fix-tcp_shifted_skb-adjustment-of-lost_cnt_hint-for-fack.patch new file mode 100644 index 00000000000..f60514e0696 --- /dev/null +++ b/queue-3.2/tcp-fix-tcp_shifted_skb-adjustment-of-lost_cnt_hint-for-fack.patch @@ -0,0 +1,39 @@ +From 26a771b8a27372574be245ce69f8449796ef34c5 Mon Sep 17 00:00:00 2001 +From: Neal Cardwell +Date: Mon, 13 Feb 2012 20:22:08 +0000 +Subject: tcp: fix tcp_shifted_skb() adjustment of lost_cnt_hint for FACK + + +From: Neal Cardwell + +[ Upstream commit 0af2a0d0576205dda778d25c6c344fc6508fc81d ] + +This commit ensures that lost_cnt_hint is correctly updated in +tcp_shifted_skb() for FACK TCP senders. The lost_cnt_hint adjustment +in tcp_sacktag_one() only applies to non-FACK senders, so FACK senders +need their own adjustment. + +This applies the spirit of 1e5289e121372a3494402b1b131b41bfe1cf9b7f - +except now that the sequence range passed into tcp_sacktag_one() is +correct we need only have a special case adjustment for FACK. + +Signed-off-by: Neal Cardwell +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -1406,6 +1406,10 @@ static int tcp_shifted_skb(struct sock * + + BUG_ON(!pcount); + ++ /* Adjust hint for FACK. Non-FACK is handled in tcp_sacktag_one(). 
*/ ++ if (tcp_is_fack(tp) && (skb == tp->lost_skb_hint)) ++ tp->lost_cnt_hint += pcount; ++ + TCP_SKB_CB(prev)->end_seq += shifted; + TCP_SKB_CB(skb)->seq += shifted; + diff --git a/queue-3.2/tcp_v4_send_reset-binding-oif-to-iif-in-no-sock-case.patch b/queue-3.2/tcp_v4_send_reset-binding-oif-to-iif-in-no-sock-case.patch new file mode 100644 index 00000000000..8f9d94418ac --- /dev/null +++ b/queue-3.2/tcp_v4_send_reset-binding-oif-to-iif-in-no-sock-case.patch @@ -0,0 +1,45 @@ +From 49d465a29eb8455ee12ee96de7c6a8375db3d261 Mon Sep 17 00:00:00 2001 +From: Shawn Lu +Date: Sat, 4 Feb 2012 12:38:09 +0000 +Subject: tcp_v4_send_reset: binding oif to iif in no sock case + + +From: Shawn Lu + +[ Upstream commit e2446eaab5585555a38ea0df4e01ff313dbb4ac9 ] + +Bind the RST packet's outgoing interface to the incoming interface +for tcp v4 when there is no socket associated with it. +When sk is not NULL, use sk->sk_bound_dev_if instead. +(suggested by Eric Dumazet). + +This has a few benefits: +1. tcp_v6_send_reset already did that. +2. This helps tcp connect with SO_BINDTODEVICE set. When the +connection is lost, we are still able to send out the RST using the +same interface. +3. We are sending a reply, so it is most likely to succeed +if iif is used. + +Signed-off-by: Shawn Lu +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_ipv4.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -650,6 +650,11 @@ static void tcp_v4_send_reset(struct soc + arg.iov[0].iov_len, IPPROTO_TCP, 0); + arg.csumoffset = offsetof(struct tcphdr, check) / 2; + arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; ++ /* When socket is gone, all binding information is lost. ++ * routing might fail in this case. using iif for oif to ++ * make sure we can deliver it ++ */ ++ arg.bound_dev_if = sk ?
sk->sk_bound_dev_if : inet_iif(skb); + + net = dev_net(skb_dst(skb)->dev); + arg.tos = ip_hdr(skb)->tos; diff --git a/queue-3.2/veth-enforce-minimum-size-of-veth_info_peer.patch b/queue-3.2/veth-enforce-minimum-size-of-veth_info_peer.patch new file mode 100644 index 00000000000..d38fba90b22 --- /dev/null +++ b/queue-3.2/veth-enforce-minimum-size-of-veth_info_peer.patch @@ -0,0 +1,35 @@ +From d1325dd9effb8ebe78b461f311f2f0ed93ffd667 Mon Sep 17 00:00:00 2001 +From: Thomas Graf +Date: Wed, 15 Feb 2012 04:09:46 +0000 +Subject: veth: Enforce minimum size of VETH_INFO_PEER + + +From: Hagen Paul Pfeifer + +[ Upstream commit 237114384ab22c174ec4641e809f8e6cbcfce774 ] + +VETH_INFO_PEER carries struct ifinfomsg plus optional IFLA +attributes. A minimal size of sizeof(struct ifinfomsg) must be +enforced or we may risk accessing that struct beyond the limits +of the netlink message. + +Signed-off-by: Thomas Graf +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/veth.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/net/veth.c ++++ b/drivers/net/veth.c +@@ -423,7 +423,9 @@ static void veth_dellink(struct net_devi + unregister_netdevice_queue(peer, head); + } + +-static const struct nla_policy veth_policy[VETH_INFO_MAX + 1]; ++static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = { ++ [VETH_INFO_PEER] = { .len = sizeof(struct ifinfomsg) }, ++}; + + static struct rtnl_link_ops veth_link_ops = { + .kind = DRV_NAME, diff --git a/queue-3.2/via-velocity-s3-resume-fix.patch b/queue-3.2/via-velocity-s3-resume-fix.patch new file mode 100644 index 00000000000..aa14023e324 --- /dev/null +++ b/queue-3.2/via-velocity-s3-resume-fix.patch @@ -0,0 +1,35 @@ +From 39bb3df6167f44c2b16166a92a8a3a41f888cf0b Mon Sep 17 00:00:00 2001 +From: David Lv +Date: Sat, 4 Feb 2012 23:22:26 +0000 +Subject: via-velocity: S3 resume fix. 
+ + +From: David Lv + +[ Upstream commit b530b1930bbd9d005345133f0ff0c556d2a52b19 ] + +Initially diagnosed on Ubuntu 11.04 with kernel 2.6.38. + +velocity_close is not called during a suspend / resume cycle in this +driver and it has no business playing directly with power states. + +Signed-off-by: David Lv +Acked-by: Francois Romieu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/via/via-velocity.c | 3 --- + 1 file changed, 3 deletions(-) + +--- a/drivers/net/ethernet/via/via-velocity.c ++++ b/drivers/net/ethernet/via/via-velocity.c +@@ -2489,9 +2489,6 @@ static int velocity_close(struct net_dev + if (dev->irq != 0) + free_irq(dev->irq, dev); + +- /* Power down the chip */ +- pci_set_power_state(vptr->pdev, PCI_D3hot); +- + velocity_free_rings(vptr); + + vptr->flags &= (~VELOCITY_FLAGS_OPENED);